nexthop.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Generic nexthop implementation
  4. *
  5. * Copyright (c) 2017-19 Cumulus Networks
  6. * Copyright (c) 2017-19 David Ahern <[email protected]>
  7. */
  8. #ifndef __LINUX_NEXTHOP_H
  9. #define __LINUX_NEXTHOP_H
  10. #include <linux/netdevice.h>
  11. #include <linux/notifier.h>
  12. #include <linux/route.h>
  13. #include <linux/types.h>
  14. #include <net/ip_fib.h>
  15. #include <net/ip6_fib.h>
  16. #include <net/netlink.h>
  17. #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
  18. struct nexthop;
  19. struct nh_config {
  20. u32 nh_id;
  21. u8 nh_family;
  22. u8 nh_protocol;
  23. u8 nh_blackhole;
  24. u8 nh_fdb;
  25. u32 nh_flags;
  26. int nh_ifindex;
  27. struct net_device *dev;
  28. union {
  29. __be32 ipv4;
  30. struct in6_addr ipv6;
  31. } gw;
  32. struct nlattr *nh_grp;
  33. u16 nh_grp_type;
  34. u16 nh_grp_res_num_buckets;
  35. unsigned long nh_grp_res_idle_timer;
  36. unsigned long nh_grp_res_unbalanced_timer;
  37. bool nh_grp_res_has_num_buckets;
  38. bool nh_grp_res_has_idle_timer;
  39. bool nh_grp_res_has_unbalanced_timer;
  40. struct nlattr *nh_encap;
  41. u16 nh_encap_type;
  42. u32 nlflags;
  43. struct nl_info nlinfo;
  44. };
  45. struct nh_info {
  46. struct hlist_node dev_hash; /* entry on netns devhash */
  47. struct nexthop *nh_parent;
  48. u8 family;
  49. bool reject_nh;
  50. bool fdb_nh;
  51. union {
  52. struct fib_nh_common fib_nhc;
  53. struct fib_nh fib_nh;
  54. struct fib6_nh fib6_nh;
  55. };
  56. };
  57. struct nh_res_bucket {
  58. struct nh_grp_entry __rcu *nh_entry;
  59. atomic_long_t used_time;
  60. unsigned long migrated_time;
  61. bool occupied;
  62. u8 nh_flags;
  63. };
  64. struct nh_res_table {
  65. struct net *net;
  66. u32 nhg_id;
  67. struct delayed_work upkeep_dw;
  68. /* List of NHGEs that have too few buckets ("uw" for underweight).
  69. * Reclaimed buckets will be given to entries in this list.
  70. */
  71. struct list_head uw_nh_entries;
  72. unsigned long unbalanced_since;
  73. u32 idle_timer;
  74. u32 unbalanced_timer;
  75. u16 num_nh_buckets;
  76. struct nh_res_bucket nh_buckets[];
  77. };
  78. struct nh_grp_entry {
  79. struct nexthop *nh;
  80. u8 weight;
  81. union {
  82. struct {
  83. atomic_t upper_bound;
  84. } hthr;
  85. struct {
  86. /* Member on uw_nh_entries. */
  87. struct list_head uw_nh_entry;
  88. u16 count_buckets;
  89. u16 wants_buckets;
  90. } res;
  91. };
  92. struct list_head nh_list;
  93. struct nexthop *nh_parent; /* nexthop of group with this entry */
  94. };
  95. struct nh_group {
  96. struct nh_group *spare; /* spare group for removals */
  97. u16 num_nh;
  98. bool is_multipath;
  99. bool hash_threshold;
  100. bool resilient;
  101. bool fdb_nh;
  102. bool has_v4;
  103. struct nh_res_table __rcu *res_table;
  104. struct nh_grp_entry nh_entries[];
  105. };
  106. struct nexthop {
  107. struct rb_node rb_node; /* entry on netns rbtree */
  108. struct list_head fi_list; /* v4 entries using nh */
  109. struct list_head f6i_list; /* v6 entries using nh */
  110. struct list_head fdb_list; /* fdb entries using this nh */
  111. struct list_head grp_list; /* nh group entries using this nh */
  112. struct net *net;
  113. u32 id;
  114. u8 protocol; /* app managing this nh */
  115. u8 nh_flags;
  116. bool is_group;
  117. refcount_t refcnt;
  118. struct rcu_head rcu;
  119. union {
  120. struct nh_info __rcu *nh_info;
  121. struct nh_group __rcu *nh_grp;
  122. };
  123. };
  124. enum nexthop_event_type {
  125. NEXTHOP_EVENT_DEL,
  126. NEXTHOP_EVENT_REPLACE,
  127. NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
  128. NEXTHOP_EVENT_BUCKET_REPLACE,
  129. };
  130. enum nh_notifier_info_type {
  131. NH_NOTIFIER_INFO_TYPE_SINGLE,
  132. NH_NOTIFIER_INFO_TYPE_GRP,
  133. NH_NOTIFIER_INFO_TYPE_RES_TABLE,
  134. NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
  135. };
  136. struct nh_notifier_single_info {
  137. struct net_device *dev;
  138. u8 gw_family;
  139. union {
  140. __be32 ipv4;
  141. struct in6_addr ipv6;
  142. };
  143. u8 is_reject:1,
  144. is_fdb:1,
  145. has_encap:1;
  146. };
  147. struct nh_notifier_grp_entry_info {
  148. u8 weight;
  149. u32 id;
  150. struct nh_notifier_single_info nh;
  151. };
  152. struct nh_notifier_grp_info {
  153. u16 num_nh;
  154. bool is_fdb;
  155. struct nh_notifier_grp_entry_info nh_entries[];
  156. };
  157. struct nh_notifier_res_bucket_info {
  158. u16 bucket_index;
  159. unsigned int idle_timer_ms;
  160. bool force;
  161. struct nh_notifier_single_info old_nh;
  162. struct nh_notifier_single_info new_nh;
  163. };
  164. struct nh_notifier_res_table_info {
  165. u16 num_nh_buckets;
  166. struct nh_notifier_single_info nhs[];
  167. };
  168. struct nh_notifier_info {
  169. struct net *net;
  170. struct netlink_ext_ack *extack;
  171. u32 id;
  172. enum nh_notifier_info_type type;
  173. union {
  174. struct nh_notifier_single_info *nh;
  175. struct nh_notifier_grp_info *nh_grp;
  176. struct nh_notifier_res_table_info *nh_res_table;
  177. struct nh_notifier_res_bucket_info *nh_res_bucket;
  178. };
  179. };
  180. int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
  181. struct netlink_ext_ack *extack);
  182. int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
  183. void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
  184. void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
  185. bool offload, bool trap);
  186. void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
  187. unsigned long *activity);
  188. /* caller is holding rcu or rtnl; no reference taken to nexthop */
  189. struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
  190. void nexthop_free_rcu(struct rcu_head *head);
  191. static inline bool nexthop_get(struct nexthop *nh)
  192. {
  193. return refcount_inc_not_zero(&nh->refcnt);
  194. }
  195. static inline void nexthop_put(struct nexthop *nh)
  196. {
  197. if (refcount_dec_and_test(&nh->refcnt))
  198. call_rcu(&nh->rcu, nexthop_free_rcu);
  199. }
  200. static inline bool nexthop_cmp(const struct nexthop *nh1,
  201. const struct nexthop *nh2)
  202. {
  203. return nh1 == nh2;
  204. }
  205. static inline bool nexthop_is_fdb(const struct nexthop *nh)
  206. {
  207. if (nh->is_group) {
  208. const struct nh_group *nh_grp;
  209. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  210. return nh_grp->fdb_nh;
  211. } else {
  212. const struct nh_info *nhi;
  213. nhi = rcu_dereference_rtnl(nh->nh_info);
  214. return nhi->fdb_nh;
  215. }
  216. }
  217. static inline bool nexthop_has_v4(const struct nexthop *nh)
  218. {
  219. if (nh->is_group) {
  220. struct nh_group *nh_grp;
  221. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  222. return nh_grp->has_v4;
  223. }
  224. return false;
  225. }
  226. static inline bool nexthop_is_multipath(const struct nexthop *nh)
  227. {
  228. if (nh->is_group) {
  229. struct nh_group *nh_grp;
  230. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  231. return nh_grp->is_multipath;
  232. }
  233. return false;
  234. }
  235. struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
  236. static inline unsigned int nexthop_num_path(const struct nexthop *nh)
  237. {
  238. unsigned int rc = 1;
  239. if (nh->is_group) {
  240. struct nh_group *nh_grp;
  241. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  242. if (nh_grp->is_multipath)
  243. rc = nh_grp->num_nh;
  244. }
  245. return rc;
  246. }
  247. static inline
  248. struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
  249. {
  250. /* for_nexthops macros in fib_semantics.c grabs a pointer to
  251. * the nexthop before checking nhsel
  252. */
  253. if (nhsel >= nhg->num_nh)
  254. return NULL;
  255. return nhg->nh_entries[nhsel].nh;
  256. }
  257. static inline
  258. int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
  259. u8 rt_family)
  260. {
  261. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  262. int i;
  263. for (i = 0; i < nhg->num_nh; i++) {
  264. struct nexthop *nhe = nhg->nh_entries[i].nh;
  265. struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
  266. struct fib_nh_common *nhc = &nhi->fib_nhc;
  267. int weight = nhg->nh_entries[i].weight;
  268. if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
  269. return -EMSGSIZE;
  270. }
  271. return 0;
  272. }
  273. /* called with rcu lock */
  274. static inline bool nexthop_is_blackhole(const struct nexthop *nh)
  275. {
  276. const struct nh_info *nhi;
  277. if (nh->is_group) {
  278. struct nh_group *nh_grp;
  279. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  280. if (nh_grp->num_nh > 1)
  281. return false;
  282. nh = nh_grp->nh_entries[0].nh;
  283. }
  284. nhi = rcu_dereference_rtnl(nh->nh_info);
  285. return nhi->reject_nh;
  286. }
  287. static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
  288. {
  289. struct nh_info *nhi;
  290. struct nexthop *nh;
  291. nh = nexthop_select_path(res->fi->nh, hash);
  292. nhi = rcu_dereference(nh->nh_info);
  293. res->nhc = &nhi->fib_nhc;
  294. }
  295. /* called with rcu read lock or rtnl held */
  296. static inline
  297. struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
  298. {
  299. struct nh_info *nhi;
  300. BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
  301. BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
  302. if (nh->is_group) {
  303. struct nh_group *nh_grp;
  304. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  305. if (nh_grp->is_multipath) {
  306. nh = nexthop_mpath_select(nh_grp, nhsel);
  307. if (!nh)
  308. return NULL;
  309. }
  310. }
  311. nhi = rcu_dereference_rtnl(nh->nh_info);
  312. return &nhi->fib_nhc;
  313. }
  314. /* called from fib_table_lookup with rcu_lock */
  315. static inline
  316. struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
  317. int fib_flags,
  318. const struct flowi4 *flp,
  319. int *nhsel)
  320. {
  321. struct nh_info *nhi;
  322. if (nh->is_group) {
  323. struct nh_group *nhg = rcu_dereference(nh->nh_grp);
  324. int i;
  325. for (i = 0; i < nhg->num_nh; i++) {
  326. struct nexthop *nhe = nhg->nh_entries[i].nh;
  327. nhi = rcu_dereference(nhe->nh_info);
  328. if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
  329. *nhsel = i;
  330. return &nhi->fib_nhc;
  331. }
  332. }
  333. } else {
  334. nhi = rcu_dereference(nh->nh_info);
  335. if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
  336. *nhsel = 0;
  337. return &nhi->fib_nhc;
  338. }
  339. }
  340. return NULL;
  341. }
  342. static inline bool nexthop_uses_dev(const struct nexthop *nh,
  343. const struct net_device *dev)
  344. {
  345. struct nh_info *nhi;
  346. if (nh->is_group) {
  347. struct nh_group *nhg = rcu_dereference(nh->nh_grp);
  348. int i;
  349. for (i = 0; i < nhg->num_nh; i++) {
  350. struct nexthop *nhe = nhg->nh_entries[i].nh;
  351. nhi = rcu_dereference(nhe->nh_info);
  352. if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
  353. return true;
  354. }
  355. } else {
  356. nhi = rcu_dereference(nh->nh_info);
  357. if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
  358. return true;
  359. }
  360. return false;
  361. }
  362. static inline unsigned int fib_info_num_path(const struct fib_info *fi)
  363. {
  364. if (unlikely(fi->nh))
  365. return nexthop_num_path(fi->nh);
  366. return fi->fib_nhs;
  367. }
  368. int fib_check_nexthop(struct nexthop *nh, u8 scope,
  369. struct netlink_ext_ack *extack);
  370. static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
  371. {
  372. if (unlikely(fi->nh))
  373. return nexthop_fib_nhc(fi->nh, nhsel);
  374. return &fi->fib_nh[nhsel].nh_common;
  375. }
  376. /* only used when fib_nh is built into fib_info */
  377. static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
  378. {
  379. WARN_ON(fi->nh);
  380. return &fi->fib_nh[nhsel];
  381. }
  382. /*
  383. * IPv6 variants
  384. */
  385. int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
  386. struct netlink_ext_ack *extack);
  387. /* Caller should either hold rcu_read_lock(), or RTNL. */
  388. static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
  389. {
  390. struct nh_info *nhi;
  391. if (nh->is_group) {
  392. struct nh_group *nh_grp;
  393. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  394. nh = nexthop_mpath_select(nh_grp, 0);
  395. if (!nh)
  396. return NULL;
  397. }
  398. nhi = rcu_dereference_rtnl(nh->nh_info);
  399. if (nhi->family == AF_INET6)
  400. return &nhi->fib6_nh;
  401. return NULL;
  402. }
  403. static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
  404. {
  405. struct fib6_nh *fib6_nh;
  406. fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
  407. return fib6_nh->fib_nh_dev;
  408. }
  409. static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
  410. {
  411. struct nexthop *nh = res->f6i->nh;
  412. struct nh_info *nhi;
  413. nh = nexthop_select_path(nh, hash);
  414. nhi = rcu_dereference_rtnl(nh->nh_info);
  415. if (nhi->reject_nh) {
  416. res->fib6_type = RTN_BLACKHOLE;
  417. res->fib6_flags |= RTF_REJECT;
  418. res->nh = nexthop_fib6_nh(nh);
  419. } else {
  420. res->nh = &nhi->fib6_nh;
  421. }
  422. }
  423. int nexthop_for_each_fib6_nh(struct nexthop *nh,
  424. int (*cb)(struct fib6_nh *nh, void *arg),
  425. void *arg);
  426. static inline int nexthop_get_family(struct nexthop *nh)
  427. {
  428. struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
  429. return nhi->family;
  430. }
  431. static inline
  432. struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
  433. {
  434. struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
  435. return &nhi->fib_nhc;
  436. }
  437. static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
  438. int hash)
  439. {
  440. struct nh_info *nhi;
  441. struct nexthop *nhp;
  442. nhp = nexthop_select_path(nh, hash);
  443. if (unlikely(!nhp))
  444. return NULL;
  445. nhi = rcu_dereference(nhp->nh_info);
  446. return &nhi->fib_nhc;
  447. }
  448. #endif