seg6_local.c 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * SR-IPv6 implementation
  4. *
  5. * Authors:
  6. * David Lebrun <[email protected]>
  7. * eBPF support: Mathieu Xhonneux <[email protected]>
  8. */
  9. #include <linux/filter.h>
  10. #include <linux/types.h>
  11. #include <linux/skbuff.h>
  12. #include <linux/net.h>
  13. #include <linux/module.h>
  14. #include <net/ip.h>
  15. #include <net/lwtunnel.h>
  16. #include <net/netevent.h>
  17. #include <net/netns/generic.h>
  18. #include <net/ip6_fib.h>
  19. #include <net/route.h>
  20. #include <net/seg6.h>
  21. #include <linux/seg6.h>
  22. #include <linux/seg6_local.h>
  23. #include <net/addrconf.h>
  24. #include <net/ip6_route.h>
  25. #include <net/dst_cache.h>
  26. #include <net/ip_tunnels.h>
  27. #ifdef CONFIG_IPV6_SEG6_HMAC
  28. #include <net/seg6_hmac.h>
  29. #endif
  30. #include <net/seg6_local.h>
  31. #include <linux/etherdevice.h>
  32. #include <linux/bpf.h>
  33. #include <linux/netfilter.h>
  34. #define SEG6_F_ATTR(i) BIT(i)
  35. struct seg6_local_lwt;
  36. /* callbacks used for customizing the creation and destruction of a behavior */
  37. struct seg6_local_lwtunnel_ops {
  38. int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
  39. struct netlink_ext_ack *extack);
  40. void (*destroy_state)(struct seg6_local_lwt *slwt);
  41. };
  42. struct seg6_action_desc {
  43. int action;
  44. unsigned long attrs;
  45. /* The optattrs field is used for specifying all the optional
  46. * attributes supported by a specific behavior.
  47. * It means that if one of these attributes is not provided in the
  48. * netlink message during the behavior creation, no errors will be
  49. * returned to the userspace.
  50. *
  51. * Each attribute can be only of two types (mutually exclusive):
  52. * 1) required or 2) optional.
  53. * Every user MUST obey to this rule! If you set an attribute as
  54. * required the same attribute CANNOT be set as optional and vice
  55. * versa.
  56. */
  57. unsigned long optattrs;
  58. int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
  59. int static_headroom;
  60. struct seg6_local_lwtunnel_ops slwt_ops;
  61. };
  62. struct bpf_lwt_prog {
  63. struct bpf_prog *prog;
  64. char *name;
  65. };
  66. /* default length values (expressed in bits) for both Locator-Block and
  67. * Locator-Node Function.
  68. *
  69. * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
  70. * i) greater than 0;
  71. * ii) evenly divisible by 8. In other terms, the lengths of the
  72. * Locator-Block and Locator-Node Function must be byte-aligned (we can
  73. * relax this constraint in the future if really needed).
  74. *
  75. * Moreover, a third condition must hold:
  76. * iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
  77. *
  78. * The correctness of SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS
  79. * values are checked during the kernel compilation. If the compilation stops,
  80. * check the value of these parameters to see if they meet conditions (i), (ii)
  81. * and (iii).
  82. */
  83. #define SEG6_LOCAL_LCBLOCK_DBITS 32
  84. #define SEG6_LOCAL_LCNODE_FN_DBITS 16
  85. /* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be
  86. * used directly to check whether the lengths (in bits) of Locator-Block and
  87. * Locator-Node Function are valid according to (i), (ii), (iii).
  88. */
  89. #define next_csid_chk_cntr_bits(blen, flen) \
  90. ((blen) + (flen) > 128)
  91. #define next_csid_chk_lcblock_bits(blen) \
  92. ({ \
  93. typeof(blen) __tmp = blen; \
  94. (!__tmp || __tmp > 120 || (__tmp & 0x07)); \
  95. })
  96. #define next_csid_chk_lcnode_fn_bits(flen) \
  97. next_csid_chk_lcblock_bits(flen)
  98. /* Supported Flavor operations are reported in this bitmask */
  99. #define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
  100. struct seg6_flavors_info {
  101. /* Flavor operations */
  102. __u32 flv_ops;
  103. /* Locator-Block length, expressed in bits */
  104. __u8 lcblock_bits;
  105. /* Locator-Node Function length, expressed in bits*/
  106. __u8 lcnode_func_bits;
  107. };
  108. enum seg6_end_dt_mode {
  109. DT_INVALID_MODE = -EINVAL,
  110. DT_LEGACY_MODE = 0,
  111. DT_VRF_MODE = 1,
  112. };
  113. struct seg6_end_dt_info {
  114. enum seg6_end_dt_mode mode;
  115. struct net *net;
  116. /* VRF device associated to the routing table used by the SRv6
  117. * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
  118. */
  119. int vrf_ifindex;
  120. int vrf_table;
  121. /* tunneled packet family (IPv4 or IPv6).
  122. * Protocol and header length are inferred from family.
  123. */
  124. u16 family;
  125. };
  126. struct pcpu_seg6_local_counters {
  127. u64_stats_t packets;
  128. u64_stats_t bytes;
  129. u64_stats_t errors;
  130. struct u64_stats_sync syncp;
  131. };
  132. /* This struct groups all the SRv6 Behavior counters supported so far.
  133. *
  134. * put_nla_counters() makes use of this data structure to collect all counter
  135. * values after the per-CPU counter evaluation has been performed.
  136. * Finally, each counter value (in seg6_local_counters) is stored in the
  137. * corresponding netlink attribute and sent to user space.
  138. *
  139. * NB: we don't want to expose this structure to user space!
  140. */
  141. struct seg6_local_counters {
  142. __u64 packets;
  143. __u64 bytes;
  144. __u64 errors;
  145. };
  146. #define seg6_local_alloc_pcpu_counters(__gfp) \
  147. __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
  148. ((__gfp) | __GFP_ZERO))
  149. #define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
  150. struct seg6_local_lwt {
  151. int action;
  152. struct ipv6_sr_hdr *srh;
  153. int table;
  154. struct in_addr nh4;
  155. struct in6_addr nh6;
  156. int iif;
  157. int oif;
  158. struct bpf_lwt_prog bpf;
  159. #ifdef CONFIG_NET_L3_MASTER_DEV
  160. struct seg6_end_dt_info dt_info;
  161. #endif
  162. struct seg6_flavors_info flv_info;
  163. struct pcpu_seg6_local_counters __percpu *pcpu_counters;
  164. int headroom;
  165. struct seg6_action_desc *desc;
  166. /* unlike the required attrs, we have to track the optional attributes
  167. * that have been effectively parsed.
  168. */
  169. unsigned long parsed_optattrs;
  170. };
  171. static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
  172. {
  173. return (struct seg6_local_lwt *)lwt->data;
  174. }
  175. static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
  176. {
  177. struct ipv6_sr_hdr *srh;
  178. srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
  179. if (!srh)
  180. return NULL;
  181. #ifdef CONFIG_IPV6_SEG6_HMAC
  182. if (!seg6_hmac_validate_skb(skb))
  183. return NULL;
  184. #endif
  185. return srh;
  186. }
  187. static bool decap_and_validate(struct sk_buff *skb, int proto)
  188. {
  189. struct ipv6_sr_hdr *srh;
  190. unsigned int off = 0;
  191. srh = seg6_get_srh(skb, 0);
  192. if (srh && srh->segments_left > 0)
  193. return false;
  194. #ifdef CONFIG_IPV6_SEG6_HMAC
  195. if (srh && !seg6_hmac_validate_skb(skb))
  196. return false;
  197. #endif
  198. if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
  199. return false;
  200. if (!pskb_pull(skb, off))
  201. return false;
  202. skb_postpull_rcsum(skb, skb_network_header(skb), off);
  203. skb_reset_network_header(skb);
  204. skb_reset_transport_header(skb);
  205. if (iptunnel_pull_offloads(skb))
  206. return false;
  207. return true;
  208. }
  209. static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
  210. {
  211. struct in6_addr *addr;
  212. srh->segments_left--;
  213. addr = srh->segments + srh->segments_left;
  214. *daddr = *addr;
  215. }
  216. static int
  217. seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
  218. u32 tbl_id, bool local_delivery)
  219. {
  220. struct net *net = dev_net(skb->dev);
  221. struct ipv6hdr *hdr = ipv6_hdr(skb);
  222. int flags = RT6_LOOKUP_F_HAS_SADDR;
  223. struct dst_entry *dst = NULL;
  224. struct rt6_info *rt;
  225. struct flowi6 fl6;
  226. int dev_flags = 0;
  227. memset(&fl6, 0, sizeof(fl6));
  228. fl6.flowi6_iif = skb->dev->ifindex;
  229. fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
  230. fl6.saddr = hdr->saddr;
  231. fl6.flowlabel = ip6_flowinfo(hdr);
  232. fl6.flowi6_mark = skb->mark;
  233. fl6.flowi6_proto = hdr->nexthdr;
  234. if (nhaddr)
  235. fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
  236. if (!tbl_id) {
  237. dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
  238. } else {
  239. struct fib6_table *table;
  240. table = fib6_get_table(net, tbl_id);
  241. if (!table)
  242. goto out;
  243. rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
  244. dst = &rt->dst;
  245. }
  246. /* we want to discard traffic destined for local packet processing,
  247. * if @local_delivery is set to false.
  248. */
  249. if (!local_delivery)
  250. dev_flags |= IFF_LOOPBACK;
  251. if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
  252. dst_release(dst);
  253. dst = NULL;
  254. }
  255. out:
  256. if (!dst) {
  257. rt = net->ipv6.ip6_blk_hole_entry;
  258. dst = &rt->dst;
  259. dst_hold(dst);
  260. }
  261. skb_dst_drop(skb);
  262. skb_dst_set(skb, dst);
  263. return dst->error;
  264. }
  265. int seg6_lookup_nexthop(struct sk_buff *skb,
  266. struct in6_addr *nhaddr, u32 tbl_id)
  267. {
  268. return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
  269. }
  270. static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
  271. {
  272. return finfo->lcblock_bits >> 3;
  273. }
  274. static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
  275. {
  276. return finfo->lcnode_func_bits >> 3;
  277. }
  278. static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
  279. const struct seg6_flavors_info *finfo)
  280. {
  281. __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
  282. __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
  283. __u8 arg_octects;
  284. int i;
  285. arg_octects = 16 - blk_octects - fnc_octects;
  286. for (i = 0; i < arg_octects; ++i) {
  287. if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
  288. return false;
  289. }
  290. return true;
  291. }
  292. /* assume that DA.Argument length > 0 */
  293. static void seg6_next_csid_advance_arg(struct in6_addr *addr,
  294. const struct seg6_flavors_info *finfo)
  295. {
  296. __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
  297. __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
  298. /* advance DA.Argument */
  299. memmove(&addr->s6_addr[blk_octects],
  300. &addr->s6_addr[blk_octects + fnc_octects],
  301. 16 - blk_octects - fnc_octects);
  302. memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
  303. }
  304. static int input_action_end_core(struct sk_buff *skb,
  305. struct seg6_local_lwt *slwt)
  306. {
  307. struct ipv6_sr_hdr *srh;
  308. srh = get_and_validate_srh(skb);
  309. if (!srh)
  310. goto drop;
  311. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  312. seg6_lookup_nexthop(skb, NULL, 0);
  313. return dst_input(skb);
  314. drop:
  315. kfree_skb(skb);
  316. return -EINVAL;
  317. }
  318. static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  319. {
  320. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  321. struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
  322. if (seg6_next_csid_is_arg_zero(daddr, finfo))
  323. return input_action_end_core(skb, slwt);
  324. /* update DA */
  325. seg6_next_csid_advance_arg(daddr, finfo);
  326. seg6_lookup_nexthop(skb, NULL, 0);
  327. return dst_input(skb);
  328. }
  329. static bool seg6_next_csid_enabled(__u32 fops)
  330. {
  331. return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
  332. }
  333. /* regular endpoint function */
  334. static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  335. {
  336. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  337. if (seg6_next_csid_enabled(finfo->flv_ops))
  338. return end_next_csid_core(skb, slwt);
  339. return input_action_end_core(skb, slwt);
  340. }
  341. /* regular endpoint, and forward to specified nexthop */
  342. static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  343. {
  344. struct ipv6_sr_hdr *srh;
  345. srh = get_and_validate_srh(skb);
  346. if (!srh)
  347. goto drop;
  348. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  349. seg6_lookup_nexthop(skb, &slwt->nh6, 0);
  350. return dst_input(skb);
  351. drop:
  352. kfree_skb(skb);
  353. return -EINVAL;
  354. }
  355. static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  356. {
  357. struct ipv6_sr_hdr *srh;
  358. srh = get_and_validate_srh(skb);
  359. if (!srh)
  360. goto drop;
  361. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  362. seg6_lookup_nexthop(skb, NULL, slwt->table);
  363. return dst_input(skb);
  364. drop:
  365. kfree_skb(skb);
  366. return -EINVAL;
  367. }
  368. /* decapsulate and forward inner L2 frame on specified interface */
  369. static int input_action_end_dx2(struct sk_buff *skb,
  370. struct seg6_local_lwt *slwt)
  371. {
  372. struct net *net = dev_net(skb->dev);
  373. struct net_device *odev;
  374. struct ethhdr *eth;
  375. if (!decap_and_validate(skb, IPPROTO_ETHERNET))
  376. goto drop;
  377. if (!pskb_may_pull(skb, ETH_HLEN))
  378. goto drop;
  379. skb_reset_mac_header(skb);
  380. eth = (struct ethhdr *)skb->data;
  381. /* To determine the frame's protocol, we assume it is 802.3. This avoids
  382. * a call to eth_type_trans(), which is not really relevant for our
  383. * use case.
  384. */
  385. if (!eth_proto_is_802_3(eth->h_proto))
  386. goto drop;
  387. odev = dev_get_by_index_rcu(net, slwt->oif);
  388. if (!odev)
  389. goto drop;
  390. /* As we accept Ethernet frames, make sure the egress device is of
  391. * the correct type.
  392. */
  393. if (odev->type != ARPHRD_ETHER)
  394. goto drop;
  395. if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
  396. goto drop;
  397. skb_orphan(skb);
  398. if (skb_warn_if_lro(skb))
  399. goto drop;
  400. skb_forward_csum(skb);
  401. if (skb->len - ETH_HLEN > odev->mtu)
  402. goto drop;
  403. skb->dev = odev;
  404. skb->protocol = eth->h_proto;
  405. return dev_queue_xmit(skb);
  406. drop:
  407. kfree_skb(skb);
  408. return -EINVAL;
  409. }
  410. static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
  411. struct sk_buff *skb)
  412. {
  413. struct dst_entry *orig_dst = skb_dst(skb);
  414. struct in6_addr *nhaddr = NULL;
  415. struct seg6_local_lwt *slwt;
  416. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  417. /* The inner packet is not associated to any local interface,
  418. * so we do not call netif_rx().
  419. *
  420. * If slwt->nh6 is set to ::, then lookup the nexthop for the
  421. * inner packet's DA. Otherwise, use the specified nexthop.
  422. */
  423. if (!ipv6_addr_any(&slwt->nh6))
  424. nhaddr = &slwt->nh6;
  425. seg6_lookup_nexthop(skb, nhaddr, 0);
  426. return dst_input(skb);
  427. }
  428. /* decapsulate and forward to specified nexthop */
  429. static int input_action_end_dx6(struct sk_buff *skb,
  430. struct seg6_local_lwt *slwt)
  431. {
  432. /* this function accepts IPv6 encapsulated packets, with either
  433. * an SRH with SL=0, or no SRH.
  434. */
  435. if (!decap_and_validate(skb, IPPROTO_IPV6))
  436. goto drop;
  437. if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
  438. goto drop;
  439. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  440. nf_reset_ct(skb);
  441. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  442. return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
  443. dev_net(skb->dev), NULL, skb, NULL,
  444. skb_dst(skb)->dev, input_action_end_dx6_finish);
  445. return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
  446. drop:
  447. kfree_skb(skb);
  448. return -EINVAL;
  449. }
  450. static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
  451. struct sk_buff *skb)
  452. {
  453. struct dst_entry *orig_dst = skb_dst(skb);
  454. struct seg6_local_lwt *slwt;
  455. struct iphdr *iph;
  456. __be32 nhaddr;
  457. int err;
  458. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  459. iph = ip_hdr(skb);
  460. nhaddr = slwt->nh4.s_addr ?: iph->daddr;
  461. skb_dst_drop(skb);
  462. err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
  463. if (err) {
  464. kfree_skb(skb);
  465. return -EINVAL;
  466. }
  467. return dst_input(skb);
  468. }
  469. static int input_action_end_dx4(struct sk_buff *skb,
  470. struct seg6_local_lwt *slwt)
  471. {
  472. if (!decap_and_validate(skb, IPPROTO_IPIP))
  473. goto drop;
  474. if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  475. goto drop;
  476. skb->protocol = htons(ETH_P_IP);
  477. skb_set_transport_header(skb, sizeof(struct iphdr));
  478. nf_reset_ct(skb);
  479. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  480. return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
  481. dev_net(skb->dev), NULL, skb, NULL,
  482. skb_dst(skb)->dev, input_action_end_dx4_finish);
  483. return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
  484. drop:
  485. kfree_skb(skb);
  486. return -EINVAL;
  487. }
  488. #ifdef CONFIG_NET_L3_MASTER_DEV
  489. static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
  490. {
  491. const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
  492. return nli->nl_net;
  493. }
  494. static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
  495. u16 family, struct netlink_ext_ack *extack)
  496. {
  497. struct seg6_end_dt_info *info = &slwt->dt_info;
  498. int vrf_ifindex;
  499. struct net *net;
  500. net = fib6_config_get_net(cfg);
  501. /* note that vrf_table was already set by parse_nla_vrftable() */
  502. vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
  503. info->vrf_table);
  504. if (vrf_ifindex < 0) {
  505. if (vrf_ifindex == -EPERM) {
  506. NL_SET_ERR_MSG(extack,
  507. "Strict mode for VRF is disabled");
  508. } else if (vrf_ifindex == -ENODEV) {
  509. NL_SET_ERR_MSG(extack,
  510. "Table has no associated VRF device");
  511. } else {
  512. pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
  513. vrf_ifindex);
  514. }
  515. return vrf_ifindex;
  516. }
  517. info->net = net;
  518. info->vrf_ifindex = vrf_ifindex;
  519. info->family = family;
  520. info->mode = DT_VRF_MODE;
  521. return 0;
  522. }
  523. /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
  524. * routes the IPv4/IPv6 packet by looking at the configured routing table.
  525. *
  526. * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
  527. * Routing Header packets) from several interfaces and the outer IPv6
  528. * destination address (DA) is used for retrieving the specific instance of the
  529. * End.DT4/DT6 behavior that should process the packets.
  530. *
  531. * However, the inner IPv4/IPv6 packet is not really bound to any receiving
  532. * interface and thus the End.DT4/DT6 sets the VRF (associated with the
  533. * corresponding routing table) as the *receiving* interface.
  534. * In other words, the End.DT4/DT6 processes a packet as if it has been received
  535. * directly by the VRF (and not by one of its slave devices, if any).
  536. * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
  537. * according to the routing table configured by the End.DT4/DT6 instance.
  538. *
  539. * This design allows you to get some interesting features like:
  540. * 1) the statistics on rx packets;
  541. * 2) the possibility to install a packet sniffer on the receiving interface
  542. * (the VRF one) for looking at the incoming packets;
  543. * 3) the possibility to leverage the netfilter prerouting hook for the inner
  544. * IPv4 packet.
  545. *
  546. * This function returns:
  547. * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
  548. * - NULL when the skb is consumed by the VRF rcv handler;
  549. * - a pointer which encodes a negative error number in case of error.
  550. * Note that in this case, the function takes care of freeing the skb.
  551. */
  552. static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
  553. struct net_device *dev)
  554. {
  555. /* based on l3mdev_ip_rcv; we are only interested in the master */
  556. if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
  557. goto drop;
  558. if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
  559. goto drop;
  560. /* the decap packet IPv4/IPv6 does not come with any mac header info.
  561. * We must unset the mac header to allow the VRF device to rebuild it,
  562. * just in case there is a sniffer attached on the device.
  563. */
  564. skb_unset_mac_header(skb);
  565. skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
  566. if (!skb)
  567. /* the skb buffer was consumed by the handler */
  568. return NULL;
  569. /* when a packet is received by a VRF or by one of its slaves, the
  570. * master device reference is set into the skb.
  571. */
  572. if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
  573. goto drop;
  574. return skb;
  575. drop:
  576. kfree_skb(skb);
  577. return ERR_PTR(-EINVAL);
  578. }
  579. static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
  580. struct seg6_end_dt_info *info)
  581. {
  582. int vrf_ifindex = info->vrf_ifindex;
  583. struct net *net = info->net;
  584. if (unlikely(vrf_ifindex < 0))
  585. goto error;
  586. if (unlikely(!net_eq(dev_net(skb->dev), net)))
  587. goto error;
  588. return dev_get_by_index_rcu(net, vrf_ifindex);
  589. error:
  590. return NULL;
  591. }
  592. static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
  593. struct seg6_local_lwt *slwt, u16 family)
  594. {
  595. struct seg6_end_dt_info *info = &slwt->dt_info;
  596. struct net_device *vrf;
  597. __be16 protocol;
  598. int hdrlen;
  599. vrf = end_dt_get_vrf_rcu(skb, info);
  600. if (unlikely(!vrf))
  601. goto drop;
  602. switch (family) {
  603. case AF_INET:
  604. protocol = htons(ETH_P_IP);
  605. hdrlen = sizeof(struct iphdr);
  606. break;
  607. case AF_INET6:
  608. protocol = htons(ETH_P_IPV6);
  609. hdrlen = sizeof(struct ipv6hdr);
  610. break;
  611. case AF_UNSPEC:
  612. fallthrough;
  613. default:
  614. goto drop;
  615. }
  616. if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
  617. pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
  618. goto drop;
  619. }
  620. skb->protocol = protocol;
  621. skb_dst_drop(skb);
  622. skb_set_transport_header(skb, hdrlen);
  623. nf_reset_ct(skb);
  624. return end_dt_vrf_rcv(skb, family, vrf);
  625. drop:
  626. kfree_skb(skb);
  627. return ERR_PTR(-EINVAL);
  628. }
  629. static int input_action_end_dt4(struct sk_buff *skb,
  630. struct seg6_local_lwt *slwt)
  631. {
  632. struct iphdr *iph;
  633. int err;
  634. if (!decap_and_validate(skb, IPPROTO_IPIP))
  635. goto drop;
  636. if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  637. goto drop;
  638. skb = end_dt_vrf_core(skb, slwt, AF_INET);
  639. if (!skb)
  640. /* packet has been processed and consumed by the VRF */
  641. return 0;
  642. if (IS_ERR(skb))
  643. return PTR_ERR(skb);
  644. iph = ip_hdr(skb);
  645. err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
  646. if (unlikely(err))
  647. goto drop;
  648. return dst_input(skb);
  649. drop:
  650. kfree_skb(skb);
  651. return -EINVAL;
  652. }
  653. static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
  654. struct netlink_ext_ack *extack)
  655. {
  656. return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
  657. }
  658. static enum
  659. seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
  660. {
  661. unsigned long parsed_optattrs = slwt->parsed_optattrs;
  662. bool legacy, vrfmode;
  663. legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
  664. vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
  665. if (!(legacy ^ vrfmode))
  666. /* both are absent or present: invalid DT6 mode */
  667. return DT_INVALID_MODE;
  668. return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
  669. }
  670. static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
  671. {
  672. struct seg6_end_dt_info *info = &slwt->dt_info;
  673. return info->mode;
  674. }
  675. static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
  676. struct netlink_ext_ack *extack)
  677. {
  678. enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
  679. struct seg6_end_dt_info *info = &slwt->dt_info;
  680. switch (mode) {
  681. case DT_LEGACY_MODE:
  682. info->mode = DT_LEGACY_MODE;
  683. return 0;
  684. case DT_VRF_MODE:
  685. return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
  686. default:
  687. NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
  688. return -EINVAL;
  689. }
  690. }
  691. #endif
  692. static int input_action_end_dt6(struct sk_buff *skb,
  693. struct seg6_local_lwt *slwt)
  694. {
  695. if (!decap_and_validate(skb, IPPROTO_IPV6))
  696. goto drop;
  697. if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
  698. goto drop;
  699. #ifdef CONFIG_NET_L3_MASTER_DEV
  700. if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
  701. goto legacy_mode;
  702. /* DT6_VRF_MODE */
  703. skb = end_dt_vrf_core(skb, slwt, AF_INET6);
  704. if (!skb)
  705. /* packet has been processed and consumed by the VRF */
  706. return 0;
  707. if (IS_ERR(skb))
  708. return PTR_ERR(skb);
  709. /* note: this time we do not need to specify the table because the VRF
  710. * takes care of selecting the correct table.
  711. */
  712. seg6_lookup_any_nexthop(skb, NULL, 0, true);
  713. return dst_input(skb);
  714. legacy_mode:
  715. #endif
  716. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  717. seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
  718. return dst_input(skb);
  719. drop:
  720. kfree_skb(skb);
  721. return -EINVAL;
  722. }
  723. #ifdef CONFIG_NET_L3_MASTER_DEV
  724. static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
  725. struct netlink_ext_ack *extack)
  726. {
  727. return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
  728. }
  729. static int input_action_end_dt46(struct sk_buff *skb,
  730. struct seg6_local_lwt *slwt)
  731. {
  732. unsigned int off = 0;
  733. int nexthdr;
  734. nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
  735. if (unlikely(nexthdr < 0))
  736. goto drop;
  737. switch (nexthdr) {
  738. case IPPROTO_IPIP:
  739. return input_action_end_dt4(skb, slwt);
  740. case IPPROTO_IPV6:
  741. return input_action_end_dt6(skb, slwt);
  742. }
  743. drop:
  744. kfree_skb(skb);
  745. return -EINVAL;
  746. }
  747. #endif
  748. /* push an SRH on top of the current one */
  749. static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  750. {
  751. struct ipv6_sr_hdr *srh;
  752. int err = -EINVAL;
  753. srh = get_and_validate_srh(skb);
  754. if (!srh)
  755. goto drop;
  756. err = seg6_do_srh_inline(skb, slwt->srh);
  757. if (err)
  758. goto drop;
  759. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  760. seg6_lookup_nexthop(skb, NULL, 0);
  761. return dst_input(skb);
  762. drop:
  763. kfree_skb(skb);
  764. return err;
  765. }
  766. /* encapsulate within an outer IPv6 header and a specified SRH */
  767. static int input_action_end_b6_encap(struct sk_buff *skb,
  768. struct seg6_local_lwt *slwt)
  769. {
  770. struct ipv6_sr_hdr *srh;
  771. int err = -EINVAL;
  772. srh = get_and_validate_srh(skb);
  773. if (!srh)
  774. goto drop;
  775. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  776. skb_reset_inner_headers(skb);
  777. skb->encapsulation = 1;
  778. err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
  779. if (err)
  780. goto drop;
  781. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  782. seg6_lookup_nexthop(skb, NULL, 0);
  783. return dst_input(skb);
  784. drop:
  785. kfree_skb(skb);
  786. return err;
  787. }
  788. DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
  789. bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
  790. {
  791. struct seg6_bpf_srh_state *srh_state =
  792. this_cpu_ptr(&seg6_bpf_srh_states);
  793. struct ipv6_sr_hdr *srh = srh_state->srh;
  794. if (unlikely(srh == NULL))
  795. return false;
  796. if (unlikely(!srh_state->valid)) {
  797. if ((srh_state->hdrlen & 7) != 0)
  798. return false;
  799. srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
  800. if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
  801. return false;
  802. srh_state->valid = true;
  803. }
  804. return true;
  805. }
  806. static int input_action_end_bpf(struct sk_buff *skb,
  807. struct seg6_local_lwt *slwt)
  808. {
  809. struct seg6_bpf_srh_state *srh_state =
  810. this_cpu_ptr(&seg6_bpf_srh_states);
  811. struct ipv6_sr_hdr *srh;
  812. int ret;
  813. srh = get_and_validate_srh(skb);
  814. if (!srh) {
  815. kfree_skb(skb);
  816. return -EINVAL;
  817. }
  818. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  819. /* preempt_disable is needed to protect the per-CPU buffer srh_state,
  820. * which is also accessed by the bpf_lwt_seg6_* helpers
  821. */
  822. preempt_disable();
  823. srh_state->srh = srh;
  824. srh_state->hdrlen = srh->hdrlen << 3;
  825. srh_state->valid = true;
  826. rcu_read_lock();
  827. bpf_compute_data_pointers(skb);
  828. ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
  829. rcu_read_unlock();
  830. switch (ret) {
  831. case BPF_OK:
  832. case BPF_REDIRECT:
  833. break;
  834. case BPF_DROP:
  835. goto drop;
  836. default:
  837. pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
  838. goto drop;
  839. }
  840. if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
  841. goto drop;
  842. preempt_enable();
  843. if (ret != BPF_REDIRECT)
  844. seg6_lookup_nexthop(skb, NULL, 0);
  845. return dst_input(skb);
  846. drop:
  847. preempt_enable();
  848. kfree_skb(skb);
  849. return -EINVAL;
  850. }
  851. static struct seg6_action_desc seg6_action_table[] = {
  852. {
  853. .action = SEG6_LOCAL_ACTION_END,
  854. .attrs = 0,
  855. .optattrs = SEG6_F_LOCAL_COUNTERS |
  856. SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
  857. .input = input_action_end,
  858. },
  859. {
  860. .action = SEG6_LOCAL_ACTION_END_X,
  861. .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
  862. .optattrs = SEG6_F_LOCAL_COUNTERS,
  863. .input = input_action_end_x,
  864. },
  865. {
  866. .action = SEG6_LOCAL_ACTION_END_T,
  867. .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
  868. .optattrs = SEG6_F_LOCAL_COUNTERS,
  869. .input = input_action_end_t,
  870. },
  871. {
  872. .action = SEG6_LOCAL_ACTION_END_DX2,
  873. .attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF),
  874. .optattrs = SEG6_F_LOCAL_COUNTERS,
  875. .input = input_action_end_dx2,
  876. },
  877. {
  878. .action = SEG6_LOCAL_ACTION_END_DX6,
  879. .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
  880. .optattrs = SEG6_F_LOCAL_COUNTERS,
  881. .input = input_action_end_dx6,
  882. },
  883. {
  884. .action = SEG6_LOCAL_ACTION_END_DX4,
  885. .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4),
  886. .optattrs = SEG6_F_LOCAL_COUNTERS,
  887. .input = input_action_end_dx4,
  888. },
  889. {
  890. .action = SEG6_LOCAL_ACTION_END_DT4,
  891. .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
  892. .optattrs = SEG6_F_LOCAL_COUNTERS,
  893. #ifdef CONFIG_NET_L3_MASTER_DEV
  894. .input = input_action_end_dt4,
  895. .slwt_ops = {
  896. .build_state = seg6_end_dt4_build,
  897. },
  898. #endif
  899. },
  900. {
  901. .action = SEG6_LOCAL_ACTION_END_DT6,
  902. #ifdef CONFIG_NET_L3_MASTER_DEV
  903. .attrs = 0,
  904. .optattrs = SEG6_F_LOCAL_COUNTERS |
  905. SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
  906. SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
  907. .slwt_ops = {
  908. .build_state = seg6_end_dt6_build,
  909. },
  910. #else
  911. .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
  912. .optattrs = SEG6_F_LOCAL_COUNTERS,
  913. #endif
  914. .input = input_action_end_dt6,
  915. },
  916. {
  917. .action = SEG6_LOCAL_ACTION_END_DT46,
  918. .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
  919. .optattrs = SEG6_F_LOCAL_COUNTERS,
  920. #ifdef CONFIG_NET_L3_MASTER_DEV
  921. .input = input_action_end_dt46,
  922. .slwt_ops = {
  923. .build_state = seg6_end_dt46_build,
  924. },
  925. #endif
  926. },
  927. {
  928. .action = SEG6_LOCAL_ACTION_END_B6,
  929. .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
  930. .optattrs = SEG6_F_LOCAL_COUNTERS,
  931. .input = input_action_end_b6,
  932. },
  933. {
  934. .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
  935. .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
  936. .optattrs = SEG6_F_LOCAL_COUNTERS,
  937. .input = input_action_end_b6_encap,
  938. .static_headroom = sizeof(struct ipv6hdr),
  939. },
  940. {
  941. .action = SEG6_LOCAL_ACTION_END_BPF,
  942. .attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF),
  943. .optattrs = SEG6_F_LOCAL_COUNTERS,
  944. .input = input_action_end_bpf,
  945. },
  946. };
  947. static struct seg6_action_desc *__get_action_desc(int action)
  948. {
  949. struct seg6_action_desc *desc;
  950. int i, count;
  951. count = ARRAY_SIZE(seg6_action_table);
  952. for (i = 0; i < count; i++) {
  953. desc = &seg6_action_table[i];
  954. if (desc->action == action)
  955. return desc;
  956. }
  957. return NULL;
  958. }
  959. static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
  960. {
  961. return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
  962. }
  963. static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
  964. unsigned int len, int err)
  965. {
  966. struct pcpu_seg6_local_counters *pcounters;
  967. pcounters = this_cpu_ptr(slwt->pcpu_counters);
  968. u64_stats_update_begin(&pcounters->syncp);
  969. if (likely(!err)) {
  970. u64_stats_inc(&pcounters->packets);
  971. u64_stats_add(&pcounters->bytes, len);
  972. } else {
  973. u64_stats_inc(&pcounters->errors);
  974. }
  975. u64_stats_update_end(&pcounters->syncp);
  976. }
  977. static int seg6_local_input_core(struct net *net, struct sock *sk,
  978. struct sk_buff *skb)
  979. {
  980. struct dst_entry *orig_dst = skb_dst(skb);
  981. struct seg6_action_desc *desc;
  982. struct seg6_local_lwt *slwt;
  983. unsigned int len = skb->len;
  984. int rc;
  985. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  986. desc = slwt->desc;
  987. rc = desc->input(skb, slwt);
  988. if (!seg6_lwtunnel_counters_enabled(slwt))
  989. return rc;
  990. seg6_local_update_counters(slwt, len, rc);
  991. return rc;
  992. }
  993. static int seg6_local_input(struct sk_buff *skb)
  994. {
  995. if (skb->protocol != htons(ETH_P_IPV6)) {
  996. kfree_skb(skb);
  997. return -EINVAL;
  998. }
  999. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  1000. return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
  1001. dev_net(skb->dev), NULL, skb, skb->dev, NULL,
  1002. seg6_local_input_core);
  1003. return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
  1004. }
  1005. static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
  1006. [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
  1007. [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
  1008. [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
  1009. [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
  1010. [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
  1011. .len = sizeof(struct in_addr) },
  1012. [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
  1013. .len = sizeof(struct in6_addr) },
  1014. [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
  1015. [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
  1016. [SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
  1017. [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED },
  1018. [SEG6_LOCAL_FLAVORS] = { .type = NLA_NESTED },
  1019. };
  1020. static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1021. struct netlink_ext_ack *extack)
  1022. {
  1023. struct ipv6_sr_hdr *srh;
  1024. int len;
  1025. srh = nla_data(attrs[SEG6_LOCAL_SRH]);
  1026. len = nla_len(attrs[SEG6_LOCAL_SRH]);
  1027. /* SRH must contain at least one segment */
  1028. if (len < sizeof(*srh) + sizeof(struct in6_addr))
  1029. return -EINVAL;
  1030. if (!seg6_validate_srh(srh, len, false))
  1031. return -EINVAL;
  1032. slwt->srh = kmemdup(srh, len, GFP_KERNEL);
  1033. if (!slwt->srh)
  1034. return -ENOMEM;
  1035. slwt->headroom += len;
  1036. return 0;
  1037. }
  1038. static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1039. {
  1040. struct ipv6_sr_hdr *srh;
  1041. struct nlattr *nla;
  1042. int len;
  1043. srh = slwt->srh;
  1044. len = (srh->hdrlen + 1) << 3;
  1045. nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
  1046. if (!nla)
  1047. return -EMSGSIZE;
  1048. memcpy(nla_data(nla), srh, len);
  1049. return 0;
  1050. }
  1051. static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1052. {
  1053. int len = (a->srh->hdrlen + 1) << 3;
  1054. if (len != ((b->srh->hdrlen + 1) << 3))
  1055. return 1;
  1056. return memcmp(a->srh, b->srh, len);
  1057. }
  1058. static void destroy_attr_srh(struct seg6_local_lwt *slwt)
  1059. {
  1060. kfree(slwt->srh);
  1061. }
  1062. static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1063. struct netlink_ext_ack *extack)
  1064. {
  1065. slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
  1066. return 0;
  1067. }
  1068. static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1069. {
  1070. if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
  1071. return -EMSGSIZE;
  1072. return 0;
  1073. }
  1074. static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1075. {
  1076. if (a->table != b->table)
  1077. return 1;
  1078. return 0;
  1079. }
  1080. static struct
  1081. seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
  1082. {
  1083. #ifdef CONFIG_NET_L3_MASTER_DEV
  1084. return &slwt->dt_info;
  1085. #else
  1086. return ERR_PTR(-EOPNOTSUPP);
  1087. #endif
  1088. }
  1089. static int parse_nla_vrftable(struct nlattr **attrs,
  1090. struct seg6_local_lwt *slwt,
  1091. struct netlink_ext_ack *extack)
  1092. {
  1093. struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
  1094. if (IS_ERR(info))
  1095. return PTR_ERR(info);
  1096. info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
  1097. return 0;
  1098. }
  1099. static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1100. {
  1101. struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
  1102. if (IS_ERR(info))
  1103. return PTR_ERR(info);
  1104. if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
  1105. return -EMSGSIZE;
  1106. return 0;
  1107. }
  1108. static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1109. {
  1110. struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
  1111. struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
  1112. if (info_a->vrf_table != info_b->vrf_table)
  1113. return 1;
  1114. return 0;
  1115. }
  1116. static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1117. struct netlink_ext_ack *extack)
  1118. {
  1119. memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
  1120. sizeof(struct in_addr));
  1121. return 0;
  1122. }
  1123. static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1124. {
  1125. struct nlattr *nla;
  1126. nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
  1127. if (!nla)
  1128. return -EMSGSIZE;
  1129. memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
  1130. return 0;
  1131. }
  1132. static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1133. {
  1134. return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
  1135. }
  1136. static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1137. struct netlink_ext_ack *extack)
  1138. {
  1139. memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
  1140. sizeof(struct in6_addr));
  1141. return 0;
  1142. }
  1143. static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1144. {
  1145. struct nlattr *nla;
  1146. nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
  1147. if (!nla)
  1148. return -EMSGSIZE;
  1149. memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
  1150. return 0;
  1151. }
  1152. static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1153. {
  1154. return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
  1155. }
  1156. static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1157. struct netlink_ext_ack *extack)
  1158. {
  1159. slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
  1160. return 0;
  1161. }
  1162. static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1163. {
  1164. if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
  1165. return -EMSGSIZE;
  1166. return 0;
  1167. }
  1168. static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1169. {
  1170. if (a->iif != b->iif)
  1171. return 1;
  1172. return 0;
  1173. }
  1174. static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1175. struct netlink_ext_ack *extack)
  1176. {
  1177. slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
  1178. return 0;
  1179. }
  1180. static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1181. {
  1182. if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
  1183. return -EMSGSIZE;
  1184. return 0;
  1185. }
  1186. static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1187. {
  1188. if (a->oif != b->oif)
  1189. return 1;
  1190. return 0;
  1191. }
  1192. #define MAX_PROG_NAME 256
  1193. static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
  1194. [SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
  1195. [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
  1196. .len = MAX_PROG_NAME },
  1197. };
  1198. static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1199. struct netlink_ext_ack *extack)
  1200. {
  1201. struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
  1202. struct bpf_prog *p;
  1203. int ret;
  1204. u32 fd;
  1205. ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
  1206. attrs[SEG6_LOCAL_BPF],
  1207. bpf_prog_policy, NULL);
  1208. if (ret < 0)
  1209. return ret;
  1210. if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
  1211. return -EINVAL;
  1212. slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
  1213. if (!slwt->bpf.name)
  1214. return -ENOMEM;
  1215. fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
  1216. p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
  1217. if (IS_ERR(p)) {
  1218. kfree(slwt->bpf.name);
  1219. return PTR_ERR(p);
  1220. }
  1221. slwt->bpf.prog = p;
  1222. return 0;
  1223. }
  1224. static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1225. {
  1226. struct nlattr *nest;
  1227. if (!slwt->bpf.prog)
  1228. return 0;
  1229. nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
  1230. if (!nest)
  1231. return -EMSGSIZE;
  1232. if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
  1233. return -EMSGSIZE;
  1234. if (slwt->bpf.name &&
  1235. nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
  1236. return -EMSGSIZE;
  1237. return nla_nest_end(skb, nest);
  1238. }
  1239. static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1240. {
  1241. if (!a->bpf.name && !b->bpf.name)
  1242. return 0;
  1243. if (!a->bpf.name || !b->bpf.name)
  1244. return 1;
  1245. return strcmp(a->bpf.name, b->bpf.name);
  1246. }
  1247. static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
  1248. {
  1249. kfree(slwt->bpf.name);
  1250. if (slwt->bpf.prog)
  1251. bpf_prog_put(slwt->bpf.prog);
  1252. }
  1253. static const struct
  1254. nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
  1255. [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 },
  1256. [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 },
  1257. [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 },
  1258. };
  1259. static int parse_nla_counters(struct nlattr **attrs,
  1260. struct seg6_local_lwt *slwt,
  1261. struct netlink_ext_ack *extack)
  1262. {
  1263. struct pcpu_seg6_local_counters __percpu *pcounters;
  1264. struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
  1265. int ret;
  1266. ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
  1267. attrs[SEG6_LOCAL_COUNTERS],
  1268. seg6_local_counters_policy, NULL);
  1269. if (ret < 0)
  1270. return ret;
  1271. /* basic support for SRv6 Behavior counters requires at least:
  1272. * packets, bytes and errors.
  1273. */
  1274. if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
  1275. !tb[SEG6_LOCAL_CNT_ERRORS])
  1276. return -EINVAL;
  1277. /* counters are always zero initialized */
  1278. pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
  1279. if (!pcounters)
  1280. return -ENOMEM;
  1281. slwt->pcpu_counters = pcounters;
  1282. return 0;
  1283. }
  1284. static int seg6_local_fill_nla_counters(struct sk_buff *skb,
  1285. struct seg6_local_counters *counters)
  1286. {
  1287. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
  1288. SEG6_LOCAL_CNT_PAD))
  1289. return -EMSGSIZE;
  1290. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
  1291. SEG6_LOCAL_CNT_PAD))
  1292. return -EMSGSIZE;
  1293. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
  1294. SEG6_LOCAL_CNT_PAD))
  1295. return -EMSGSIZE;
  1296. return 0;
  1297. }
  1298. static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1299. {
  1300. struct seg6_local_counters counters = { 0, 0, 0 };
  1301. struct nlattr *nest;
  1302. int rc, i;
  1303. nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
  1304. if (!nest)
  1305. return -EMSGSIZE;
  1306. for_each_possible_cpu(i) {
  1307. struct pcpu_seg6_local_counters *pcounters;
  1308. u64 packets, bytes, errors;
  1309. unsigned int start;
  1310. pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
  1311. do {
  1312. start = u64_stats_fetch_begin_irq(&pcounters->syncp);
  1313. packets = u64_stats_read(&pcounters->packets);
  1314. bytes = u64_stats_read(&pcounters->bytes);
  1315. errors = u64_stats_read(&pcounters->errors);
  1316. } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start));
  1317. counters.packets += packets;
  1318. counters.bytes += bytes;
  1319. counters.errors += errors;
  1320. }
  1321. rc = seg6_local_fill_nla_counters(skb, &counters);
  1322. if (rc < 0) {
  1323. nla_nest_cancel(skb, nest);
  1324. return rc;
  1325. }
  1326. return nla_nest_end(skb, nest);
  1327. }
  1328. static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1329. {
  1330. /* a and b are equal if both have pcpu_counters set or not */
  1331. return (!!((unsigned long)a->pcpu_counters)) ^
  1332. (!!((unsigned long)b->pcpu_counters));
  1333. }
  1334. static void destroy_attr_counters(struct seg6_local_lwt *slwt)
  1335. {
  1336. free_percpu(slwt->pcpu_counters);
  1337. }
  1338. static const
  1339. struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
  1340. [SEG6_LOCAL_FLV_OPERATION] = { .type = NLA_U32 },
  1341. [SEG6_LOCAL_FLV_LCBLOCK_BITS] = { .type = NLA_U8 },
  1342. [SEG6_LOCAL_FLV_LCNODE_FN_BITS] = { .type = NLA_U8 },
  1343. };
  1344. /* check whether the lengths of the Locator-Block and Locator-Node Function
  1345. * are compatible with the dimension of a C-SID container.
  1346. */
  1347. static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
  1348. {
  1349. /* Locator-Block and Locator-Node Function cannot exceed 128 bits
  1350. * (i.e. C-SID container lenghts).
  1351. */
  1352. if (next_csid_chk_cntr_bits(block_len, func_len))
  1353. return -EINVAL;
  1354. /* Locator-Block length must be greater than zero and evenly divisible
  1355. * by 8. There must be room for a Locator-Node Function, at least.
  1356. */
  1357. if (next_csid_chk_lcblock_bits(block_len))
  1358. return -EINVAL;
  1359. /* Locator-Node Function length must be greater than zero and evenly
  1360. * divisible by 8. There must be room for the Locator-Block.
  1361. */
  1362. if (next_csid_chk_lcnode_fn_bits(func_len))
  1363. return -EINVAL;
  1364. return 0;
  1365. }
  1366. static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
  1367. struct seg6_flavors_info *finfo,
  1368. struct netlink_ext_ack *extack)
  1369. {
  1370. __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
  1371. __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
  1372. int rc;
  1373. if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
  1374. block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
  1375. if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
  1376. func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
  1377. rc = seg6_chk_next_csid_cfg(block_len, func_len);
  1378. if (rc < 0) {
  1379. NL_SET_ERR_MSG(extack,
  1380. "Invalid Locator Block/Node Function lengths");
  1381. return rc;
  1382. }
  1383. finfo->lcblock_bits = block_len;
  1384. finfo->lcnode_func_bits = func_len;
  1385. return 0;
  1386. }
  1387. static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1388. struct netlink_ext_ack *extack)
  1389. {
  1390. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1391. struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
  1392. unsigned long fops;
  1393. int rc;
  1394. rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
  1395. attrs[SEG6_LOCAL_FLAVORS],
  1396. seg6_local_flavors_policy, NULL);
  1397. if (rc < 0)
  1398. return rc;
  1399. /* this attribute MUST always be present since it represents the Flavor
  1400. * operation(s) to be carried out.
  1401. */
  1402. if (!tb[SEG6_LOCAL_FLV_OPERATION])
  1403. return -EINVAL;
  1404. fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
  1405. if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
  1406. NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
  1407. return -EOPNOTSUPP;
  1408. }
  1409. finfo->flv_ops = fops;
  1410. if (seg6_next_csid_enabled(fops)) {
  1411. /* Locator-Block and Locator-Node Function lengths can be
  1412. * provided by the user space. Otherwise, default values are
  1413. * applied.
  1414. */
  1415. rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
  1416. if (rc < 0)
  1417. return rc;
  1418. }
  1419. return 0;
  1420. }
  1421. static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
  1422. struct seg6_flavors_info *finfo)
  1423. {
  1424. if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
  1425. return -EMSGSIZE;
  1426. if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
  1427. finfo->lcnode_func_bits))
  1428. return -EMSGSIZE;
  1429. return 0;
  1430. }
  1431. static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1432. {
  1433. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1434. __u32 fops = finfo->flv_ops;
  1435. struct nlattr *nest;
  1436. int rc;
  1437. nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
  1438. if (!nest)
  1439. return -EMSGSIZE;
  1440. if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
  1441. rc = -EMSGSIZE;
  1442. goto err;
  1443. }
  1444. if (seg6_next_csid_enabled(fops)) {
  1445. rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
  1446. if (rc < 0)
  1447. goto err;
  1448. }
  1449. return nla_nest_end(skb, nest);
  1450. err:
  1451. nla_nest_cancel(skb, nest);
  1452. return rc;
  1453. }
  1454. static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
  1455. struct seg6_flavors_info *finfo_b)
  1456. {
  1457. if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
  1458. return 1;
  1459. if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
  1460. return 1;
  1461. return 0;
  1462. }
  1463. static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1464. {
  1465. struct seg6_flavors_info *finfo_a = &a->flv_info;
  1466. struct seg6_flavors_info *finfo_b = &b->flv_info;
  1467. if (finfo_a->flv_ops != finfo_b->flv_ops)
  1468. return 1;
  1469. if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
  1470. if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
  1471. return 1;
  1472. }
  1473. return 0;
  1474. }
  1475. static int encap_size_flavors(struct seg6_local_lwt *slwt)
  1476. {
  1477. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1478. int nlsize;
  1479. nlsize = nla_total_size(0) + /* nest SEG6_LOCAL_FLAVORS */
  1480. nla_total_size(4); /* SEG6_LOCAL_FLV_OPERATION */
  1481. if (seg6_next_csid_enabled(finfo->flv_ops))
  1482. nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
  1483. nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
  1484. return nlsize;
  1485. }
  1486. struct seg6_action_param {
  1487. int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1488. struct netlink_ext_ack *extack);
  1489. int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
  1490. int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
  1491. /* optional destroy() callback useful for releasing resources which
  1492. * have been previously acquired in the corresponding parse()
  1493. * function.
  1494. */
  1495. void (*destroy)(struct seg6_local_lwt *slwt);
  1496. };
  1497. static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
  1498. [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
  1499. .put = put_nla_srh,
  1500. .cmp = cmp_nla_srh,
  1501. .destroy = destroy_attr_srh },
  1502. [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
  1503. .put = put_nla_table,
  1504. .cmp = cmp_nla_table },
  1505. [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
  1506. .put = put_nla_nh4,
  1507. .cmp = cmp_nla_nh4 },
  1508. [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
  1509. .put = put_nla_nh6,
  1510. .cmp = cmp_nla_nh6 },
  1511. [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
  1512. .put = put_nla_iif,
  1513. .cmp = cmp_nla_iif },
  1514. [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
  1515. .put = put_nla_oif,
  1516. .cmp = cmp_nla_oif },
  1517. [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
  1518. .put = put_nla_bpf,
  1519. .cmp = cmp_nla_bpf,
  1520. .destroy = destroy_attr_bpf },
  1521. [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
  1522. .put = put_nla_vrftable,
  1523. .cmp = cmp_nla_vrftable },
  1524. [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters,
  1525. .put = put_nla_counters,
  1526. .cmp = cmp_nla_counters,
  1527. .destroy = destroy_attr_counters },
  1528. [SEG6_LOCAL_FLAVORS] = { .parse = parse_nla_flavors,
  1529. .put = put_nla_flavors,
  1530. .cmp = cmp_nla_flavors },
  1531. };
  1532. /* call the destroy() callback (if available) for each set attribute in
  1533. * @parsed_attrs, starting from the first attribute up to the @max_parsed
  1534. * (excluded) attribute.
  1535. */
  1536. static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
  1537. struct seg6_local_lwt *slwt)
  1538. {
  1539. struct seg6_action_param *param;
  1540. int i;
  1541. /* Every required seg6local attribute is identified by an ID which is
  1542. * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
  1543. *
  1544. * We scan the 'parsed_attrs' bitmask, starting from the first attribute
  1545. * up to the @max_parsed (excluded) attribute.
  1546. * For each set attribute, we retrieve the corresponding destroy()
  1547. * callback. If the callback is not available, then we skip to the next
  1548. * attribute; otherwise, we call the destroy() callback.
  1549. */
  1550. for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
  1551. if (!(parsed_attrs & SEG6_F_ATTR(i)))
  1552. continue;
  1553. param = &seg6_action_params[i];
  1554. if (param->destroy)
  1555. param->destroy(slwt);
  1556. }
  1557. }
  1558. /* release all the resources that may have been acquired during parsing
  1559. * operations.
  1560. */
  1561. static void destroy_attrs(struct seg6_local_lwt *slwt)
  1562. {
  1563. unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  1564. __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
  1565. }
  1566. static int parse_nla_optional_attrs(struct nlattr **attrs,
  1567. struct seg6_local_lwt *slwt,
  1568. struct netlink_ext_ack *extack)
  1569. {
  1570. struct seg6_action_desc *desc = slwt->desc;
  1571. unsigned long parsed_optattrs = 0;
  1572. struct seg6_action_param *param;
  1573. int err, i;
  1574. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
  1575. if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
  1576. continue;
  1577. /* once here, the i-th attribute is provided by the
  1578. * userspace AND it is identified optional as well.
  1579. */
  1580. param = &seg6_action_params[i];
  1581. err = param->parse(attrs, slwt, extack);
  1582. if (err < 0)
  1583. goto parse_optattrs_err;
  1584. /* current attribute has been correctly parsed */
  1585. parsed_optattrs |= SEG6_F_ATTR(i);
  1586. }
  1587. /* store in the tunnel state all the optional attributed successfully
  1588. * parsed.
  1589. */
  1590. slwt->parsed_optattrs = parsed_optattrs;
  1591. return 0;
  1592. parse_optattrs_err:
  1593. __destroy_attrs(parsed_optattrs, i, slwt);
  1594. return err;
  1595. }
  1596. /* call the custom constructor of the behavior during its initialization phase
  1597. * and after that all its attributes have been parsed successfully.
  1598. */
  1599. static int
  1600. seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
  1601. struct netlink_ext_ack *extack)
  1602. {
  1603. struct seg6_action_desc *desc = slwt->desc;
  1604. struct seg6_local_lwtunnel_ops *ops;
  1605. ops = &desc->slwt_ops;
  1606. if (!ops->build_state)
  1607. return 0;
  1608. return ops->build_state(slwt, cfg, extack);
  1609. }
  1610. /* call the custom destructor of the behavior which is invoked before the
  1611. * tunnel is going to be destroyed.
  1612. */
  1613. static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
  1614. {
  1615. struct seg6_action_desc *desc = slwt->desc;
  1616. struct seg6_local_lwtunnel_ops *ops;
  1617. ops = &desc->slwt_ops;
  1618. if (!ops->destroy_state)
  1619. return;
  1620. ops->destroy_state(slwt);
  1621. }
  1622. static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1623. struct netlink_ext_ack *extack)
  1624. {
  1625. struct seg6_action_param *param;
  1626. struct seg6_action_desc *desc;
  1627. unsigned long invalid_attrs;
  1628. int i, err;
  1629. desc = __get_action_desc(slwt->action);
  1630. if (!desc)
  1631. return -EINVAL;
  1632. if (!desc->input)
  1633. return -EOPNOTSUPP;
  1634. slwt->desc = desc;
  1635. slwt->headroom += desc->static_headroom;
  1636. /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
  1637. * disjoined, this allow us to release acquired resources by optional
  1638. * attributes and by required attributes independently from each other
  1639. * without any interference.
  1640. * In other terms, we are sure that we do not release some the acquired
  1641. * resources twice.
  1642. *
  1643. * Note that if an attribute is configured both as required and as
  1644. * optional, it means that the user has messed something up in the
  1645. * seg6_action_table. Therefore, this check is required for SRv6
  1646. * behaviors to work properly.
  1647. */
  1648. invalid_attrs = desc->attrs & desc->optattrs;
  1649. if (invalid_attrs) {
  1650. WARN_ONCE(1,
  1651. "An attribute cannot be both required AND optional");
  1652. return -EINVAL;
  1653. }
  1654. /* parse the required attributes */
  1655. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  1656. if (desc->attrs & SEG6_F_ATTR(i)) {
  1657. if (!attrs[i])
  1658. return -EINVAL;
  1659. param = &seg6_action_params[i];
  1660. err = param->parse(attrs, slwt, extack);
  1661. if (err < 0)
  1662. goto parse_attrs_err;
  1663. }
  1664. }
  1665. /* parse the optional attributes, if any */
  1666. err = parse_nla_optional_attrs(attrs, slwt, extack);
  1667. if (err < 0)
  1668. goto parse_attrs_err;
  1669. return 0;
  1670. parse_attrs_err:
  1671. /* release any resource that may have been acquired during the i-1
  1672. * parse() operations.
  1673. */
  1674. __destroy_attrs(desc->attrs, i, slwt);
  1675. return err;
  1676. }
  1677. static int seg6_local_build_state(struct net *net, struct nlattr *nla,
  1678. unsigned int family, const void *cfg,
  1679. struct lwtunnel_state **ts,
  1680. struct netlink_ext_ack *extack)
  1681. {
  1682. struct nlattr *tb[SEG6_LOCAL_MAX + 1];
  1683. struct lwtunnel_state *newts;
  1684. struct seg6_local_lwt *slwt;
  1685. int err;
  1686. if (family != AF_INET6)
  1687. return -EINVAL;
  1688. err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
  1689. seg6_local_policy, extack);
  1690. if (err < 0)
  1691. return err;
  1692. if (!tb[SEG6_LOCAL_ACTION])
  1693. return -EINVAL;
  1694. newts = lwtunnel_state_alloc(sizeof(*slwt));
  1695. if (!newts)
  1696. return -ENOMEM;
  1697. slwt = seg6_local_lwtunnel(newts);
  1698. slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
  1699. err = parse_nla_action(tb, slwt, extack);
  1700. if (err < 0)
  1701. goto out_free;
  1702. err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
  1703. if (err < 0)
  1704. goto out_destroy_attrs;
  1705. newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
  1706. newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
  1707. newts->headroom = slwt->headroom;
  1708. *ts = newts;
  1709. return 0;
  1710. out_destroy_attrs:
  1711. destroy_attrs(slwt);
  1712. out_free:
  1713. kfree(newts);
  1714. return err;
  1715. }
  /* Tear down a seg6local tunnel state: the behavior-specific destructor runs
   * first, then the resources held by the parsed attributes are released.
   */
  static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
  {
  	struct seg6_local_lwt *priv = seg6_local_lwtunnel(lwt);

  	seg6_local_lwtunnel_destroy_state(priv);
  	destroy_attrs(priv);
  }
  1723. static int seg6_local_fill_encap(struct sk_buff *skb,
  1724. struct lwtunnel_state *lwt)
  1725. {
  1726. struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
  1727. struct seg6_action_param *param;
  1728. unsigned long attrs;
  1729. int i, err;
  1730. if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
  1731. return -EMSGSIZE;
  1732. attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  1733. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  1734. if (attrs & SEG6_F_ATTR(i)) {
  1735. param = &seg6_action_params[i];
  1736. err = param->put(skb, slwt);
  1737. if (err < 0)
  1738. return err;
  1739. }
  1740. }
  1741. return 0;
  1742. }
  1743. static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
  1744. {
  1745. struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
  1746. unsigned long attrs;
  1747. int nlsize;
  1748. nlsize = nla_total_size(4); /* action */
  1749. attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  1750. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
  1751. nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
  1752. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
  1753. nlsize += nla_total_size(4);
  1754. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
  1755. nlsize += nla_total_size(4);
  1756. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
  1757. nlsize += nla_total_size(16);
  1758. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
  1759. nlsize += nla_total_size(4);
  1760. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
  1761. nlsize += nla_total_size(4);
  1762. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
  1763. nlsize += nla_total_size(sizeof(struct nlattr)) +
  1764. nla_total_size(MAX_PROG_NAME) +
  1765. nla_total_size(4);
  1766. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
  1767. nlsize += nla_total_size(4);
  1768. if (attrs & SEG6_F_LOCAL_COUNTERS)
  1769. nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
  1770. /* SEG6_LOCAL_CNT_PACKETS */
  1771. nla_total_size_64bit(sizeof(__u64)) +
  1772. /* SEG6_LOCAL_CNT_BYTES */
  1773. nla_total_size_64bit(sizeof(__u64)) +
  1774. /* SEG6_LOCAL_CNT_ERRORS */
  1775. nla_total_size_64bit(sizeof(__u64));
  1776. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
  1777. nlsize += encap_size_flavors(slwt);
  1778. return nlsize;
  1779. }
  1780. static int seg6_local_cmp_encap(struct lwtunnel_state *a,
  1781. struct lwtunnel_state *b)
  1782. {
  1783. struct seg6_local_lwt *slwt_a, *slwt_b;
  1784. struct seg6_action_param *param;
  1785. unsigned long attrs_a, attrs_b;
  1786. int i;
  1787. slwt_a = seg6_local_lwtunnel(a);
  1788. slwt_b = seg6_local_lwtunnel(b);
  1789. if (slwt_a->action != slwt_b->action)
  1790. return 1;
  1791. attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
  1792. attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
  1793. if (attrs_a != attrs_b)
  1794. return 1;
  1795. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  1796. if (attrs_a & SEG6_F_ATTR(i)) {
  1797. param = &seg6_action_params[i];
  1798. if (param->cmp(slwt_a, slwt_b))
  1799. return 1;
  1800. }
  1801. }
  1802. return 0;
  1803. }
  1804. static const struct lwtunnel_encap_ops seg6_local_ops = {
  1805. .build_state = seg6_local_build_state,
  1806. .destroy_state = seg6_local_destroy_state,
  1807. .input = seg6_local_input,
  1808. .fill_encap = seg6_local_fill_encap,
  1809. .get_encap_size = seg6_local_get_encap_size,
  1810. .cmp_encap = seg6_local_cmp_encap,
  1811. .owner = THIS_MODULE,
  1812. };
  1813. int __init seg6_local_init(void)
  1814. {
  1815. /* If the max total number of defined attributes is reached, then your
  1816. * kernel build stops here.
  1817. *
  1818. * This check is required to avoid arithmetic overflows when processing
  1819. * behavior attributes and the maximum number of defined attributes
  1820. * exceeds the allowed value.
  1821. */
  1822. BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
  1823. /* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
  1824. * bits) have been changed with invalid values, kernel build stops
  1825. * here.
  1826. */
  1827. BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
  1828. SEG6_LOCAL_LCNODE_FN_DBITS));
  1829. BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
  1830. BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
  1831. return lwtunnel_encap_add_ops(&seg6_local_ops,
  1832. LWTUNNEL_ENCAP_SEG6_LOCAL);
  1833. }
  1834. void seg6_local_exit(void)
  1835. {
  1836. lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
  1837. }