nexthop.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Generic nexthop implementation
  3. *
  4. * Copyright (c) 2017-19 Cumulus Networks
  5. * Copyright (c) 2017-19 David Ahern <[email protected]>
  6. */
  7. #include <linux/nexthop.h>
  8. #include <linux/rtnetlink.h>
  9. #include <linux/slab.h>
  10. #include <linux/vmalloc.h>
  11. #include <net/arp.h>
  12. #include <net/ipv6_stubs.h>
  13. #include <net/lwtunnel.h>
  14. #include <net/ndisc.h>
  15. #include <net/nexthop.h>
  16. #include <net/route.h>
  17. #include <net/sock.h>
  18. #define NH_RES_DEFAULT_IDLE_TIMER (120 * HZ)
  19. #define NH_RES_DEFAULT_UNBALANCED_TIMER 0 /* No forced rebalancing. */
  20. static void remove_nexthop(struct net *net, struct nexthop *nh,
  21. struct nl_info *nlinfo);
  22. #define NH_DEV_HASHBITS 8
  23. #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
  24. static const struct nla_policy rtm_nh_policy_new[] = {
  25. [NHA_ID] = { .type = NLA_U32 },
  26. [NHA_GROUP] = { .type = NLA_BINARY },
  27. [NHA_GROUP_TYPE] = { .type = NLA_U16 },
  28. [NHA_BLACKHOLE] = { .type = NLA_FLAG },
  29. [NHA_OIF] = { .type = NLA_U32 },
  30. [NHA_GATEWAY] = { .type = NLA_BINARY },
  31. [NHA_ENCAP_TYPE] = { .type = NLA_U16 },
  32. [NHA_ENCAP] = { .type = NLA_NESTED },
  33. [NHA_FDB] = { .type = NLA_FLAG },
  34. [NHA_RES_GROUP] = { .type = NLA_NESTED },
  35. };
  36. static const struct nla_policy rtm_nh_policy_get[] = {
  37. [NHA_ID] = { .type = NLA_U32 },
  38. };
  39. static const struct nla_policy rtm_nh_policy_dump[] = {
  40. [NHA_OIF] = { .type = NLA_U32 },
  41. [NHA_GROUPS] = { .type = NLA_FLAG },
  42. [NHA_MASTER] = { .type = NLA_U32 },
  43. [NHA_FDB] = { .type = NLA_FLAG },
  44. };
  45. static const struct nla_policy rtm_nh_res_policy_new[] = {
  46. [NHA_RES_GROUP_BUCKETS] = { .type = NLA_U16 },
  47. [NHA_RES_GROUP_IDLE_TIMER] = { .type = NLA_U32 },
  48. [NHA_RES_GROUP_UNBALANCED_TIMER] = { .type = NLA_U32 },
  49. };
  50. static const struct nla_policy rtm_nh_policy_dump_bucket[] = {
  51. [NHA_ID] = { .type = NLA_U32 },
  52. [NHA_OIF] = { .type = NLA_U32 },
  53. [NHA_MASTER] = { .type = NLA_U32 },
  54. [NHA_RES_BUCKET] = { .type = NLA_NESTED },
  55. };
  56. static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
  57. [NHA_RES_BUCKET_NH_ID] = { .type = NLA_U32 },
  58. };
  59. static const struct nla_policy rtm_nh_policy_get_bucket[] = {
  60. [NHA_ID] = { .type = NLA_U32 },
  61. [NHA_RES_BUCKET] = { .type = NLA_NESTED },
  62. };
  63. static const struct nla_policy rtm_nh_res_bucket_policy_get[] = {
  64. [NHA_RES_BUCKET_INDEX] = { .type = NLA_U16 },
  65. };
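/* A minimal sketch (not the actual request handlers, which appear further
 * down in the file) of how a policy table such as rtm_nh_policy_new is
 * applied to an incoming RTM_NEWNEXTHOP message; "nlh" and "extack" come
 * from the netlink handler:
 *
 *	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
 *	int err;
 *
 *	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
 *			  ARRAY_SIZE(rtm_nh_policy_new) - 1,
 *			  rtm_nh_policy_new, extack);
 *	if (err < 0)
 *		return err;
 *
 * On success tb[NHA_ID], tb[NHA_GROUP], ... hold the validated attributes.
 */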
  66. static bool nexthop_notifiers_is_empty(struct net *net)
  67. {
  68. return !net->nexthop.notifier_chain.head;
  69. }
  70. static void
  71. __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
  72. const struct nh_info *nhi)
  73. {
  74. nh_info->dev = nhi->fib_nhc.nhc_dev;
  75. nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
  76. if (nh_info->gw_family == AF_INET)
  77. nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
  78. else if (nh_info->gw_family == AF_INET6)
  79. nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
  80. nh_info->is_reject = nhi->reject_nh;
  81. nh_info->is_fdb = nhi->fdb_nh;
  82. nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
  83. }
  84. static int nh_notifier_single_info_init(struct nh_notifier_info *info,
  85. const struct nexthop *nh)
  86. {
  87. struct nh_info *nhi = rtnl_dereference(nh->nh_info);
  88. info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
  89. info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
  90. if (!info->nh)
  91. return -ENOMEM;
  92. __nh_notifier_single_info_init(info->nh, nhi);
  93. return 0;
  94. }
  95. static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
  96. {
  97. kfree(info->nh);
  98. }
  99. static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
  100. struct nh_group *nhg)
  101. {
  102. u16 num_nh = nhg->num_nh;
  103. int i;
  104. info->type = NH_NOTIFIER_INFO_TYPE_GRP;
  105. info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
  106. GFP_KERNEL);
  107. if (!info->nh_grp)
  108. return -ENOMEM;
  109. info->nh_grp->num_nh = num_nh;
  110. info->nh_grp->is_fdb = nhg->fdb_nh;
  111. for (i = 0; i < num_nh; i++) {
  112. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  113. struct nh_info *nhi;
  114. nhi = rtnl_dereference(nhge->nh->nh_info);
  115. info->nh_grp->nh_entries[i].id = nhge->nh->id;
  116. info->nh_grp->nh_entries[i].weight = nhge->weight;
  117. __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
  118. nhi);
  119. }
  120. return 0;
  121. }
  122. static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
  123. struct nh_group *nhg)
  124. {
  125. struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
  126. u16 num_nh_buckets = res_table->num_nh_buckets;
  127. unsigned long size;
  128. u16 i;
  129. info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE;
  130. size = struct_size(info->nh_res_table, nhs, num_nh_buckets);
  131. info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO |
  132. __GFP_NOWARN);
  133. if (!info->nh_res_table)
  134. return -ENOMEM;
  135. info->nh_res_table->num_nh_buckets = num_nh_buckets;
  136. for (i = 0; i < num_nh_buckets; i++) {
  137. struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
  138. struct nh_grp_entry *nhge;
  139. struct nh_info *nhi;
  140. nhge = rtnl_dereference(bucket->nh_entry);
  141. nhi = rtnl_dereference(nhge->nh->nh_info);
  142. __nh_notifier_single_info_init(&info->nh_res_table->nhs[i],
  143. nhi);
  144. }
  145. return 0;
  146. }
  147. static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
  148. const struct nexthop *nh)
  149. {
  150. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  151. if (nhg->hash_threshold)
  152. return nh_notifier_mpath_info_init(info, nhg);
  153. else if (nhg->resilient)
  154. return nh_notifier_res_table_info_init(info, nhg);
  155. return -EINVAL;
  156. }
  157. static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
  158. const struct nexthop *nh)
  159. {
  160. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  161. if (nhg->hash_threshold)
  162. kfree(info->nh_grp);
  163. else if (nhg->resilient)
  164. vfree(info->nh_res_table);
  165. }
  166. static int nh_notifier_info_init(struct nh_notifier_info *info,
  167. const struct nexthop *nh)
  168. {
  169. info->id = nh->id;
  170. if (nh->is_group)
  171. return nh_notifier_grp_info_init(info, nh);
  172. else
  173. return nh_notifier_single_info_init(info, nh);
  174. }
  175. static void nh_notifier_info_fini(struct nh_notifier_info *info,
  176. const struct nexthop *nh)
  177. {
  178. if (nh->is_group)
  179. nh_notifier_grp_info_fini(info, nh);
  180. else
  181. nh_notifier_single_info_fini(info);
  182. }
  183. static int call_nexthop_notifiers(struct net *net,
  184. enum nexthop_event_type event_type,
  185. struct nexthop *nh,
  186. struct netlink_ext_ack *extack)
  187. {
  188. struct nh_notifier_info info = {
  189. .net = net,
  190. .extack = extack,
  191. };
  192. int err;
  193. ASSERT_RTNL();
  194. if (nexthop_notifiers_is_empty(net))
  195. return 0;
  196. err = nh_notifier_info_init(&info, nh);
  197. if (err) {
  198. NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
  199. return err;
  200. }
  201. err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
  202. event_type, &info);
  203. nh_notifier_info_fini(&info, nh);
  204. return notifier_to_errno(err);
  205. }
  206. static int
  207. nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info,
  208. bool force, unsigned int *p_idle_timer_ms)
  209. {
  210. struct nh_res_table *res_table;
  211. struct nh_group *nhg;
  212. struct nexthop *nh;
  213. int err = 0;
  214. /* When 'force' is false, nexthop bucket replacement is performed
  215. * because the bucket was deemed to be idle. In this case, capable
  216. * listeners can choose to perform an atomic replacement: The bucket is
  217. * only replaced if it is inactive. However, if the idle timer interval
  218. * is smaller than the interval in which a listener is querying
  219. * buckets' activity from the device, then atomic replacement should
  220. * not be tried. Pass the idle timer value to listeners, so that they
  221. * could determine which type of replacement to perform.
  222. */
  223. if (force) {
  224. *p_idle_timer_ms = 0;
  225. return 0;
  226. }
  227. rcu_read_lock();
  228. nh = nexthop_find_by_id(info->net, info->id);
  229. if (!nh) {
  230. err = -EINVAL;
  231. goto out;
  232. }
  233. nhg = rcu_dereference(nh->nh_grp);
  234. res_table = rcu_dereference(nhg->res_table);
  235. *p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer);
  236. out:
  237. rcu_read_unlock();
  238. return err;
  239. }
  240. static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info,
  241. u16 bucket_index, bool force,
  242. struct nh_info *oldi,
  243. struct nh_info *newi)
  244. {
  245. unsigned int idle_timer_ms;
  246. int err;
  247. err = nh_notifier_res_bucket_idle_timer_get(info, force,
  248. &idle_timer_ms);
  249. if (err)
  250. return err;
  251. info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET;
  252. info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket),
  253. GFP_KERNEL);
  254. if (!info->nh_res_bucket)
  255. return -ENOMEM;
  256. info->nh_res_bucket->bucket_index = bucket_index;
  257. info->nh_res_bucket->idle_timer_ms = idle_timer_ms;
  258. info->nh_res_bucket->force = force;
  259. __nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi);
  260. __nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi);
  261. return 0;
  262. }
  263. static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info)
  264. {
  265. kfree(info->nh_res_bucket);
  266. }
  267. static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
  268. u16 bucket_index, bool force,
  269. struct nh_info *oldi,
  270. struct nh_info *newi,
  271. struct netlink_ext_ack *extack)
  272. {
  273. struct nh_notifier_info info = {
  274. .net = net,
  275. .extack = extack,
  276. .id = nhg_id,
  277. };
  278. int err;
  279. if (nexthop_notifiers_is_empty(net))
  280. return 0;
  281. err = nh_notifier_res_bucket_info_init(&info, bucket_index, force,
  282. oldi, newi);
  283. if (err)
  284. return err;
  285. err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
  286. NEXTHOP_EVENT_BUCKET_REPLACE, &info);
  287. nh_notifier_res_bucket_info_fini(&info);
  288. return notifier_to_errno(err);
  289. }
  290. /* There are three users of RES_TABLE, and NHs etc. referenced from there:
  291. *
  292. * 1) a collection of callbacks for NH maintenance. This operates under
  293. * RTNL,
  294. * 2) the delayed work that gradually balances the resilient table,
  295. * 3) and nexthop_select_path(), operating under RCU.
  296. *
  297. * Both the delayed work and the RTNL block are writers, and need to
  298. * maintain mutual exclusion. Since there are only two, well-known
  299. * writers for each table, the RTNL code can make sure it has exclusive
  300. * access thus:
  301. *
  302. * - Have the DW operate without locking;
  303. * - synchronously cancel the DW;
  304. * - do the writing;
  305. * - if the write was not actually a delete, call upkeep, which schedules
  306. * DW again if necessary.
  307. *
  308. * The functions that are always called from the RTNL context use
  309. * rtnl_dereference(). The functions that can also be called from the DW do
  310. * a raw dereference and rely on the above mutual exclusion scheme.
  311. */
  312. #define nh_res_dereference(p) (rcu_dereference_raw(p))
  313. static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
  314. u16 bucket_index, bool force,
  315. struct nexthop *old_nh,
  316. struct nexthop *new_nh,
  317. struct netlink_ext_ack *extack)
  318. {
  319. struct nh_info *oldi = nh_res_dereference(old_nh->nh_info);
  320. struct nh_info *newi = nh_res_dereference(new_nh->nh_info);
  321. return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index,
  322. force, oldi, newi, extack);
  323. }
  324. static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
  325. struct netlink_ext_ack *extack)
  326. {
  327. struct nh_notifier_info info = {
  328. .net = net,
  329. .extack = extack,
  330. };
  331. struct nh_group *nhg;
  332. int err;
  333. ASSERT_RTNL();
  334. if (nexthop_notifiers_is_empty(net))
  335. return 0;
  336. /* At this point, the nexthop buckets are still not populated. Only
  337. * emit a notification with the logical nexthops, so that a listener
  338. * could potentially veto it in case of unsupported configuration.
  339. */
  340. nhg = rtnl_dereference(nh->nh_grp);
  341. err = nh_notifier_mpath_info_init(&info, nhg);
  342. if (err) {
  343. NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
  344. return err;
  345. }
  346. err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
  347. NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
  348. &info);
  349. kfree(info.nh_grp);
  350. return notifier_to_errno(err);
  351. }
  352. static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
  353. enum nexthop_event_type event_type,
  354. struct nexthop *nh,
  355. struct netlink_ext_ack *extack)
  356. {
  357. struct nh_notifier_info info = {
  358. .net = net,
  359. .extack = extack,
  360. };
  361. int err;
  362. err = nh_notifier_info_init(&info, nh);
  363. if (err)
  364. return err;
  365. err = nb->notifier_call(nb, event_type, &info);
  366. nh_notifier_info_fini(&info, nh);
  367. return notifier_to_errno(err);
  368. }
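/* A minimal sketch of the consumer side of these notifications: a driver
 * registers a notifier block and its callback receives the nh_notifier_info
 * built above. The names my_nh_event and my_nh_nb are hypothetical:
 *
 *	static int my_nh_event(struct notifier_block *nb, unsigned long event,
 *			       void *ptr)
 *	{
 *		struct nh_notifier_info *info = ptr;
 *
 *		switch (event) {
 *		case NEXTHOP_EVENT_REPLACE:
 *		case NEXTHOP_EVENT_DEL:
 *			(program or remove info->id in hardware)
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nh_nb = { .notifier_call = my_nh_event };
 *
 *	err = register_nexthop_notifier(net, &my_nh_nb, extack);
 */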
  369. static unsigned int nh_dev_hashfn(unsigned int val)
  370. {
  371. unsigned int mask = NH_DEV_HASHSIZE - 1;
  372. return (val ^
  373. (val >> NH_DEV_HASHBITS) ^
  374. (val >> (NH_DEV_HASHBITS * 2))) & mask;
  375. }
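/* Worked example: with NH_DEV_HASHBITS == 8 this folds the three low-order
 * bytes of the ifindex together, so e.g. ifindex 0x12345 hashes to
 * 0x45 ^ 0x23 ^ 0x01 = 0x67, i.e. devhash bucket 103.
 */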
  376. static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
  377. {
  378. struct net_device *dev = nhi->fib_nhc.nhc_dev;
  379. struct hlist_head *head;
  380. unsigned int hash;
  381. WARN_ON(!dev);
  382. hash = nh_dev_hashfn(dev->ifindex);
  383. head = &net->nexthop.devhash[hash];
  384. hlist_add_head(&nhi->dev_hash, head);
  385. }
  386. static void nexthop_free_group(struct nexthop *nh)
  387. {
  388. struct nh_group *nhg;
  389. int i;
  390. nhg = rcu_dereference_raw(nh->nh_grp);
  391. for (i = 0; i < nhg->num_nh; ++i) {
  392. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  393. WARN_ON(!list_empty(&nhge->nh_list));
  394. nexthop_put(nhge->nh);
  395. }
  396. WARN_ON(nhg->spare == nhg);
  397. if (nhg->resilient)
  398. vfree(rcu_dereference_raw(nhg->res_table));
  399. kfree(nhg->spare);
  400. kfree(nhg);
  401. }
  402. static void nexthop_free_single(struct nexthop *nh)
  403. {
  404. struct nh_info *nhi;
  405. nhi = rcu_dereference_raw(nh->nh_info);
  406. switch (nhi->family) {
  407. case AF_INET:
  408. fib_nh_release(nh->net, &nhi->fib_nh);
  409. break;
  410. case AF_INET6:
  411. ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
  412. break;
  413. }
  414. kfree(nhi);
  415. }
  416. void nexthop_free_rcu(struct rcu_head *head)
  417. {
  418. struct nexthop *nh = container_of(head, struct nexthop, rcu);
  419. if (nh->is_group)
  420. nexthop_free_group(nh);
  421. else
  422. nexthop_free_single(nh);
  423. kfree(nh);
  424. }
  425. EXPORT_SYMBOL_GPL(nexthop_free_rcu);
  426. static struct nexthop *nexthop_alloc(void)
  427. {
  428. struct nexthop *nh;
  429. nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
  430. if (nh) {
  431. INIT_LIST_HEAD(&nh->fi_list);
  432. INIT_LIST_HEAD(&nh->f6i_list);
  433. INIT_LIST_HEAD(&nh->grp_list);
  434. INIT_LIST_HEAD(&nh->fdb_list);
  435. }
  436. return nh;
  437. }
  438. static struct nh_group *nexthop_grp_alloc(u16 num_nh)
  439. {
  440. struct nh_group *nhg;
  441. nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
  442. if (nhg)
  443. nhg->num_nh = num_nh;
  444. return nhg;
  445. }
  446. static void nh_res_table_upkeep_dw(struct work_struct *work);
  447. static struct nh_res_table *
  448. nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg)
  449. {
  450. const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets;
  451. struct nh_res_table *res_table;
  452. unsigned long size;
  453. size = struct_size(res_table, nh_buckets, num_nh_buckets);
  454. res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
  455. if (!res_table)
  456. return NULL;
  457. res_table->net = net;
  458. res_table->nhg_id = nhg_id;
  459. INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw);
  460. INIT_LIST_HEAD(&res_table->uw_nh_entries);
  461. res_table->idle_timer = cfg->nh_grp_res_idle_timer;
  462. res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer;
  463. res_table->num_nh_buckets = num_nh_buckets;
  464. return res_table;
  465. }
  466. static void nh_base_seq_inc(struct net *net)
  467. {
  468. while (++net->nexthop.seq == 0)
  469. ;
  470. }
  471. /* no reference taken; rcu lock or rtnl must be held */
  472. struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
  473. {
  474. struct rb_node **pp, *parent = NULL, *next;
  475. pp = &net->nexthop.rb_root.rb_node;
  476. while (1) {
  477. struct nexthop *nh;
  478. next = rcu_dereference_raw(*pp);
  479. if (!next)
  480. break;
  481. parent = next;
  482. nh = rb_entry(parent, struct nexthop, rb_node);
  483. if (id < nh->id)
  484. pp = &next->rb_left;
  485. else if (id > nh->id)
  486. pp = &next->rb_right;
  487. else
  488. return nh;
  489. }
  490. return NULL;
  491. }
  492. EXPORT_SYMBOL_GPL(nexthop_find_by_id);
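/* A minimal caller sketch honoring the comment above (no reference is taken,
 * so the result is only valid under rcu_read_lock() or while holding RTNL):
 *
 *	rcu_read_lock();
 *	nh = nexthop_find_by_id(net, id);
 *	if (nh)
 *		(use nh; it may be freed once the RCU read section ends)
 *	rcu_read_unlock();
 */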
  493. /* used for auto id allocation; called with rtnl held */
  494. static u32 nh_find_unused_id(struct net *net)
  495. {
  496. u32 id_start = net->nexthop.last_id_allocated;
  497. while (1) {
  498. net->nexthop.last_id_allocated++;
  499. if (net->nexthop.last_id_allocated == id_start)
  500. break;
  501. if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
  502. return net->nexthop.last_id_allocated;
  503. }
  504. return 0;
  505. }
  506. static void nh_res_time_set_deadline(unsigned long next_time,
  507. unsigned long *deadline)
  508. {
  509. if (time_before(next_time, *deadline))
  510. *deadline = next_time;
  511. }
  512. static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table)
  513. {
  514. if (list_empty(&res_table->uw_nh_entries))
  515. return 0;
  516. return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since);
  517. }
  518. static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg)
  519. {
  520. struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
  521. struct nlattr *nest;
  522. nest = nla_nest_start(skb, NHA_RES_GROUP);
  523. if (!nest)
  524. return -EMSGSIZE;
  525. if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS,
  526. res_table->num_nh_buckets) ||
  527. nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER,
  528. jiffies_to_clock_t(res_table->idle_timer)) ||
  529. nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER,
  530. jiffies_to_clock_t(res_table->unbalanced_timer)) ||
  531. nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME,
  532. nh_res_table_unbalanced_time(res_table),
  533. NHA_RES_GROUP_PAD))
  534. goto nla_put_failure;
  535. nla_nest_end(skb, nest);
  536. return 0;
  537. nla_put_failure:
  538. nla_nest_cancel(skb, nest);
  539. return -EMSGSIZE;
  540. }
  541. static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
  542. {
  543. struct nexthop_grp *p;
  544. size_t len = nhg->num_nh * sizeof(*p);
  545. struct nlattr *nla;
  546. u16 group_type = 0;
  547. int i;
  548. if (nhg->hash_threshold)
  549. group_type = NEXTHOP_GRP_TYPE_MPATH;
  550. else if (nhg->resilient)
  551. group_type = NEXTHOP_GRP_TYPE_RES;
  552. if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
  553. goto nla_put_failure;
  554. nla = nla_reserve(skb, NHA_GROUP, len);
  555. if (!nla)
  556. goto nla_put_failure;
  557. p = nla_data(nla);
  558. for (i = 0; i < nhg->num_nh; ++i) {
  559. p->id = nhg->nh_entries[i].nh->id;
  560. p->weight = nhg->nh_entries[i].weight - 1;
  561. p += 1;
  562. }
  563. if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
  564. goto nla_put_failure;
  565. return 0;
  566. nla_put_failure:
  567. return -EMSGSIZE;
  568. }
  569. static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
  570. int event, u32 portid, u32 seq, unsigned int nlflags)
  571. {
  572. struct fib6_nh *fib6_nh;
  573. struct fib_nh *fib_nh;
  574. struct nlmsghdr *nlh;
  575. struct nh_info *nhi;
  576. struct nhmsg *nhm;
  577. nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
  578. if (!nlh)
  579. return -EMSGSIZE;
  580. nhm = nlmsg_data(nlh);
  581. nhm->nh_family = AF_UNSPEC;
  582. nhm->nh_flags = nh->nh_flags;
  583. nhm->nh_protocol = nh->protocol;
  584. nhm->nh_scope = 0;
  585. nhm->resvd = 0;
  586. if (nla_put_u32(skb, NHA_ID, nh->id))
  587. goto nla_put_failure;
  588. if (nh->is_group) {
  589. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  590. if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
  591. goto nla_put_failure;
  592. if (nla_put_nh_group(skb, nhg))
  593. goto nla_put_failure;
  594. goto out;
  595. }
  596. nhi = rtnl_dereference(nh->nh_info);
  597. nhm->nh_family = nhi->family;
  598. if (nhi->reject_nh) {
  599. if (nla_put_flag(skb, NHA_BLACKHOLE))
  600. goto nla_put_failure;
  601. goto out;
  602. } else if (nhi->fdb_nh) {
  603. if (nla_put_flag(skb, NHA_FDB))
  604. goto nla_put_failure;
  605. } else {
  606. const struct net_device *dev;
  607. dev = nhi->fib_nhc.nhc_dev;
  608. if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
  609. goto nla_put_failure;
  610. }
  611. nhm->nh_scope = nhi->fib_nhc.nhc_scope;
  612. switch (nhi->family) {
  613. case AF_INET:
  614. fib_nh = &nhi->fib_nh;
  615. if (fib_nh->fib_nh_gw_family &&
  616. nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
  617. goto nla_put_failure;
  618. break;
  619. case AF_INET6:
  620. fib6_nh = &nhi->fib6_nh;
  621. if (fib6_nh->fib_nh_gw_family &&
  622. nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
  623. goto nla_put_failure;
  624. break;
  625. }
  626. if (nhi->fib_nhc.nhc_lwtstate &&
  627. lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
  628. NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
  629. goto nla_put_failure;
  630. out:
  631. nlmsg_end(skb, nlh);
  632. return 0;
  633. nla_put_failure:
  634. nlmsg_cancel(skb, nlh);
  635. return -EMSGSIZE;
  636. }
  637. static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg)
  638. {
  639. return nla_total_size(0) + /* NHA_RES_GROUP */
  640. nla_total_size(2) + /* NHA_RES_GROUP_BUCKETS */
  641. nla_total_size(4) + /* NHA_RES_GROUP_IDLE_TIMER */
  642. nla_total_size(4) + /* NHA_RES_GROUP_UNBALANCED_TIMER */
  643. nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */
  644. }
  645. static size_t nh_nlmsg_size_grp(struct nexthop *nh)
  646. {
  647. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  648. size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
  649. size_t tot = nla_total_size(sz) +
  650. nla_total_size(2); /* NHA_GROUP_TYPE */
  651. if (nhg->resilient)
  652. tot += nh_nlmsg_size_grp_res(nhg);
  653. return tot;
  654. }
  655. static size_t nh_nlmsg_size_single(struct nexthop *nh)
  656. {
  657. struct nh_info *nhi = rtnl_dereference(nh->nh_info);
  658. size_t sz;
  659. /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
  660. * are mutually exclusive
  661. */
  662. sz = nla_total_size(4); /* NHA_OIF */
  663. switch (nhi->family) {
  664. case AF_INET:
  665. if (nhi->fib_nh.fib_nh_gw_family)
  666. sz += nla_total_size(4); /* NHA_GATEWAY */
  667. break;
  668. case AF_INET6:
  669. /* NHA_GATEWAY */
  670. if (nhi->fib6_nh.fib_nh_gw_family)
  671. sz += nla_total_size(sizeof(const struct in6_addr));
  672. break;
  673. }
  674. if (nhi->fib_nhc.nhc_lwtstate) {
  675. sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
  676. sz += nla_total_size(2); /* NHA_ENCAP_TYPE */
  677. }
  678. return sz;
  679. }
  680. static size_t nh_nlmsg_size(struct nexthop *nh)
  681. {
  682. size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
  683. sz += nla_total_size(4); /* NHA_ID */
  684. if (nh->is_group)
  685. sz += nh_nlmsg_size_grp(nh);
  686. else
  687. sz += nh_nlmsg_size_single(nh);
  688. return sz;
  689. }
  690. static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
  691. {
  692. unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
  693. u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
  694. struct sk_buff *skb;
  695. int err = -ENOBUFS;
  696. skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
  697. if (!skb)
  698. goto errout;
  699. err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
  700. if (err < 0) {
  701. /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
  702. WARN_ON(err == -EMSGSIZE);
  703. kfree_skb(skb);
  704. goto errout;
  705. }
  706. rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
  707. info->nlh, gfp_any());
  708. return;
  709. errout:
  710. if (err < 0)
  711. rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
  712. }
  713. static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
  714. {
  715. return (unsigned long)atomic_long_read(&bucket->used_time);
  716. }
  717. static unsigned long
  718. nh_res_bucket_idle_point(const struct nh_res_table *res_table,
  719. const struct nh_res_bucket *bucket,
  720. unsigned long now)
  721. {
  722. unsigned long time = nh_res_bucket_used_time(bucket);
  723. /* Bucket was not used since it was migrated. The idle time is now. */
  724. if (time == bucket->migrated_time)
  725. return now;
  726. return time + res_table->idle_timer;
  727. }
  728. static unsigned long
  729. nh_res_table_unb_point(const struct nh_res_table *res_table)
  730. {
  731. return res_table->unbalanced_since + res_table->unbalanced_timer;
  732. }
  733. static void nh_res_bucket_set_idle(const struct nh_res_table *res_table,
  734. struct nh_res_bucket *bucket)
  735. {
  736. unsigned long now = jiffies;
  737. atomic_long_set(&bucket->used_time, (long)now);
  738. bucket->migrated_time = now;
  739. }
  740. static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
  741. {
  742. atomic_long_set(&bucket->used_time, (long)jiffies);
  743. }
  744. static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket)
  745. {
  746. unsigned long used_time = nh_res_bucket_used_time(bucket);
  747. return jiffies_delta_to_clock_t(jiffies - used_time);
  748. }
  749. static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh,
  750. struct nh_res_bucket *bucket, u16 bucket_index,
  751. int event, u32 portid, u32 seq,
  752. unsigned int nlflags,
  753. struct netlink_ext_ack *extack)
  754. {
  755. struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
  756. struct nlmsghdr *nlh;
  757. struct nlattr *nest;
  758. struct nhmsg *nhm;
  759. nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
  760. if (!nlh)
  761. return -EMSGSIZE;
  762. nhm = nlmsg_data(nlh);
  763. nhm->nh_family = AF_UNSPEC;
  764. nhm->nh_flags = bucket->nh_flags;
  765. nhm->nh_protocol = nh->protocol;
  766. nhm->nh_scope = 0;
  767. nhm->resvd = 0;
  768. if (nla_put_u32(skb, NHA_ID, nh->id))
  769. goto nla_put_failure;
  770. nest = nla_nest_start(skb, NHA_RES_BUCKET);
  771. if (!nest)
  772. goto nla_put_failure;
  773. if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) ||
  774. nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) ||
  775. nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME,
  776. nh_res_bucket_idle_time(bucket),
  777. NHA_RES_BUCKET_PAD))
  778. goto nla_put_failure_nest;
  779. nla_nest_end(skb, nest);
  780. nlmsg_end(skb, nlh);
  781. return 0;
  782. nla_put_failure_nest:
  783. nla_nest_cancel(skb, nest);
  784. nla_put_failure:
  785. nlmsg_cancel(skb, nlh);
  786. return -EMSGSIZE;
  787. }
  788. static void nexthop_bucket_notify(struct nh_res_table *res_table,
  789. u16 bucket_index)
  790. {
  791. struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
  792. struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
  793. struct nexthop *nh = nhge->nh_parent;
  794. struct sk_buff *skb;
  795. int err = -ENOBUFS;
  796. skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  797. if (!skb)
  798. goto errout;
  799. err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
  800. RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE,
  801. NULL);
  802. if (err < 0) {
  803. kfree_skb(skb);
  804. goto errout;
  805. }
  806. rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
  807. return;
  808. errout:
  809. if (err < 0)
  810. rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
  811. }
  812. static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
  813. bool *is_fdb, struct netlink_ext_ack *extack)
  814. {
  815. if (nh->is_group) {
  816. struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
  817. /* Nesting groups within groups is not supported. */
  818. if (nhg->hash_threshold) {
  819. NL_SET_ERR_MSG(extack,
  820. "Hash-threshold group can not be a nexthop within a group");
  821. return false;
  822. }
  823. if (nhg->resilient) {
  824. NL_SET_ERR_MSG(extack,
  825. "Resilient group can not be a nexthop within a group");
  826. return false;
  827. }
  828. *is_fdb = nhg->fdb_nh;
  829. } else {
  830. struct nh_info *nhi = rtnl_dereference(nh->nh_info);
  831. if (nhi->reject_nh && npaths > 1) {
  832. NL_SET_ERR_MSG(extack,
  833. "Blackhole nexthop can not be used in a group with more than 1 path");
  834. return false;
  835. }
  836. *is_fdb = nhi->fdb_nh;
  837. }
  838. return true;
  839. }
  840. static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
  841. struct netlink_ext_ack *extack)
  842. {
  843. struct nh_info *nhi;
  844. nhi = rtnl_dereference(nh->nh_info);
  845. if (!nhi->fdb_nh) {
  846. NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
  847. return -EINVAL;
  848. }
  849. if (*nh_family == AF_UNSPEC) {
  850. *nh_family = nhi->family;
  851. } else if (*nh_family != nhi->family) {
  852. NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
  853. return -EINVAL;
  854. }
  855. return 0;
  856. }
  857. static int nh_check_attr_group(struct net *net,
  858. struct nlattr *tb[], size_t tb_size,
  859. u16 nh_grp_type, struct netlink_ext_ack *extack)
  860. {
  861. unsigned int len = nla_len(tb[NHA_GROUP]);
  862. u8 nh_family = AF_UNSPEC;
  863. struct nexthop_grp *nhg;
  864. unsigned int i, j;
  865. u8 nhg_fdb = 0;
  866. if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
  867. NL_SET_ERR_MSG(extack,
  868. "Invalid length for nexthop group attribute");
  869. return -EINVAL;
  870. }
  871. /* convert len to number of nexthop ids */
  872. len /= sizeof(*nhg);
  873. nhg = nla_data(tb[NHA_GROUP]);
  874. for (i = 0; i < len; ++i) {
  875. if (nhg[i].resvd1 || nhg[i].resvd2) {
  876. NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
  877. return -EINVAL;
  878. }
  879. if (nhg[i].weight > 254) {
  880. NL_SET_ERR_MSG(extack, "Invalid value for weight");
  881. return -EINVAL;
  882. }
  883. for (j = i + 1; j < len; ++j) {
  884. if (nhg[i].id == nhg[j].id) {
  885. NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
  886. return -EINVAL;
  887. }
  888. }
  889. }
  890. if (tb[NHA_FDB])
  891. nhg_fdb = 1;
  892. nhg = nla_data(tb[NHA_GROUP]);
  893. for (i = 0; i < len; ++i) {
  894. struct nexthop *nh;
  895. bool is_fdb_nh;
  896. nh = nexthop_find_by_id(net, nhg[i].id);
  897. if (!nh) {
  898. NL_SET_ERR_MSG(extack, "Invalid nexthop id");
  899. return -EINVAL;
  900. }
  901. if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
  902. return -EINVAL;
  903. if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
  904. return -EINVAL;
  905. if (!nhg_fdb && is_fdb_nh) {
  906. NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
  907. return -EINVAL;
  908. }
  909. }
  910. for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
  911. if (!tb[i])
  912. continue;
  913. switch (i) {
  914. case NHA_FDB:
  915. continue;
  916. case NHA_RES_GROUP:
  917. if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
  918. continue;
  919. break;
  920. }
  921. NL_SET_ERR_MSG(extack,
  922. "No other attributes can be set in nexthop groups");
  923. return -EINVAL;
  924. }
  925. return 0;
  926. }
  927. static bool ipv6_good_nh(const struct fib6_nh *nh)
  928. {
  929. int state = NUD_REACHABLE;
  930. struct neighbour *n;
  931. rcu_read_lock();
  932. n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
  933. if (n)
  934. state = READ_ONCE(n->nud_state);
  935. rcu_read_unlock();
  936. return !!(state & NUD_VALID);
  937. }
  938. static bool ipv4_good_nh(const struct fib_nh *nh)
  939. {
  940. int state = NUD_REACHABLE;
  941. struct neighbour *n;
  942. rcu_read_lock();
  943. n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
  944. (__force u32)nh->fib_nh_gw4);
  945. if (n)
  946. state = READ_ONCE(n->nud_state);
  947. rcu_read_unlock();
  948. return !!(state & NUD_VALID);
  949. }
  950. static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
  951. {
  952. struct nexthop *rc = NULL;
  953. int i;
  954. for (i = 0; i < nhg->num_nh; ++i) {
  955. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  956. struct nh_info *nhi;
  957. if (hash > atomic_read(&nhge->hthr.upper_bound))
  958. continue;
  959. nhi = rcu_dereference(nhge->nh->nh_info);
  960. if (nhi->fdb_nh)
  961. return nhge->nh;
  962. /* nexthops always check whether the nexthop is good and do
  963. * not rely on a sysctl for this behavior
  964. */
  965. switch (nhi->family) {
  966. case AF_INET:
  967. if (ipv4_good_nh(&nhi->fib_nh))
  968. return nhge->nh;
  969. break;
  970. case AF_INET6:
  971. if (ipv6_good_nh(&nhi->fib6_nh))
  972. return nhge->nh;
  973. break;
  974. }
  975. if (!rc)
  976. rc = nhge->nh;
  977. }
  978. return rc;
  979. }
  980. static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
  981. {
  982. struct nh_res_table *res_table = rcu_dereference(nhg->res_table);
  983. u16 bucket_index = hash % res_table->num_nh_buckets;
  984. struct nh_res_bucket *bucket;
  985. struct nh_grp_entry *nhge;
  986. /* nexthop_select_path() is expected to return a non-NULL value, so
  987. * skip protocol validation and just hand out whatever there is.
  988. */
  989. bucket = &res_table->nh_buckets[bucket_index];
  990. nh_res_bucket_set_busy(bucket);
  991. nhge = rcu_dereference(bucket->nh_entry);
  992. return nhge->nh;
  993. }
  994. struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
  995. {
  996. struct nh_group *nhg;
  997. if (!nh->is_group)
  998. return nh;
  999. nhg = rcu_dereference(nh->nh_grp);
  1000. if (nhg->hash_threshold)
  1001. return nexthop_select_path_hthr(nhg, hash);
  1002. else if (nhg->resilient)
  1003. return nexthop_select_path_res(nhg, hash);
  1004. /* Unreachable. */
  1005. return NULL;
  1006. }
  1007. EXPORT_SYMBOL_GPL(nexthop_select_path);
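/* A minimal caller sketch: under rcu_read_lock(), pass a flow hash and use
 * whichever nexthop comes back. fib_multipath_hash() is shown only as a
 * typical IPv4 hash source; the real call sites live in the fib code:
 *
 *	int hash = fib_multipath_hash(net, fl4, skb, NULL);
 *	struct nexthop *sel = nexthop_select_path(nh, hash);
 */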
  1008. int nexthop_for_each_fib6_nh(struct nexthop *nh,
  1009. int (*cb)(struct fib6_nh *nh, void *arg),
  1010. void *arg)
  1011. {
  1012. struct nh_info *nhi;
  1013. int err;
  1014. if (nh->is_group) {
  1015. struct nh_group *nhg;
  1016. int i;
  1017. nhg = rcu_dereference_rtnl(nh->nh_grp);
  1018. for (i = 0; i < nhg->num_nh; i++) {
  1019. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  1020. nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
  1021. err = cb(&nhi->fib6_nh, arg);
  1022. if (err)
  1023. return err;
  1024. }
  1025. } else {
  1026. nhi = rcu_dereference_rtnl(nh->nh_info);
  1027. err = cb(&nhi->fib6_nh, arg);
  1028. if (err)
  1029. return err;
  1030. }
  1031. return 0;
  1032. }
  1033. EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
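/* A minimal callback sketch for the iterator above; count_fib6_nh and the
 * surrounding variables are hypothetical. A non-zero return from the
 * callback stops the walk and is propagated to the caller:
 *
 *	static int count_fib6_nh(struct fib6_nh *fib6_nh, void *arg)
 *	{
 *		unsigned int *count = arg;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	unsigned int count = 0;
 *
 *	nexthop_for_each_fib6_nh(nh, count_fib6_nh, &count);
 */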
  1034. static int check_src_addr(const struct in6_addr *saddr,
  1035. struct netlink_ext_ack *extack)
  1036. {
  1037. if (!ipv6_addr_any(saddr)) {
  1038. NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
  1039. return -EINVAL;
  1040. }
  1041. return 0;
  1042. }
  1043. int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
  1044. struct netlink_ext_ack *extack)
  1045. {
  1046. struct nh_info *nhi;
  1047. bool is_fdb_nh;
  1048. /* fib6_src is unique to a fib6_info and limits the ability to cache
  1049. * routes in fib6_nh within a nexthop that is potentially shared
  1050. * across multiple fib entries. If the config wants to use source
  1051. * routing it can not use nexthop objects. mlxsw also does not allow
  1052. * fib6_src on routes.
  1053. */
  1054. if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
  1055. return -EINVAL;
  1056. if (nh->is_group) {
  1057. struct nh_group *nhg;
  1058. nhg = rtnl_dereference(nh->nh_grp);
  1059. if (nhg->has_v4)
  1060. goto no_v4_nh;
  1061. is_fdb_nh = nhg->fdb_nh;
  1062. } else {
  1063. nhi = rtnl_dereference(nh->nh_info);
  1064. if (nhi->family == AF_INET)
  1065. goto no_v4_nh;
  1066. is_fdb_nh = nhi->fdb_nh;
  1067. }
  1068. if (is_fdb_nh) {
  1069. NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
  1070. return -EINVAL;
  1071. }
  1072. return 0;
  1073. no_v4_nh:
  1074. NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
  1075. return -EINVAL;
  1076. }
  1077. EXPORT_SYMBOL_GPL(fib6_check_nexthop);
  1078. /* if existing nexthop has ipv6 routes linked to it, need
  1079. * to verify this new spec works with ipv6
  1080. */
  1081. static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
  1082. struct netlink_ext_ack *extack)
  1083. {
  1084. struct fib6_info *f6i;
  1085. if (list_empty(&old->f6i_list))
  1086. return 0;
  1087. list_for_each_entry(f6i, &old->f6i_list, nh_list) {
  1088. if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
  1089. return -EINVAL;
  1090. }
  1091. return fib6_check_nexthop(new, NULL, extack);
  1092. }
  1093. static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
  1094. struct netlink_ext_ack *extack)
  1095. {
  1096. if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
  1097. NL_SET_ERR_MSG(extack,
  1098. "Route with host scope can not have a gateway");
  1099. return -EINVAL;
  1100. }
  1101. if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
  1102. NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
  1103. return -EINVAL;
  1104. }
  1105. return 0;
  1106. }
  1107. /* Invoked by fib add code to verify nexthop by id is ok with
  1108. * config for prefix; parts of fib_check_nh not done when nexthop
  1109. * object is used.
  1110. */
  1111. int fib_check_nexthop(struct nexthop *nh, u8 scope,
  1112. struct netlink_ext_ack *extack)
  1113. {
  1114. struct nh_info *nhi;
  1115. int err = 0;
  1116. if (nh->is_group) {
  1117. struct nh_group *nhg;
  1118. nhg = rtnl_dereference(nh->nh_grp);
  1119. if (nhg->fdb_nh) {
  1120. NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
  1121. err = -EINVAL;
  1122. goto out;
  1123. }
  1124. if (scope == RT_SCOPE_HOST) {
  1125. NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
  1126. err = -EINVAL;
  1127. goto out;
  1128. }
  1129. /* all nexthops in a group have the same scope */
  1130. nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
  1131. err = nexthop_check_scope(nhi, scope, extack);
  1132. } else {
  1133. nhi = rtnl_dereference(nh->nh_info);
  1134. if (nhi->fdb_nh) {
  1135. NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
  1136. err = -EINVAL;
  1137. goto out;
  1138. }
  1139. err = nexthop_check_scope(nhi, scope, extack);
  1140. }
  1141. out:
  1142. return err;
  1143. }
  1144. static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
  1145. struct netlink_ext_ack *extack)
  1146. {
  1147. struct fib_info *fi;
  1148. list_for_each_entry(fi, &old->fi_list, nh_list) {
  1149. int err;
  1150. err = fib_check_nexthop(new, fi->fib_scope, extack);
  1151. if (err)
  1152. return err;
  1153. }
  1154. return 0;
  1155. }
  1156. static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge)
  1157. {
  1158. return nhge->res.count_buckets == nhge->res.wants_buckets;
  1159. }
  1160. static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge)
  1161. {
  1162. return nhge->res.count_buckets > nhge->res.wants_buckets;
  1163. }
  1164. static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge)
  1165. {
  1166. return nhge->res.count_buckets < nhge->res.wants_buckets;
  1167. }
  1168. static bool nh_res_table_is_balanced(const struct nh_res_table *res_table)
  1169. {
  1170. return list_empty(&res_table->uw_nh_entries);
  1171. }
  1172. static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket)
  1173. {
  1174. struct nh_grp_entry *nhge;
  1175. if (bucket->occupied) {
  1176. nhge = nh_res_dereference(bucket->nh_entry);
  1177. nhge->res.count_buckets--;
  1178. bucket->occupied = false;
  1179. }
  1180. }
  1181. static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket,
  1182. struct nh_grp_entry *nhge)
  1183. {
  1184. nh_res_bucket_unset_nh(bucket);
  1185. bucket->occupied = true;
  1186. rcu_assign_pointer(bucket->nh_entry, nhge);
  1187. nhge->res.count_buckets++;
  1188. }
  1189. static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
  1190. struct nh_res_bucket *bucket,
  1191. unsigned long *deadline, bool *force)
  1192. {
  1193. unsigned long now = jiffies;
  1194. struct nh_grp_entry *nhge;
  1195. unsigned long idle_point;
  1196. if (!bucket->occupied) {
  1197. /* The bucket is not occupied, its NHGE pointer is either
  1198. * NULL or obsolete. We _have to_ migrate: set force.
  1199. */
  1200. *force = true;
  1201. return true;
  1202. }
  1203. nhge = nh_res_dereference(bucket->nh_entry);
  1204. /* If the bucket is populated by an underweight or balanced
  1205. * nexthop, do not migrate.
  1206. */
  1207. if (!nh_res_nhge_is_ow(nhge))
  1208. return false;
  1209. /* At this point we know that the bucket is populated with an
  1210. * overweight nexthop. It needs to be migrated to a new nexthop if
  1211. * the idle timer or unbalanced timer expired.
  1212. */
  1213. idle_point = nh_res_bucket_idle_point(res_table, bucket, now);
  1214. if (time_after_eq(now, idle_point)) {
  1215. /* The bucket is idle. We _can_ migrate: unset force. */
  1216. *force = false;
  1217. return true;
  1218. }
  1219. /* Unbalanced timer of 0 means "never force". */
  1220. if (res_table->unbalanced_timer) {
  1221. unsigned long unb_point;
  1222. unb_point = nh_res_table_unb_point(res_table);
  1223. if (time_after(now, unb_point)) {
  1224. /* The bucket is not idle, but the unbalanced timer
  1225. * expired. We _can_ migrate, but set force anyway,
  1226. * so that drivers know to ignore activity reports
  1227. * from the HW.
  1228. */
  1229. *force = true;
  1230. return true;
  1231. }
  1232. nh_res_time_set_deadline(unb_point, deadline);
  1233. }
  1234. nh_res_time_set_deadline(idle_point, deadline);
  1235. return false;
  1236. }
  1237. static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
  1238. u16 bucket_index, bool notify,
  1239. bool notify_nl, bool force)
  1240. {
  1241. struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
  1242. struct nh_grp_entry *new_nhge;
  1243. struct netlink_ext_ack extack;
  1244. int err;
  1245. new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
  1246. struct nh_grp_entry,
  1247. res.uw_nh_entry);
  1248. if (WARN_ON_ONCE(!new_nhge))
  1249. /* If this function is called, "bucket" is either not
  1250. * occupied, or it belongs to a next hop that is
  1251. * overweight. In either case, there ought to be a
  1252. * corresponding underweight next hop.
  1253. */
  1254. return false;
  1255. if (notify) {
  1256. struct nh_grp_entry *old_nhge;
  1257. old_nhge = nh_res_dereference(bucket->nh_entry);
  1258. err = call_nexthop_res_bucket_notifiers(res_table->net,
  1259. res_table->nhg_id,
  1260. bucket_index, force,
  1261. old_nhge->nh,
  1262. new_nhge->nh, &extack);
  1263. if (err) {
  1264. pr_err_ratelimited("%s\n", extack._msg);
  1265. if (!force)
  1266. return false;
  1267. /* It is not possible to veto a forced replacement, so
  1268. * just clear the hardware flags from the nexthop
  1269. * bucket to indicate to user space that this bucket is
  1270. * not correctly populated in hardware.
  1271. */
  1272. bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
  1273. }
  1274. }
  1275. nh_res_bucket_set_nh(bucket, new_nhge);
  1276. nh_res_bucket_set_idle(res_table, bucket);
  1277. if (notify_nl)
  1278. nexthop_bucket_notify(res_table, bucket_index);
  1279. if (nh_res_nhge_is_balanced(new_nhge))
  1280. list_del(&new_nhge->res.uw_nh_entry);
  1281. return true;
  1282. }
  1283. #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
  1284. static void nh_res_table_upkeep(struct nh_res_table *res_table,
  1285. bool notify, bool notify_nl)
  1286. {
  1287. unsigned long now = jiffies;
  1288. unsigned long deadline;
  1289. u16 i;
  1290. /* Deadline is the next time that upkeep should be run. It is the
  1291. * earliest time at which one of the buckets might be migrated.
  1292. * Start at the most pessimistic estimate: either unbalanced_timer
  1293. * from now, or if there is none, idle_timer from now. For each
  1294. * encountered time point, call nh_res_time_set_deadline() to
  1295. * refine the estimate.
  1296. */
  1297. if (res_table->unbalanced_timer)
  1298. deadline = now + res_table->unbalanced_timer;
  1299. else
  1300. deadline = now + res_table->idle_timer;
  1301. for (i = 0; i < res_table->num_nh_buckets; i++) {
  1302. struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
  1303. bool force;
  1304. if (nh_res_bucket_should_migrate(res_table, bucket,
  1305. &deadline, &force)) {
  1306. if (!nh_res_bucket_migrate(res_table, i, notify,
  1307. notify_nl, force)) {
  1308. unsigned long idle_point;
  1309. /* A driver can override the migration
  1310. * decision if the HW reports that the
  1311. * bucket is actually not idle. Therefore
1312. * re-mark the bucket as busy again and
  1313. * update the deadline.
  1314. */
  1315. nh_res_bucket_set_busy(bucket);
  1316. idle_point = nh_res_bucket_idle_point(res_table,
  1317. bucket,
  1318. now);
  1319. nh_res_time_set_deadline(idle_point, &deadline);
  1320. }
  1321. }
  1322. }
  1323. /* If the group is still unbalanced, schedule the next upkeep to
  1324. * either the deadline computed above, or the minimum deadline,
  1325. * whichever comes later.
  1326. */
  1327. if (!nh_res_table_is_balanced(res_table)) {
  1328. unsigned long now = jiffies;
  1329. unsigned long min_deadline;
  1330. min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL;
  1331. if (time_before(deadline, min_deadline))
  1332. deadline = min_deadline;
  1333. queue_delayed_work(system_power_efficient_wq,
  1334. &res_table->upkeep_dw, deadline - now);
  1335. }
  1336. }
  1337. static void nh_res_table_upkeep_dw(struct work_struct *work)
  1338. {
  1339. struct delayed_work *dw = to_delayed_work(work);
  1340. struct nh_res_table *res_table;
  1341. res_table = container_of(dw, struct nh_res_table, upkeep_dw);
  1342. nh_res_table_upkeep(res_table, true, true);
  1343. }
  1344. static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
  1345. {
  1346. cancel_delayed_work_sync(&res_table->upkeep_dw);
  1347. }
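/* Spread the table's buckets across the group entries in proportion to
* their weights, using a running upper bound so rounding errors do not
* accumulate. For example (hypothetical weights), with 8 buckets and
* weights 1 and 3 the first entry wants DIV_ROUND_CLOSEST(8 * 1, 4) = 2
* buckets and the second the remaining 6. Entries that currently own
* fewer buckets than they want are queued on uw_nh_entries for upkeep
* to fill.
*/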
  1348. static void nh_res_group_rebalance(struct nh_group *nhg,
  1349. struct nh_res_table *res_table)
  1350. {
  1351. int prev_upper_bound = 0;
  1352. int total = 0;
  1353. int w = 0;
  1354. int i;
  1355. INIT_LIST_HEAD(&res_table->uw_nh_entries);
  1356. for (i = 0; i < nhg->num_nh; ++i)
  1357. total += nhg->nh_entries[i].weight;
  1358. for (i = 0; i < nhg->num_nh; ++i) {
  1359. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  1360. int upper_bound;
  1361. w += nhge->weight;
  1362. upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
  1363. total);
  1364. nhge->res.wants_buckets = upper_bound - prev_upper_bound;
  1365. prev_upper_bound = upper_bound;
  1366. if (nh_res_nhge_is_uw(nhge)) {
  1367. if (list_empty(&res_table->uw_nh_entries))
  1368. res_table->unbalanced_since = jiffies;
  1369. list_add(&nhge->res.uw_nh_entry,
  1370. &res_table->uw_nh_entries);
  1371. }
  1372. }
  1373. }
  1374. /* Migrate buckets in res_table so that they reference NHGE's from NHG with
  1375. * the right NH ID. Set those buckets that do not have a corresponding NHGE
  1376. * entry in NHG as not occupied.
  1377. */
  1378. static void nh_res_table_migrate_buckets(struct nh_res_table *res_table,
  1379. struct nh_group *nhg)
  1380. {
  1381. u16 i;
  1382. for (i = 0; i < res_table->num_nh_buckets; i++) {
  1383. struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
  1384. u32 id = rtnl_dereference(bucket->nh_entry)->nh->id;
  1385. bool found = false;
  1386. int j;
  1387. for (j = 0; j < nhg->num_nh; j++) {
  1388. struct nh_grp_entry *nhge = &nhg->nh_entries[j];
  1389. if (nhge->nh->id == id) {
  1390. nh_res_bucket_set_nh(bucket, nhge);
  1391. found = true;
  1392. break;
  1393. }
  1394. }
  1395. if (!found)
  1396. nh_res_bucket_unset_nh(bucket);
  1397. }
  1398. }
  1399. static void replace_nexthop_grp_res(struct nh_group *oldg,
  1400. struct nh_group *newg)
  1401. {
  1402. /* For NH group replacement, the new NHG might only have a stub
  1403. * hash table with 0 buckets, because the number of buckets was not
  1404. * specified. For NH removal, oldg and newg both reference the same
  1405. * res_table. So in any case, in the following, we want to work
  1406. * with oldg->res_table.
  1407. */
  1408. struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table);
  1409. unsigned long prev_unbalanced_since = old_res_table->unbalanced_since;
  1410. bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries);
  1411. nh_res_table_cancel_upkeep(old_res_table);
  1412. nh_res_table_migrate_buckets(old_res_table, newg);
  1413. nh_res_group_rebalance(newg, old_res_table);
  1414. if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
  1415. old_res_table->unbalanced_since = prev_unbalanced_since;
  1416. nh_res_table_upkeep(old_res_table, true, false);
  1417. }
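/* Hash-threshold rebalance: each entry's upper bound is its cumulative
* weight fraction scaled onto the 31-bit hash space. For example
* (hypothetical weights), weights 1 and 3 yield bounds of roughly
* 2^29 - 1 and 2^31 - 1, so the first entry receives about 25% of flows
* and the second about 75%.
*/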
  1418. static void nh_hthr_group_rebalance(struct nh_group *nhg)
  1419. {
  1420. int total = 0;
  1421. int w = 0;
  1422. int i;
  1423. for (i = 0; i < nhg->num_nh; ++i)
  1424. total += nhg->nh_entries[i].weight;
  1425. for (i = 0; i < nhg->num_nh; ++i) {
  1426. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  1427. int upper_bound;
  1428. w += nhge->weight;
  1429. upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
  1430. atomic_set(&nhge->hthr.upper_bound, upper_bound);
  1431. }
  1432. }
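/* Remove one entry from a nexthop group: the surviving entries are
* copied into the group's spare array, the spare is rebalanced and then
* published via RCU. If the entry being removed is the last one, the
* parent group nexthop is removed altogether instead.
*/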
  1433. static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
  1434. struct nl_info *nlinfo)
  1435. {
  1436. struct nh_grp_entry *nhges, *new_nhges;
  1437. struct nexthop *nhp = nhge->nh_parent;
  1438. struct netlink_ext_ack extack;
  1439. struct nexthop *nh = nhge->nh;
  1440. struct nh_group *nhg, *newg;
  1441. int i, j, err;
  1442. WARN_ON(!nh);
  1443. nhg = rtnl_dereference(nhp->nh_grp);
  1444. newg = nhg->spare;
  1445. /* last entry, keep it visible and remove the parent */
  1446. if (nhg->num_nh == 1) {
  1447. remove_nexthop(net, nhp, nlinfo);
  1448. return;
  1449. }
  1450. newg->has_v4 = false;
  1451. newg->is_multipath = nhg->is_multipath;
  1452. newg->hash_threshold = nhg->hash_threshold;
  1453. newg->resilient = nhg->resilient;
  1454. newg->fdb_nh = nhg->fdb_nh;
  1455. newg->num_nh = nhg->num_nh;
  1456. /* copy old entries to new except the one getting removed */
  1457. nhges = nhg->nh_entries;
  1458. new_nhges = newg->nh_entries;
  1459. for (i = 0, j = 0; i < nhg->num_nh; ++i) {
  1460. struct nh_info *nhi;
  1461. /* current nexthop getting removed */
  1462. if (nhg->nh_entries[i].nh == nh) {
  1463. newg->num_nh--;
  1464. continue;
  1465. }
  1466. nhi = rtnl_dereference(nhges[i].nh->nh_info);
  1467. if (nhi->family == AF_INET)
  1468. newg->has_v4 = true;
  1469. list_del(&nhges[i].nh_list);
  1470. new_nhges[j].nh_parent = nhges[i].nh_parent;
  1471. new_nhges[j].nh = nhges[i].nh;
  1472. new_nhges[j].weight = nhges[i].weight;
  1473. list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
  1474. j++;
  1475. }
  1476. if (newg->hash_threshold)
  1477. nh_hthr_group_rebalance(newg);
  1478. else if (newg->resilient)
  1479. replace_nexthop_grp_res(nhg, newg);
  1480. rcu_assign_pointer(nhp->nh_grp, newg);
  1481. list_del(&nhge->nh_list);
  1482. nexthop_put(nhge->nh);
  1483. /* Removal of a NH from a resilient group is notified through
  1484. * bucket notifications.
  1485. */
  1486. if (newg->hash_threshold) {
  1487. err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
  1488. &extack);
  1489. if (err)
  1490. pr_err("%s\n", extack._msg);
  1491. }
  1492. if (nlinfo)
  1493. nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
  1494. }
  1495. static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
  1496. struct nl_info *nlinfo)
  1497. {
  1498. struct nh_grp_entry *nhge, *tmp;
  1499. list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
  1500. remove_nh_grp_entry(net, nhge, nlinfo);
  1501. /* make sure all see the newly published array before releasing rtnl */
  1502. synchronize_net();
  1503. }
  1504. static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
  1505. {
  1506. struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
  1507. struct nh_res_table *res_table;
  1508. int i, num_nh = nhg->num_nh;
  1509. for (i = 0; i < num_nh; ++i) {
  1510. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  1511. if (WARN_ON(!nhge->nh))
  1512. continue;
  1513. list_del_init(&nhge->nh_list);
  1514. }
  1515. if (nhg->resilient) {
  1516. res_table = rtnl_dereference(nhg->res_table);
  1517. nh_res_table_cancel_upkeep(res_table);
  1518. }
  1519. }
  1520. /* not called for nexthop replace */
  1521. static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
  1522. {
  1523. struct fib6_info *f6i, *tmp;
  1524. bool do_flush = false;
  1525. struct fib_info *fi;
  1526. list_for_each_entry(fi, &nh->fi_list, nh_list) {
  1527. fi->fib_flags |= RTNH_F_DEAD;
  1528. do_flush = true;
  1529. }
  1530. if (do_flush)
  1531. fib_flush(net);
  1532. /* ip6_del_rt removes the entry from this list hence the _safe */
  1533. list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
  1534. /* __ip6_del_rt does a release, so do a hold here */
  1535. fib6_info_hold(f6i);
  1536. ipv6_stub->ip6_del_rt(net, f6i,
  1537. !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
  1538. }
  1539. }
  1540. static void __remove_nexthop(struct net *net, struct nexthop *nh,
  1541. struct nl_info *nlinfo)
  1542. {
  1543. __remove_nexthop_fib(net, nh);
  1544. if (nh->is_group) {
  1545. remove_nexthop_group(nh, nlinfo);
  1546. } else {
  1547. struct nh_info *nhi;
  1548. nhi = rtnl_dereference(nh->nh_info);
  1549. if (nhi->fib_nhc.nhc_dev)
  1550. hlist_del(&nhi->dev_hash);
  1551. remove_nexthop_from_groups(net, nh, nlinfo);
  1552. }
  1553. }
  1554. static void remove_nexthop(struct net *net, struct nexthop *nh,
  1555. struct nl_info *nlinfo)
  1556. {
  1557. call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
  1558. /* remove from the tree */
  1559. rb_erase(&nh->rb_node, &net->nexthop.rb_root);
  1560. if (nlinfo)
  1561. nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
  1562. __remove_nexthop(net, nh, nlinfo);
  1563. nh_base_seq_inc(net);
  1564. nexthop_put(nh);
  1565. }
  1566. /* if any FIB entries reference this nexthop, any dst entries
  1567. * need to be regenerated
  1568. */
  1569. static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
  1570. struct nexthop *replaced_nh)
  1571. {
  1572. struct fib6_info *f6i;
  1573. struct nh_group *nhg;
  1574. int i;
  1575. if (!list_empty(&nh->fi_list))
  1576. rt_cache_flush(net);
  1577. list_for_each_entry(f6i, &nh->f6i_list, nh_list)
  1578. ipv6_stub->fib6_update_sernum(net, f6i);
  1579. /* if an IPv6 group was replaced, we have to release all old
  1580. * dsts to make sure all refcounts are released
  1581. */
  1582. if (!replaced_nh->is_group)
  1583. return;
  1584. nhg = rtnl_dereference(replaced_nh->nh_grp);
  1585. for (i = 0; i < nhg->num_nh; i++) {
  1586. struct nh_grp_entry *nhge = &nhg->nh_entries[i];
  1587. struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
  1588. if (nhi->family == AF_INET6)
  1589. ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
  1590. }
  1591. }
  1592. static int replace_nexthop_grp(struct net *net, struct nexthop *old,
  1593. struct nexthop *new, const struct nh_config *cfg,
  1594. struct netlink_ext_ack *extack)
  1595. {
  1596. struct nh_res_table *tmp_table = NULL;
  1597. struct nh_res_table *new_res_table;
  1598. struct nh_res_table *old_res_table;
  1599. struct nh_group *oldg, *newg;
  1600. int i, err;
  1601. if (!new->is_group) {
  1602. NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
  1603. return -EINVAL;
  1604. }
  1605. oldg = rtnl_dereference(old->nh_grp);
  1606. newg = rtnl_dereference(new->nh_grp);
  1607. if (newg->hash_threshold != oldg->hash_threshold) {
  1608. NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
  1609. return -EINVAL;
  1610. }
  1611. if (newg->hash_threshold) {
  1612. err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
  1613. extack);
  1614. if (err)
  1615. return err;
  1616. } else if (newg->resilient) {
  1617. new_res_table = rtnl_dereference(newg->res_table);
  1618. old_res_table = rtnl_dereference(oldg->res_table);
  1619. /* Accept if num_nh_buckets was not given, but if it was
  1620. * given, demand that the value be correct.
  1621. */
  1622. if (cfg->nh_grp_res_has_num_buckets &&
  1623. cfg->nh_grp_res_num_buckets !=
  1624. old_res_table->num_nh_buckets) {
  1625. NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group.");
  1626. return -EINVAL;
  1627. }
  1628. /* Emit a pre-replace notification so that listeners could veto
  1629. * a potentially unsupported configuration. Otherwise,
  1630. * individual bucket replacement notifications would need to be
  1631. * vetoed, which is something that should only happen if the
  1632. * bucket is currently active.
  1633. */
  1634. err = call_nexthop_res_table_notifiers(net, new, extack);
  1635. if (err)
  1636. return err;
  1637. if (cfg->nh_grp_res_has_idle_timer)
  1638. old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
  1639. if (cfg->nh_grp_res_has_unbalanced_timer)
  1640. old_res_table->unbalanced_timer =
  1641. cfg->nh_grp_res_unbalanced_timer;
  1642. replace_nexthop_grp_res(oldg, newg);
  1643. tmp_table = new_res_table;
  1644. rcu_assign_pointer(newg->res_table, old_res_table);
  1645. rcu_assign_pointer(newg->spare->res_table, old_res_table);
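/* The replacement group keeps the old, fully populated resilient
* table. The stub table that arrived with 'new' is parked in tmp_table
* and handed to oldg below, so it is freed together with the replaced
* nexthop.
*/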
  1646. }
  1647. /* update parents - used by nexthop code for cleanup */
  1648. for (i = 0; i < newg->num_nh; i++)
  1649. newg->nh_entries[i].nh_parent = old;
  1650. rcu_assign_pointer(old->nh_grp, newg);
  1651. /* Make sure concurrent readers are not using 'oldg' anymore. */
  1652. synchronize_net();
  1653. if (newg->resilient) {
  1654. rcu_assign_pointer(oldg->res_table, tmp_table);
  1655. rcu_assign_pointer(oldg->spare->res_table, tmp_table);
  1656. }
  1657. for (i = 0; i < oldg->num_nh; i++)
  1658. oldg->nh_entries[i].nh_parent = new;
  1659. rcu_assign_pointer(new->nh_grp, oldg);
  1660. return 0;
  1661. }
  1662. static void nh_group_v4_update(struct nh_group *nhg)
  1663. {
  1664. struct nh_grp_entry *nhges;
  1665. bool has_v4 = false;
  1666. int i;
  1667. nhges = nhg->nh_entries;
  1668. for (i = 0; i < nhg->num_nh; i++) {
  1669. struct nh_info *nhi;
  1670. nhi = rtnl_dereference(nhges[i].nh->nh_info);
  1671. if (nhi->family == AF_INET)
  1672. has_v4 = true;
  1673. }
  1674. nhg->has_v4 = has_v4;
  1675. }
  1676. static int replace_nexthop_single_notify_res(struct net *net,
  1677. struct nh_res_table *res_table,
  1678. struct nexthop *old,
  1679. struct nh_info *oldi,
  1680. struct nh_info *newi,
  1681. struct netlink_ext_ack *extack)
  1682. {
  1683. u32 nhg_id = res_table->nhg_id;
  1684. int err;
  1685. u16 i;
  1686. for (i = 0; i < res_table->num_nh_buckets; i++) {
  1687. struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
  1688. struct nh_grp_entry *nhge;
  1689. nhge = rtnl_dereference(bucket->nh_entry);
  1690. if (nhge->nh == old) {
  1691. err = __call_nexthop_res_bucket_notifiers(net, nhg_id,
  1692. i, true,
  1693. oldi, newi,
  1694. extack);
  1695. if (err)
  1696. goto err_notify;
  1697. }
  1698. }
  1699. return 0;
  1700. err_notify:
  1701. while (i-- > 0) {
  1702. struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
  1703. struct nh_grp_entry *nhge;
  1704. nhge = rtnl_dereference(bucket->nh_entry);
  1705. if (nhge->nh == old)
  1706. __call_nexthop_res_bucket_notifiers(net, nhg_id, i,
  1707. true, newi, oldi,
  1708. extack);
  1709. }
  1710. return err;
  1711. }
  1712. static int replace_nexthop_single_notify(struct net *net,
  1713. struct nexthop *group_nh,
  1714. struct nexthop *old,
  1715. struct nh_info *oldi,
  1716. struct nh_info *newi,
  1717. struct netlink_ext_ack *extack)
  1718. {
  1719. struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
  1720. struct nh_res_table *res_table;
  1721. if (nhg->hash_threshold) {
  1722. return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
  1723. group_nh, extack);
  1724. } else if (nhg->resilient) {
  1725. res_table = rtnl_dereference(nhg->res_table);
  1726. return replace_nexthop_single_notify_res(net, res_table,
  1727. old, oldi, newi,
  1728. extack);
  1729. }
  1730. return -EINVAL;
  1731. }
  1732. static int replace_nexthop_single(struct net *net, struct nexthop *old,
  1733. struct nexthop *new,
  1734. struct netlink_ext_ack *extack)
  1735. {
  1736. u8 old_protocol, old_nh_flags;
  1737. struct nh_info *oldi, *newi;
  1738. struct nh_grp_entry *nhge;
  1739. int err;
  1740. if (new->is_group) {
  1741. NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
  1742. return -EINVAL;
  1743. }
  1744. err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
  1745. if (err)
  1746. return err;
  1747. /* Hardware flags were set on 'old' as 'new' is not in the red-black
  1748. * tree. Therefore, inherit the flags from 'old' to 'new'.
  1749. */
  1750. new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
  1751. oldi = rtnl_dereference(old->nh_info);
  1752. newi = rtnl_dereference(new->nh_info);
  1753. newi->nh_parent = old;
  1754. oldi->nh_parent = new;
  1755. old_protocol = old->protocol;
  1756. old_nh_flags = old->nh_flags;
  1757. old->protocol = new->protocol;
  1758. old->nh_flags = new->nh_flags;
  1759. rcu_assign_pointer(old->nh_info, newi);
  1760. rcu_assign_pointer(new->nh_info, oldi);
  1761. /* Send a replace notification for all the groups using the nexthop. */
  1762. list_for_each_entry(nhge, &old->grp_list, nh_list) {
  1763. struct nexthop *nhp = nhge->nh_parent;
  1764. err = replace_nexthop_single_notify(net, nhp, old, oldi, newi,
  1765. extack);
  1766. if (err)
  1767. goto err_notify;
  1768. }
  1769. /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
  1770. * update IPv4 indication in all the groups using the nexthop.
  1771. */
  1772. if (oldi->family == AF_INET && newi->family == AF_INET6) {
  1773. list_for_each_entry(nhge, &old->grp_list, nh_list) {
  1774. struct nexthop *nhp = nhge->nh_parent;
  1775. struct nh_group *nhg;
  1776. nhg = rtnl_dereference(nhp->nh_grp);
  1777. nh_group_v4_update(nhg);
  1778. }
  1779. }
  1780. return 0;
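/* Unwind on notifier failure: restore the swapped nh_info pointers,
* protocol and flags, then walk back over the groups that were already
* notified and send them a replace notification carrying the original
* nexthop info.
*/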
  1781. err_notify:
  1782. rcu_assign_pointer(new->nh_info, newi);
  1783. rcu_assign_pointer(old->nh_info, oldi);
  1784. old->nh_flags = old_nh_flags;
  1785. old->protocol = old_protocol;
  1786. oldi->nh_parent = old;
  1787. newi->nh_parent = new;
  1788. list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
  1789. struct nexthop *nhp = nhge->nh_parent;
  1790. replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL);
  1791. }
  1792. call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
  1793. return err;
  1794. }
  1795. static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
  1796. struct nl_info *info)
  1797. {
  1798. struct fib6_info *f6i;
  1799. if (!list_empty(&nh->fi_list)) {
  1800. struct fib_info *fi;
  1801. /* expectation is a few fib_info per nexthop and then
  1802. * a lot of routes per fib_info. So mark the fib_info
  1803. * and then walk the fib tables once
  1804. */
  1805. list_for_each_entry(fi, &nh->fi_list, nh_list)
  1806. fi->nh_updated = true;
  1807. fib_info_notify_update(net, info);
  1808. list_for_each_entry(fi, &nh->fi_list, nh_list)
  1809. fi->nh_updated = false;
  1810. }
  1811. list_for_each_entry(f6i, &nh->f6i_list, nh_list)
  1812. ipv6_stub->fib6_rt_update(net, f6i, info);
  1813. }
  1814. /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
  1815. * linked to this nexthop and for all groups that the nexthop
  1816. * is a member of
  1817. */
  1818. static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
  1819. struct nl_info *info)
  1820. {
  1821. struct nh_grp_entry *nhge;
  1822. __nexthop_replace_notify(net, nh, info);
  1823. list_for_each_entry(nhge, &nh->grp_list, nh_list)
  1824. __nexthop_replace_notify(net, nhge->nh_parent, info);
  1825. }
  1826. static int replace_nexthop(struct net *net, struct nexthop *old,
  1827. struct nexthop *new, const struct nh_config *cfg,
  1828. struct netlink_ext_ack *extack)
  1829. {
  1830. bool new_is_reject = false;
  1831. struct nh_grp_entry *nhge;
  1832. int err;
  1833. /* check that existing FIB entries are ok with the
  1834. * new nexthop definition
  1835. */
  1836. err = fib_check_nh_list(old, new, extack);
  1837. if (err)
  1838. return err;
  1839. err = fib6_check_nh_list(old, new, extack);
  1840. if (err)
  1841. return err;
  1842. if (!new->is_group) {
  1843. struct nh_info *nhi = rtnl_dereference(new->nh_info);
  1844. new_is_reject = nhi->reject_nh;
  1845. }
  1846. list_for_each_entry(nhge, &old->grp_list, nh_list) {
  1847. /* if new nexthop is a blackhole, any groups using this
  1848. * nexthop cannot have more than 1 path
  1849. */
  1850. if (new_is_reject &&
  1851. nexthop_num_path(nhge->nh_parent) > 1) {
  1852. NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
  1853. return -EINVAL;
  1854. }
  1855. err = fib_check_nh_list(nhge->nh_parent, new, extack);
  1856. if (err)
  1857. return err;
  1858. err = fib6_check_nh_list(nhge->nh_parent, new, extack);
  1859. if (err)
  1860. return err;
  1861. }
  1862. if (old->is_group)
  1863. err = replace_nexthop_grp(net, old, new, cfg, extack);
  1864. else
  1865. err = replace_nexthop_single(net, old, new, extack);
  1866. if (!err) {
  1867. nh_rt_cache_flush(net, old, new);
  1868. __remove_nexthop(net, new, NULL);
  1869. nexthop_put(new);
  1870. }
  1871. return err;
  1872. }
  1873. /* called with rtnl_lock held */
  1874. static int insert_nexthop(struct net *net, struct nexthop *new_nh,
  1875. struct nh_config *cfg, struct netlink_ext_ack *extack)
  1876. {
  1877. struct rb_node **pp, *parent = NULL, *next;
  1878. struct rb_root *root = &net->nexthop.rb_root;
  1879. bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
  1880. bool create = !!(cfg->nlflags & NLM_F_CREATE);
  1881. u32 new_id = new_nh->id;
  1882. int replace_notify = 0;
  1883. int rc = -EEXIST;
  1884. pp = &root->rb_node;
  1885. while (1) {
  1886. struct nexthop *nh;
  1887. next = *pp;
  1888. if (!next)
  1889. break;
  1890. parent = next;
  1891. nh = rb_entry(parent, struct nexthop, rb_node);
  1892. if (new_id < nh->id) {
  1893. pp = &next->rb_left;
  1894. } else if (new_id > nh->id) {
  1895. pp = &next->rb_right;
  1896. } else if (replace) {
  1897. rc = replace_nexthop(net, nh, new_nh, cfg, extack);
  1898. if (!rc) {
  1899. new_nh = nh; /* send notification with old nh */
  1900. replace_notify = 1;
  1901. }
  1902. goto out;
  1903. } else {
  1904. /* id already exists and not a replace */
  1905. goto out;
  1906. }
  1907. }
  1908. if (replace && !create) {
  1909. NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
  1910. rc = -ENOENT;
  1911. goto out;
  1912. }
  1913. if (new_nh->is_group) {
  1914. struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp);
  1915. struct nh_res_table *res_table;
  1916. if (nhg->resilient) {
  1917. res_table = rtnl_dereference(nhg->res_table);
  1918. /* Not passing the number of buckets is OK when
  1919. * replacing, but not when creating a new group.
  1920. */
  1921. if (!cfg->nh_grp_res_has_num_buckets) {
  1922. NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion");
  1923. rc = -EINVAL;
  1924. goto out;
  1925. }
  1926. nh_res_group_rebalance(nhg, res_table);
1927. /* Do not send bucket notifications; a full
1928. * notification is sent below.
  1929. */
  1930. nh_res_table_upkeep(res_table, false, false);
  1931. }
  1932. }
  1933. rb_link_node_rcu(&new_nh->rb_node, parent, pp);
  1934. rb_insert_color(&new_nh->rb_node, root);
  1935. /* The initial insertion is a full notification for hash-threshold as
  1936. * well as resilient groups.
  1937. */
  1938. rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
  1939. if (rc)
  1940. rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
  1941. out:
  1942. if (!rc) {
  1943. nh_base_seq_inc(net);
  1944. nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
  1945. if (replace_notify &&
  1946. READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
  1947. nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
  1948. }
  1949. return rc;
  1950. }
  1951. /* rtnl */
  1952. /* remove all nexthops tied to a device being deleted */
  1953. static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
  1954. {
  1955. unsigned int hash = nh_dev_hashfn(dev->ifindex);
  1956. struct net *net = dev_net(dev);
  1957. struct hlist_head *head = &net->nexthop.devhash[hash];
  1958. struct hlist_node *n;
  1959. struct nh_info *nhi;
  1960. hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
  1961. if (nhi->fib_nhc.nhc_dev != dev)
  1962. continue;
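/* Blackhole nexthops are bound to the loopback device; keep them
* across link-down and carrier changes and only remove them once the
* device is unregistered.
*/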
  1963. if (nhi->reject_nh &&
  1964. (event == NETDEV_DOWN || event == NETDEV_CHANGE))
  1965. continue;
  1966. remove_nexthop(net, nhi->nh_parent, NULL);
  1967. }
  1968. }
  1969. /* rtnl; called when net namespace is deleted */
  1970. static void flush_all_nexthops(struct net *net)
  1971. {
  1972. struct rb_root *root = &net->nexthop.rb_root;
  1973. struct rb_node *node;
  1974. struct nexthop *nh;
  1975. while ((node = rb_first(root))) {
  1976. nh = rb_entry(node, struct nexthop, rb_node);
  1977. remove_nexthop(net, nh, NULL);
  1978. cond_resched();
  1979. }
  1980. }
  1981. static struct nexthop *nexthop_create_group(struct net *net,
  1982. struct nh_config *cfg)
  1983. {
  1984. struct nlattr *grps_attr = cfg->nh_grp;
  1985. struct nexthop_grp *entry = nla_data(grps_attr);
  1986. u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
  1987. struct nh_group *nhg;
  1988. struct nexthop *nh;
  1989. int err;
  1990. int i;
  1991. if (WARN_ON(!num_nh))
  1992. return ERR_PTR(-EINVAL);
  1993. nh = nexthop_alloc();
  1994. if (!nh)
  1995. return ERR_PTR(-ENOMEM);
  1996. nh->is_group = 1;
  1997. nhg = nexthop_grp_alloc(num_nh);
  1998. if (!nhg) {
  1999. kfree(nh);
  2000. return ERR_PTR(-ENOMEM);
  2001. }
  2002. /* spare group used for removals */
  2003. nhg->spare = nexthop_grp_alloc(num_nh);
  2004. if (!nhg->spare) {
  2005. kfree(nhg);
  2006. kfree(nh);
  2007. return ERR_PTR(-ENOMEM);
  2008. }
  2009. nhg->spare->spare = nhg;
  2010. for (i = 0; i < nhg->num_nh; ++i) {
  2011. struct nexthop *nhe;
  2012. struct nh_info *nhi;
  2013. nhe = nexthop_find_by_id(net, entry[i].id);
  2014. if (!nexthop_get(nhe)) {
  2015. err = -ENOENT;
  2016. goto out_no_nh;
  2017. }
  2018. nhi = rtnl_dereference(nhe->nh_info);
  2019. if (nhi->family == AF_INET)
  2020. nhg->has_v4 = true;
  2021. nhg->nh_entries[i].nh = nhe;
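/* Userspace encodes the weight as weight - 1 in a u8, so the effective
* in-kernel weight is 1..256.
*/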
  2022. nhg->nh_entries[i].weight = entry[i].weight + 1;
  2023. list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
  2024. nhg->nh_entries[i].nh_parent = nh;
  2025. }
  2026. if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
  2027. nhg->hash_threshold = 1;
  2028. nhg->is_multipath = true;
  2029. } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
  2030. struct nh_res_table *res_table;
  2031. res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
  2032. if (!res_table) {
  2033. err = -ENOMEM;
  2034. goto out_no_nh;
  2035. }
  2036. rcu_assign_pointer(nhg->spare->res_table, res_table);
  2037. rcu_assign_pointer(nhg->res_table, res_table);
  2038. nhg->resilient = true;
  2039. nhg->is_multipath = true;
  2040. }
  2041. WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1);
  2042. if (nhg->hash_threshold)
  2043. nh_hthr_group_rebalance(nhg);
  2044. if (cfg->nh_fdb)
  2045. nhg->fdb_nh = 1;
  2046. rcu_assign_pointer(nh->nh_grp, nhg);
  2047. return nh;
  2048. out_no_nh:
  2049. for (i--; i >= 0; --i) {
  2050. list_del(&nhg->nh_entries[i].nh_list);
  2051. nexthop_put(nhg->nh_entries[i].nh);
  2052. }
  2053. kfree(nhg->spare);
  2054. kfree(nhg);
  2055. kfree(nh);
  2056. return ERR_PTR(err);
  2057. }
  2058. static int nh_create_ipv4(struct net *net, struct nexthop *nh,
  2059. struct nh_info *nhi, struct nh_config *cfg,
  2060. struct netlink_ext_ack *extack)
  2061. {
  2062. struct fib_nh *fib_nh = &nhi->fib_nh;
  2063. struct fib_config fib_cfg = {
  2064. .fc_oif = cfg->nh_ifindex,
  2065. .fc_gw4 = cfg->gw.ipv4,
  2066. .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
  2067. .fc_flags = cfg->nh_flags,
  2068. .fc_nlinfo = cfg->nlinfo,
  2069. .fc_encap = cfg->nh_encap,
  2070. .fc_encap_type = cfg->nh_encap_type,
  2071. };
  2072. u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
  2073. int err;
  2074. err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
  2075. if (err) {
  2076. fib_nh_release(net, fib_nh);
  2077. goto out;
  2078. }
  2079. if (nhi->fdb_nh)
  2080. goto out;
  2081. /* sets nh_dev if successful */
  2082. err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
  2083. if (!err) {
  2084. nh->nh_flags = fib_nh->fib_nh_flags;
  2085. fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
  2086. !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1);
  2087. } else {
  2088. fib_nh_release(net, fib_nh);
  2089. }
  2090. out:
  2091. return err;
  2092. }
  2093. static int nh_create_ipv6(struct net *net, struct nexthop *nh,
  2094. struct nh_info *nhi, struct nh_config *cfg,
  2095. struct netlink_ext_ack *extack)
  2096. {
  2097. struct fib6_nh *fib6_nh = &nhi->fib6_nh;
  2098. struct fib6_config fib6_cfg = {
  2099. .fc_table = l3mdev_fib_table(cfg->dev),
  2100. .fc_ifindex = cfg->nh_ifindex,
  2101. .fc_gateway = cfg->gw.ipv6,
  2102. .fc_flags = cfg->nh_flags,
  2103. .fc_nlinfo = cfg->nlinfo,
  2104. .fc_encap = cfg->nh_encap,
  2105. .fc_encap_type = cfg->nh_encap_type,
  2106. .fc_is_fdb = cfg->nh_fdb,
  2107. };
  2108. int err;
  2109. if (!ipv6_addr_any(&cfg->gw.ipv6))
  2110. fib6_cfg.fc_flags |= RTF_GATEWAY;
  2111. /* sets nh_dev if successful */
  2112. err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
  2113. extack);
  2114. if (err) {
  2115. /* IPv6 is not enabled, don't call fib6_nh_release */
  2116. if (err == -EAFNOSUPPORT)
  2117. goto out;
  2118. ipv6_stub->fib6_nh_release(fib6_nh);
  2119. } else {
  2120. nh->nh_flags = fib6_nh->fib_nh_flags;
  2121. }
  2122. out:
  2123. return err;
  2124. }
  2125. static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
  2126. struct netlink_ext_ack *extack)
  2127. {
  2128. struct nh_info *nhi;
  2129. struct nexthop *nh;
  2130. int err = 0;
  2131. nh = nexthop_alloc();
  2132. if (!nh)
  2133. return ERR_PTR(-ENOMEM);
  2134. nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
  2135. if (!nhi) {
  2136. kfree(nh);
  2137. return ERR_PTR(-ENOMEM);
  2138. }
  2139. nh->nh_flags = cfg->nh_flags;
  2140. nh->net = net;
  2141. nhi->nh_parent = nh;
  2142. nhi->family = cfg->nh_family;
  2143. nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
  2144. if (cfg->nh_fdb)
  2145. nhi->fdb_nh = 1;
  2146. if (cfg->nh_blackhole) {
  2147. nhi->reject_nh = 1;
  2148. cfg->nh_ifindex = net->loopback_dev->ifindex;
  2149. }
  2150. switch (cfg->nh_family) {
  2151. case AF_INET:
  2152. err = nh_create_ipv4(net, nh, nhi, cfg, extack);
  2153. break;
  2154. case AF_INET6:
  2155. err = nh_create_ipv6(net, nh, nhi, cfg, extack);
  2156. break;
  2157. }
  2158. if (err) {
  2159. kfree(nhi);
  2160. kfree(nh);
  2161. return ERR_PTR(err);
  2162. }
  2163. /* add the entry to the device based hash */
  2164. if (!nhi->fdb_nh)
  2165. nexthop_devhash_add(net, nhi);
  2166. rcu_assign_pointer(nh->nh_info, nhi);
  2167. return nh;
  2168. }
  2169. /* called with rtnl lock held */
  2170. static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
  2171. struct netlink_ext_ack *extack)
  2172. {
  2173. struct nexthop *nh;
  2174. int err;
  2175. if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
  2176. NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
  2177. return ERR_PTR(-EINVAL);
  2178. }
  2179. if (!cfg->nh_id) {
  2180. cfg->nh_id = nh_find_unused_id(net);
  2181. if (!cfg->nh_id) {
  2182. NL_SET_ERR_MSG(extack, "No unused id");
  2183. return ERR_PTR(-EINVAL);
  2184. }
  2185. }
  2186. if (cfg->nh_grp)
  2187. nh = nexthop_create_group(net, cfg);
  2188. else
  2189. nh = nexthop_create(net, cfg, extack);
  2190. if (IS_ERR(nh))
  2191. return nh;
  2192. refcount_set(&nh->refcnt, 1);
  2193. nh->id = cfg->nh_id;
  2194. nh->protocol = cfg->nh_protocol;
  2195. nh->net = net;
  2196. err = insert_nexthop(net, nh, cfg, extack);
  2197. if (err) {
  2198. __remove_nexthop(net, nh, NULL);
  2199. nexthop_put(nh);
  2200. nh = ERR_PTR(err);
  2201. }
  2202. return nh;
  2203. }
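/* Parse an optional timer attribute. The netlink value is in clock_t
* units (USER_HZ ticks, typically hundredths of a second) and is
* converted to jiffies here; when the attribute is absent, fall back to
* the supplied default and report that no explicit value was given.
*/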
  2204. static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback,
  2205. unsigned long *timer_p, bool *has_p,
  2206. struct netlink_ext_ack *extack)
  2207. {
  2208. unsigned long timer;
  2209. u32 value;
  2210. if (!attr) {
  2211. *timer_p = fallback;
  2212. *has_p = false;
  2213. return 0;
  2214. }
  2215. value = nla_get_u32(attr);
  2216. timer = clock_t_to_jiffies(value);
  2217. if (timer == ~0UL) {
  2218. NL_SET_ERR_MSG(extack, "Timer value too large");
  2219. return -EINVAL;
  2220. }
  2221. *timer_p = timer;
  2222. *has_p = true;
  2223. return 0;
  2224. }
  2225. static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
  2226. struct netlink_ext_ack *extack)
  2227. {
  2228. struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {};
  2229. int err;
  2230. if (res) {
  2231. err = nla_parse_nested(tb,
  2232. ARRAY_SIZE(rtm_nh_res_policy_new) - 1,
  2233. res, rtm_nh_res_policy_new, extack);
  2234. if (err < 0)
  2235. return err;
  2236. }
  2237. if (tb[NHA_RES_GROUP_BUCKETS]) {
  2238. cfg->nh_grp_res_num_buckets =
  2239. nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]);
  2240. cfg->nh_grp_res_has_num_buckets = true;
  2241. if (!cfg->nh_grp_res_num_buckets) {
  2242. NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0");
  2243. return -EINVAL;
  2244. }
  2245. }
  2246. err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER],
  2247. NH_RES_DEFAULT_IDLE_TIMER,
  2248. &cfg->nh_grp_res_idle_timer,
  2249. &cfg->nh_grp_res_has_idle_timer,
  2250. extack);
  2251. if (err)
  2252. return err;
  2253. return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER],
  2254. NH_RES_DEFAULT_UNBALANCED_TIMER,
  2255. &cfg->nh_grp_res_unbalanced_timer,
  2256. &cfg->nh_grp_res_has_unbalanced_timer,
  2257. extack);
  2258. }
  2259. static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
  2260. struct nlmsghdr *nlh, struct nh_config *cfg,
  2261. struct netlink_ext_ack *extack)
  2262. {
  2263. struct nhmsg *nhm = nlmsg_data(nlh);
  2264. struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
  2265. int err;
  2266. err = nlmsg_parse(nlh, sizeof(*nhm), tb,
  2267. ARRAY_SIZE(rtm_nh_policy_new) - 1,
  2268. rtm_nh_policy_new, extack);
  2269. if (err < 0)
  2270. return err;
  2271. err = -EINVAL;
  2272. if (nhm->resvd || nhm->nh_scope) {
  2273. NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
  2274. goto out;
  2275. }
  2276. if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
  2277. NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
  2278. goto out;
  2279. }
  2280. switch (nhm->nh_family) {
  2281. case AF_INET:
  2282. case AF_INET6:
  2283. break;
  2284. case AF_UNSPEC:
  2285. if (tb[NHA_GROUP])
  2286. break;
  2287. fallthrough;
  2288. default:
  2289. NL_SET_ERR_MSG(extack, "Invalid address family");
  2290. goto out;
  2291. }
  2292. memset(cfg, 0, sizeof(*cfg));
  2293. cfg->nlflags = nlh->nlmsg_flags;
  2294. cfg->nlinfo.portid = NETLINK_CB(skb).portid;
  2295. cfg->nlinfo.nlh = nlh;
  2296. cfg->nlinfo.nl_net = net;
  2297. cfg->nh_family = nhm->nh_family;
  2298. cfg->nh_protocol = nhm->nh_protocol;
  2299. cfg->nh_flags = nhm->nh_flags;
  2300. if (tb[NHA_ID])
  2301. cfg->nh_id = nla_get_u32(tb[NHA_ID]);
  2302. if (tb[NHA_FDB]) {
  2303. if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
  2304. tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
  2305. NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
  2306. goto out;
  2307. }
  2308. if (nhm->nh_flags) {
  2309. NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
  2310. goto out;
  2311. }
  2312. cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
  2313. }
  2314. if (tb[NHA_GROUP]) {
  2315. if (nhm->nh_family != AF_UNSPEC) {
  2316. NL_SET_ERR_MSG(extack, "Invalid family for group");
  2317. goto out;
  2318. }
  2319. cfg->nh_grp = tb[NHA_GROUP];
  2320. cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
  2321. if (tb[NHA_GROUP_TYPE])
  2322. cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
  2323. if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
  2324. NL_SET_ERR_MSG(extack, "Invalid group type");
  2325. goto out;
  2326. }
  2327. err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
  2328. cfg->nh_grp_type, extack);
  2329. if (err)
  2330. goto out;
  2331. if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES)
  2332. err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
  2333. cfg, extack);
  2334. /* no other attributes should be set */
  2335. goto out;
  2336. }
  2337. if (tb[NHA_BLACKHOLE]) {
  2338. if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
  2339. tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
  2340. NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
  2341. goto out;
  2342. }
  2343. cfg->nh_blackhole = 1;
  2344. err = 0;
  2345. goto out;
  2346. }
  2347. if (!cfg->nh_fdb && !tb[NHA_OIF]) {
  2348. NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
  2349. goto out;
  2350. }
  2351. if (!cfg->nh_fdb && tb[NHA_OIF]) {
  2352. cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
  2353. if (cfg->nh_ifindex)
  2354. cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
  2355. if (!cfg->dev) {
  2356. NL_SET_ERR_MSG(extack, "Invalid device index");
  2357. goto out;
  2358. } else if (!(cfg->dev->flags & IFF_UP)) {
  2359. NL_SET_ERR_MSG(extack, "Nexthop device is not up");
  2360. err = -ENETDOWN;
  2361. goto out;
  2362. } else if (!netif_carrier_ok(cfg->dev)) {
  2363. NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
  2364. err = -ENETDOWN;
  2365. goto out;
  2366. }
  2367. }
  2368. err = -EINVAL;
  2369. if (tb[NHA_GATEWAY]) {
  2370. struct nlattr *gwa = tb[NHA_GATEWAY];
  2371. switch (cfg->nh_family) {
  2372. case AF_INET:
  2373. if (nla_len(gwa) != sizeof(u32)) {
  2374. NL_SET_ERR_MSG(extack, "Invalid gateway");
  2375. goto out;
  2376. }
  2377. cfg->gw.ipv4 = nla_get_be32(gwa);
  2378. break;
  2379. case AF_INET6:
  2380. if (nla_len(gwa) != sizeof(struct in6_addr)) {
  2381. NL_SET_ERR_MSG(extack, "Invalid gateway");
  2382. goto out;
  2383. }
  2384. cfg->gw.ipv6 = nla_get_in6_addr(gwa);
  2385. break;
  2386. default:
  2387. NL_SET_ERR_MSG(extack,
  2388. "Unknown address family for gateway");
  2389. goto out;
  2390. }
  2391. } else {
  2392. /* device only nexthop (no gateway) */
  2393. if (cfg->nh_flags & RTNH_F_ONLINK) {
  2394. NL_SET_ERR_MSG(extack,
  2395. "ONLINK flag can not be set for nexthop without a gateway");
  2396. goto out;
  2397. }
  2398. }
  2399. if (tb[NHA_ENCAP]) {
  2400. cfg->nh_encap = tb[NHA_ENCAP];
  2401. if (!tb[NHA_ENCAP_TYPE]) {
  2402. NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
  2403. goto out;
  2404. }
  2405. cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
  2406. err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
  2407. if (err < 0)
  2408. goto out;
  2409. } else if (tb[NHA_ENCAP_TYPE]) {
  2410. NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
  2411. goto out;
  2412. }
  2413. err = 0;
  2414. out:
  2415. return err;
  2416. }
  2417. /* rtnl */
  2418. static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
  2419. struct netlink_ext_ack *extack)
  2420. {
  2421. struct net *net = sock_net(skb->sk);
  2422. struct nh_config cfg;
  2423. struct nexthop *nh;
  2424. int err;
  2425. err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
  2426. if (!err) {
  2427. nh = nexthop_add(net, &cfg, extack);
  2428. if (IS_ERR(nh))
  2429. err = PTR_ERR(nh);
  2430. }
  2431. return err;
  2432. }
  2433. static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
  2434. struct nlattr **tb, u32 *id,
  2435. struct netlink_ext_ack *extack)
  2436. {
  2437. struct nhmsg *nhm = nlmsg_data(nlh);
  2438. if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
  2439. NL_SET_ERR_MSG(extack, "Invalid values in header");
  2440. return -EINVAL;
  2441. }
  2442. if (!tb[NHA_ID]) {
  2443. NL_SET_ERR_MSG(extack, "Nexthop id is missing");
  2444. return -EINVAL;
  2445. }
  2446. *id = nla_get_u32(tb[NHA_ID]);
  2447. if (!(*id)) {
  2448. NL_SET_ERR_MSG(extack, "Invalid nexthop id");
  2449. return -EINVAL;
  2450. }
  2451. return 0;
  2452. }
  2453. static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
  2454. struct netlink_ext_ack *extack)
  2455. {
  2456. struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
  2457. int err;
  2458. err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
  2459. ARRAY_SIZE(rtm_nh_policy_get) - 1,
  2460. rtm_nh_policy_get, extack);
  2461. if (err < 0)
  2462. return err;
  2463. return __nh_valid_get_del_req(nlh, tb, id, extack);
  2464. }
  2465. /* rtnl */
  2466. static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
  2467. struct netlink_ext_ack *extack)
  2468. {
  2469. struct net *net = sock_net(skb->sk);
  2470. struct nl_info nlinfo = {
  2471. .nlh = nlh,
  2472. .nl_net = net,
  2473. .portid = NETLINK_CB(skb).portid,
  2474. };
  2475. struct nexthop *nh;
  2476. int err;
  2477. u32 id;
  2478. err = nh_valid_get_del_req(nlh, &id, extack);
  2479. if (err)
  2480. return err;
  2481. nh = nexthop_find_by_id(net, id);
  2482. if (!nh)
  2483. return -ENOENT;
  2484. remove_nexthop(net, nh, &nlinfo);
  2485. return 0;
  2486. }
  2487. /* rtnl */
  2488. static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
  2489. struct netlink_ext_ack *extack)
  2490. {
  2491. struct net *net = sock_net(in_skb->sk);
  2492. struct sk_buff *skb = NULL;
  2493. struct nexthop *nh;
  2494. int err;
  2495. u32 id;
  2496. err = nh_valid_get_del_req(nlh, &id, extack);
  2497. if (err)
  2498. return err;
  2499. err = -ENOBUFS;
  2500. skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  2501. if (!skb)
  2502. goto out;
  2503. err = -ENOENT;
  2504. nh = nexthop_find_by_id(net, id);
  2505. if (!nh)
  2506. goto errout_free;
  2507. err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
  2508. nlh->nlmsg_seq, 0);
  2509. if (err < 0) {
  2510. WARN_ON(err == -EMSGSIZE);
  2511. goto errout_free;
  2512. }
  2513. err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
  2514. out:
  2515. return err;
  2516. errout_free:
  2517. kfree_skb(skb);
  2518. goto out;
  2519. }
  2520. struct nh_dump_filter {
  2521. u32 nh_id;
  2522. int dev_idx;
  2523. int master_idx;
  2524. bool group_filter;
  2525. bool fdb_filter;
  2526. u32 res_bucket_nh_id;
  2527. };
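/* Return true when @nh does not match the dump filter and should be
* skipped: group-only dumps skip non-group nexthops, while device,
* master and family filters can only ever match single nexthops.
*/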
  2528. static bool nh_dump_filtered(struct nexthop *nh,
  2529. struct nh_dump_filter *filter, u8 family)
  2530. {
  2531. const struct net_device *dev;
  2532. const struct nh_info *nhi;
  2533. if (filter->group_filter && !nh->is_group)
  2534. return true;
  2535. if (!filter->dev_idx && !filter->master_idx && !family)
  2536. return false;
  2537. if (nh->is_group)
  2538. return true;
  2539. nhi = rtnl_dereference(nh->nh_info);
  2540. if (family && nhi->family != family)
  2541. return true;
  2542. dev = nhi->fib_nhc.nhc_dev;
  2543. if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
  2544. return true;
  2545. if (filter->master_idx) {
  2546. struct net_device *master;
  2547. if (!dev)
  2548. return true;
  2549. master = netdev_master_upper_dev_get((struct net_device *)dev);
  2550. if (!master || master->ifindex != filter->master_idx)
  2551. return true;
  2552. }
  2553. return false;
  2554. }
  2555. static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
  2556. struct nh_dump_filter *filter,
  2557. struct netlink_ext_ack *extack)
  2558. {
  2559. struct nhmsg *nhm;
  2560. u32 idx;
  2561. if (tb[NHA_OIF]) {
  2562. idx = nla_get_u32(tb[NHA_OIF]);
  2563. if (idx > INT_MAX) {
  2564. NL_SET_ERR_MSG(extack, "Invalid device index");
  2565. return -EINVAL;
  2566. }
  2567. filter->dev_idx = idx;
  2568. }
  2569. if (tb[NHA_MASTER]) {
  2570. idx = nla_get_u32(tb[NHA_MASTER]);
  2571. if (idx > INT_MAX) {
  2572. NL_SET_ERR_MSG(extack, "Invalid master device index");
  2573. return -EINVAL;
  2574. }
  2575. filter->master_idx = idx;
  2576. }
  2577. filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
  2578. filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
  2579. nhm = nlmsg_data(nlh);
  2580. if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
  2581. NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
  2582. return -EINVAL;
  2583. }
  2584. return 0;
  2585. }
  2586. static int nh_valid_dump_req(const struct nlmsghdr *nlh,
  2587. struct nh_dump_filter *filter,
  2588. struct netlink_callback *cb)
  2589. {
  2590. struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
  2591. int err;
  2592. err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
  2593. ARRAY_SIZE(rtm_nh_policy_dump) - 1,
  2594. rtm_nh_policy_dump, cb->extack);
  2595. if (err < 0)
  2596. return err;
  2597. return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
  2598. }
  2599. struct rtm_dump_nh_ctx {
  2600. u32 idx;
  2601. };
  2602. static struct rtm_dump_nh_ctx *
  2603. rtm_dump_nh_ctx(struct netlink_callback *cb)
  2604. {
  2605. struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
  2606. BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
  2607. return ctx;
  2608. }
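/* Walk the nexthop rb-tree in id order, calling nh_cb for every entry
* with an id of at least ctx->idx. ctx->idx records the id currently
* being processed so that an interrupted dump resumes at the same
* nexthop; once the walk completes it is bumped past the last id.
*/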
  2609. static int rtm_dump_walk_nexthops(struct sk_buff *skb,
  2610. struct netlink_callback *cb,
  2611. struct rb_root *root,
  2612. struct rtm_dump_nh_ctx *ctx,
  2613. int (*nh_cb)(struct sk_buff *skb,
  2614. struct netlink_callback *cb,
  2615. struct nexthop *nh, void *data),
  2616. void *data)
  2617. {
  2618. struct rb_node *node;
  2619. int s_idx;
  2620. int err;
  2621. s_idx = ctx->idx;
  2622. for (node = rb_first(root); node; node = rb_next(node)) {
  2623. struct nexthop *nh;
  2624. nh = rb_entry(node, struct nexthop, rb_node);
  2625. if (nh->id < s_idx)
  2626. continue;
  2627. ctx->idx = nh->id;
  2628. err = nh_cb(skb, cb, nh, data);
  2629. if (err)
  2630. return err;
  2631. }
  2632. ctx->idx++;
  2633. return 0;
  2634. }
  2635. static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
  2636. struct nexthop *nh, void *data)
  2637. {
  2638. struct nhmsg *nhm = nlmsg_data(cb->nlh);
  2639. struct nh_dump_filter *filter = data;
  2640. if (nh_dump_filtered(nh, filter, nhm->nh_family))
  2641. return 0;
  2642. return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
  2643. NETLINK_CB(cb->skb).portid,
  2644. cb->nlh->nlmsg_seq, NLM_F_MULTI);
  2645. }
  2646. /* rtnl */
  2647. static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
  2648. {
  2649. struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
  2650. struct net *net = sock_net(skb->sk);
  2651. struct rb_root *root = &net->nexthop.rb_root;
  2652. struct nh_dump_filter filter = {};
  2653. int err;
  2654. err = nh_valid_dump_req(cb->nlh, &filter, cb);
  2655. if (err < 0)
  2656. return err;
  2657. err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
  2658. &rtm_dump_nexthop_cb, &filter);
  2659. if (err < 0) {
  2660. if (likely(skb->len))
  2661. err = skb->len;
  2662. }
  2663. cb->seq = net->nexthop.seq;
  2664. nl_dump_check_consistent(cb, nlmsg_hdr(skb));
  2665. return err;
  2666. }
  2667. static struct nexthop *
  2668. nexthop_find_group_resilient(struct net *net, u32 id,
  2669. struct netlink_ext_ack *extack)
  2670. {
  2671. struct nh_group *nhg;
  2672. struct nexthop *nh;
  2673. nh = nexthop_find_by_id(net, id);
  2674. if (!nh)
  2675. return ERR_PTR(-ENOENT);
  2676. if (!nh->is_group) {
  2677. NL_SET_ERR_MSG(extack, "Not a nexthop group");
  2678. return ERR_PTR(-EINVAL);
  2679. }
  2680. nhg = rtnl_dereference(nh->nh_grp);
  2681. if (!nhg->resilient) {
  2682. NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient");
  2683. return ERR_PTR(-EINVAL);
  2684. }
  2685. return nh;
  2686. }
  2687. static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p,
  2688. struct netlink_ext_ack *extack)
  2689. {
  2690. u32 idx;
  2691. if (attr) {
  2692. idx = nla_get_u32(attr);
  2693. if (!idx) {
  2694. NL_SET_ERR_MSG(extack, "Invalid nexthop id");
  2695. return -EINVAL;
  2696. }
  2697. *nh_id_p = idx;
  2698. } else {
  2699. *nh_id_p = 0;
  2700. }
  2701. return 0;
  2702. }
  2703. static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
  2704. struct nh_dump_filter *filter,
  2705. struct netlink_callback *cb)
  2706. {
  2707. struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
  2708. struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
  2709. int err;
  2710. err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
  2711. ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
  2712. rtm_nh_policy_dump_bucket, NULL);
  2713. if (err < 0)
  2714. return err;
  2715. err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack);
  2716. if (err)
  2717. return err;
  2718. if (tb[NHA_RES_BUCKET]) {
  2719. size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1;
  2720. err = nla_parse_nested(res_tb, max,
  2721. tb[NHA_RES_BUCKET],
  2722. rtm_nh_res_bucket_policy_dump,
  2723. cb->extack);
  2724. if (err < 0)
  2725. return err;
  2726. err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID],
  2727. &filter->res_bucket_nh_id,
  2728. cb->extack);
  2729. if (err)
  2730. return err;
  2731. }
  2732. return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
  2733. }
  2734. struct rtm_dump_res_bucket_ctx {
  2735. struct rtm_dump_nh_ctx nh;
  2736. u16 bucket_index;
  2737. u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
  2738. };
  2739. static struct rtm_dump_res_bucket_ctx *
  2740. rtm_dump_res_bucket_ctx(struct netlink_callback *cb)
  2741. {
  2742. struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx;
  2743. BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
  2744. return ctx;
  2745. }
  2746. struct rtm_dump_nexthop_bucket_data {
  2747. struct rtm_dump_res_bucket_ctx *ctx;
  2748. struct nh_dump_filter filter;
  2749. };
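/* Dump the buckets of one resilient group, resuming from
* ctx->bucket_index if a previous pass stopped mid-group. Once every
* bucket of this nexthop has been emitted, record it in done_nh_idx so
* that a retried walk skips it.
*/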
  2750. static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
  2751. struct netlink_callback *cb,
  2752. struct nexthop *nh,
  2753. struct rtm_dump_nexthop_bucket_data *dd)
  2754. {
  2755. u32 portid = NETLINK_CB(cb->skb).portid;
  2756. struct nhmsg *nhm = nlmsg_data(cb->nlh);
  2757. struct nh_res_table *res_table;
  2758. struct nh_group *nhg;
  2759. u16 bucket_index;
  2760. int err;
  2761. if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
  2762. return 0;
  2763. nhg = rtnl_dereference(nh->nh_grp);
  2764. res_table = rtnl_dereference(nhg->res_table);
  2765. for (bucket_index = dd->ctx->bucket_index;
  2766. bucket_index < res_table->num_nh_buckets;
  2767. bucket_index++) {
  2768. struct nh_res_bucket *bucket;
  2769. struct nh_grp_entry *nhge;
  2770. bucket = &res_table->nh_buckets[bucket_index];
  2771. nhge = rtnl_dereference(bucket->nh_entry);
  2772. if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family))
  2773. continue;
  2774. if (dd->filter.res_bucket_nh_id &&
  2775. dd->filter.res_bucket_nh_id != nhge->nh->id)
  2776. continue;
  2777. dd->ctx->bucket_index = bucket_index;
  2778. err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
  2779. RTM_NEWNEXTHOPBUCKET, portid,
  2780. cb->nlh->nlmsg_seq, NLM_F_MULTI,
  2781. cb->extack);
  2782. if (err)
  2783. return err;
  2784. }
  2785. dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
  2786. dd->ctx->bucket_index = 0;
  2787. return 0;
  2788. }
  2789. static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
  2790. struct netlink_callback *cb,
  2791. struct nexthop *nh, void *data)
  2792. {
  2793. struct rtm_dump_nexthop_bucket_data *dd = data;
  2794. struct nh_group *nhg;
  2795. if (!nh->is_group)
  2796. return 0;
  2797. nhg = rtnl_dereference(nh->nh_grp);
  2798. if (!nhg->resilient)
  2799. return 0;
  2800. return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd);
  2801. }
/* rtnl */
static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb);
	struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx };
	struct net *net = sock_net(skb->sk);
	struct nexthop *nh;
	int err;

	err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb);
	if (err)
		return err;

	if (dd.filter.nh_id) {
		nh = nexthop_find_group_resilient(net, dd.filter.nh_id,
						  cb->extack);
		if (IS_ERR(nh))
			return PTR_ERR(nh);
		err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd);
	} else {
		struct rb_root *root = &net->nexthop.rb_root;

		err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh,
					     &rtm_dump_nexthop_bucket_cb, &dd);
	}

	if (err < 0) {
		if (likely(skb->len))
			err = skb->len;
	}

	cb->seq = net->nexthop.seq;
	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
	return err;
}

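/* Parse the NHA_RES_BUCKET nest of a bucket get request and extract the
 * mandatory bucket index.
 */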
static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res,
					      u16 *bucket_index,
					      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)];
	int err;

	err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1,
			       res, rtm_nh_res_bucket_policy_get, extack);
	if (err < 0)
		return err;

	if (!tb[NHA_RES_BUCKET_INDEX]) {
		NL_SET_ERR_MSG(extack, "Bucket index is missing");
		return -EINVAL;
	}

	*bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]);
	return 0;
}

static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
				   u32 *id, u16 *bucket_index,
				   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)];
	int err;

	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
			  ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1,
			  rtm_nh_policy_get_bucket, extack);
	if (err < 0)
		return err;

	err = __nh_valid_get_del_req(nlh, tb, id, extack);
	if (err)
		return err;

	if (!tb[NHA_RES_BUCKET]) {
		NL_SET_ERR_MSG(extack, "Bucket information is missing");
		return -EINVAL;
	}

	err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET],
						 bucket_index, extack);
	if (err)
		return err;

	return 0;
}

/* rtnl */
static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh,
				  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nh_res_table *res_table;
	struct sk_buff *skb = NULL;
	struct nh_group *nhg;
	struct nexthop *nh;
	u16 bucket_index;
	int err;
	u32 id;

	err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack);
	if (err)
		return err;

	nh = nexthop_find_group_resilient(net, id, extack);
	if (IS_ERR(nh))
		return PTR_ERR(nh);

	nhg = rtnl_dereference(nh->nh_grp);
	res_table = rtnl_dereference(nhg->res_table);
	if (bucket_index >= res_table->num_nh_buckets) {
		NL_SET_ERR_MSG(extack, "Bucket index out of bounds");
		return -ENOENT;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index],
				 bucket_index, RTM_NEWNEXTHOPBUCKET,
				 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
				 0, extack);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		goto errout_free;
	}

	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout_free:
	kfree_skb(skb);
	return err;
}

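/* Propagate a device MTU change to the IPv4 nexthops bound to that device. */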
static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	unsigned int hash = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *head = &net->nexthop.devhash[hash];
	struct hlist_node *n;
	struct nh_info *nhi;

	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
		if (nhi->fib_nhc.nhc_dev == dev) {
			if (nhi->family == AF_INET)
				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
						   orig_mtu);
		}
	}
}

/* rtnl */
static int nh_netdev_event(struct notifier_block *this,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_info_ext *info_ext;

	switch (event) {
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		nexthop_flush_dev(dev, event);
		break;
	case NETDEV_CHANGE:
		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
			nexthop_flush_dev(dev, event);
		break;
	case NETDEV_CHANGEMTU:
		info_ext = ptr;
		nexthop_sync_mtu(dev, info_ext->ext.mtu);
		rt_cache_flush(dev_net(dev));
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nh_netdev_notifier = {
	.notifier_call = nh_netdev_event,
};

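/* Notify @nb about every nexthop currently configured in @net; used when a
 * listener registers (replayed as REPLACE) or unregisters (replayed as DEL).
 */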
static int nexthops_dump(struct net *net, struct notifier_block *nb,
			 enum nexthop_event_type event_type,
			 struct netlink_ext_ack *extack)
{
	struct rb_root *root = &net->nexthop.rb_root;
	struct rb_node *node;
	int err = 0;

	for (node = rb_first(root); node; node = rb_next(node)) {
		struct nexthop *nh;

		nh = rb_entry(node, struct nexthop, rb_node);
		err = call_nexthop_notifier(nb, net, event_type, nh, extack);
		if (err)
			break;
	}

	return err;
}

int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
			      struct netlink_ext_ack *extack)
{
	int err;

	rtnl_lock();
	err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
	if (err)
		goto unlock;
	err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
					       nb);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_nexthop_notifier);

int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
						 nb);
	if (err)
		goto unlock;
	nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_nexthop_notifier);

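/* Update the RTNH_F_OFFLOAD / RTNH_F_TRAP flags of a nexthop under the RCU
 * read lock; exported for offloading drivers.
 */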
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
{
	struct nexthop *nexthop;

	rcu_read_lock();

	nexthop = nexthop_find_by_id(net, id);
	if (!nexthop)
		goto out;

	nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
	if (offload)
		nexthop->nh_flags |= RTNH_F_OFFLOAD;
	if (trap)
		nexthop->nh_flags |= RTNH_F_TRAP;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_set_hw_flags);

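/* Like nexthop_set_hw_flags(), but for a single bucket of a resilient
 * nexthop group.
 */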
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
				 bool offload, bool trap)
{
	struct nh_res_table *res_table;
	struct nh_res_bucket *bucket;
	struct nexthop *nexthop;
	struct nh_group *nhg;

	rcu_read_lock();

	nexthop = nexthop_find_by_id(net, id);
	if (!nexthop || !nexthop->is_group)
		goto out;

	nhg = rcu_dereference(nexthop->nh_grp);
	if (!nhg->resilient)
		goto out;

	if (bucket_index >= nhg->res_table->num_nh_buckets)
		goto out;

	res_table = rcu_dereference(nhg->res_table);
	bucket = &res_table->nh_buckets[bucket_index];
	bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
	if (offload)
		bucket->nh_flags |= RTNH_F_OFFLOAD;
	if (trap)
		bucket->nh_flags |= RTNH_F_TRAP;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_bucket_set_hw_flags);

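/* Mark as busy every bucket of a resilient group whose bit is set in the
 * @activity bitmap; @num_buckets must match the group's bucket count.
 */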
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
				     unsigned long *activity)
{
	struct nh_res_table *res_table;
	struct nexthop *nexthop;
	struct nh_group *nhg;
	u16 i;

	rcu_read_lock();

	nexthop = nexthop_find_by_id(net, id);
	if (!nexthop || !nexthop->is_group)
		goto out;

	nhg = rcu_dereference(nexthop->nh_grp);
	if (!nhg->resilient)
		goto out;

	/* Instead of silently ignoring some buckets, demand that the sizes
	 * be the same.
	 */
	res_table = rcu_dereference(nhg->res_table);
	if (num_buckets != res_table->num_nh_buckets)
		goto out;

	for (i = 0; i < num_buckets; i++) {
		if (test_bit(i, activity))
			nh_res_bucket_set_busy(&res_table->nh_buckets[i]);
	}

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);

static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		flush_all_nexthops(net);
		kfree(net->nexthop.devhash);
	}
	rtnl_unlock();
}

static int __net_init nexthop_net_init(struct net *net)
{
	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;

	net->nexthop.rb_root = RB_ROOT;
	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
	if (!net->nexthop.devhash)
		return -ENOMEM;
	BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);

	return 0;
}

static struct pernet_operations nexthop_net_ops = {
	.init = nexthop_net_init,
	.exit_batch = nexthop_net_exit_batch,
};

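/* Register the per-netns state, the netdev notifier and the rtnetlink
 * handlers for RTM_{NEW,DEL,GET}NEXTHOP and RTM_GETNEXTHOPBUCKET.
 */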
static int __init nexthop_init(void)
{
	register_pernet_subsys(&nexthop_net_ops);

	register_netdevice_notifier(&nh_netdev_notifier);

	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
		      rtm_dump_nexthop, 0);

	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
		      rtm_dump_nexthop_bucket, 0);

	return 0;
}
subsys_initcall(nexthop_init);