pm_netlink.c

// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2020, Red Hat, Inc.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/inet.h>
#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
#include <net/genetlink.h>
#include <uapi/linux/mptcp.h>

#include "protocol.h"
#include "mib.h"

/* forward declaration */
static struct genl_family mptcp_genl_family;

static int pm_nl_pernet_id;

struct mptcp_pm_add_entry {
	struct list_head	list;
	struct mptcp_addr_info	addr;
	struct timer_list	add_timer;
	struct mptcp_sock	*sock;
	u8			retrans_times;
};

struct pm_nl_pernet {
	/* protects pernet updates */
	spinlock_t		lock;
	struct list_head	local_addr_list;
	unsigned int		addrs;
	unsigned int		stale_loss_cnt;
	unsigned int		add_addr_signal_max;
	unsigned int		add_addr_accept_max;
	unsigned int		local_addr_max;
	unsigned int		subflows_max;
	unsigned int		next_id;
	DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};

#define MPTCP_PM_ADDR_MAX	8
#define ADD_ADDR_RETRANS_MAX	3

static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
{
	return net_generic(net, pm_nl_pernet_id);
}

static struct pm_nl_pernet *
pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
{
	return pm_nl_get_pernet(sock_net((struct sock *)msk));
}
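
/* Compare two PM addresses; an IPv4 address and its IPv4-mapped IPv6
 * form (::ffff:a.b.c.d) are considered equal. Ports are compared only
 * when use_port is set.
 */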
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
			   const struct mptcp_addr_info *b, bool use_port)
{
	bool addr_equals = false;

	if (a->family == b->family) {
		if (a->family == AF_INET)
			addr_equals = a->addr.s_addr == b->addr.s_addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		else
			addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6);
	} else if (a->family == AF_INET) {
		if (ipv6_addr_v4mapped(&b->addr6))
			addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3];
	} else if (b->family == AF_INET) {
		if (ipv6_addr_v4mapped(&a->addr6))
			addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr;
#endif
	}

	if (!addr_equals)
		return false;
	if (!use_port)
		return true;

	return a->port == b->port;
}

static void local_address(const struct sock_common *skc,
			  struct mptcp_addr_info *addr)
{
	addr->family = skc->skc_family;
	addr->port = htons(skc->skc_num);
	if (addr->family == AF_INET)
		addr->addr.s_addr = skc->skc_rcv_saddr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->family == AF_INET6)
		addr->addr6 = skc->skc_v6_rcv_saddr;
#endif
}

static void remote_address(const struct sock_common *skc,
			   struct mptcp_addr_info *addr)
{
	addr->family = skc->skc_family;
	addr->port = skc->skc_dport;
	if (addr->family == AF_INET)
		addr->addr.s_addr = skc->skc_daddr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->family == AF_INET6)
		addr->addr6 = skc->skc_v6_daddr;
#endif
}
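
/* The two helpers below scan the msk conn_list for a subflow whose
 * local (resp. remote) address matches the given one. Note that
 * local_address() converts the host-order skc_num to network byte
 * order, while remote_address() copies skc_dport as-is, so both yield
 * network-byte-order ports.
 */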
static bool lookup_subflow_by_saddr(const struct list_head *list,
				    const struct mptcp_addr_info *saddr)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info cur;
	struct sock_common *skc;

	list_for_each_entry(subflow, list, node) {
		skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);

		local_address(skc, &cur);
		if (mptcp_addresses_equal(&cur, saddr, saddr->port))
			return true;
	}

	return false;
}

static bool lookup_subflow_by_daddr(const struct list_head *list,
				    const struct mptcp_addr_info *daddr)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info cur;
	struct sock_common *skc;

	list_for_each_entry(subflow, list, node) {
		skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);

		remote_address(skc, &cur);
		if (mptcp_addresses_equal(&cur, daddr, daddr->port))
			return true;
	}

	return false;
}
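
/* Endpoint selection for new subflows: pick, under RCU, the first
 * 'subflow' endpoint whose ID is still available for this msk and
 * whose family is usable towards the peer (an AF_INET endpoint can
 * serve a v4-mapped IPv6 destination).
 */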
static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
		     const struct mptcp_sock *msk)
{
	const struct sock *sk = (const struct sock *)msk;
	struct mptcp_pm_addr_entry *entry, *ret = NULL;

	msk_owned_by_me(msk);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
			continue;

		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			if ((entry->addr.family == AF_INET &&
			     !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
			    (sk->sk_family == AF_INET &&
			     !ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
				continue;
		}

		ret = entry;
		break;
	}
	rcu_read_unlock();
	return ret;
}

static struct mptcp_pm_addr_entry *
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
{
	struct mptcp_pm_addr_entry *entry, *ret = NULL;

	rcu_read_lock();
	/* do not keep any additional per socket state, just signal
	 * the address list in order.
	 * Note: removal from the local address list during the msk life-cycle
	 * can lead to additional addresses not being announced.
	 */
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
			continue;

		ret = entry;
		break;
	}
	rcu_read_unlock();
	return ret;
}

unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
	const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);

unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);

unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);

unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);

bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
	    (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
			       MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
		WRITE_ONCE(msk->pm.work_pending, false);
		return false;
	}
	return true;
}
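
/* The anno_list tracks the addresses this host announced via ADD_ADDR;
 * each entry carries the retransmission timer used until the peer
 * echoes the announce or the retransmission limit is hit.
 */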
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *entry;

	lockdep_assert_held(&msk->pm.lock);

	list_for_each_entry(entry, &msk->pm.anno_list, list) {
		if (mptcp_addresses_equal(&entry->addr, addr, true))
			return entry;
	}

	return NULL;
}

bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
{
	struct mptcp_pm_add_entry *entry;
	struct mptcp_addr_info saddr;
	bool ret = false;

	local_address((struct sock_common *)sk, &saddr);

	spin_lock_bh(&msk->pm.lock);
	list_for_each_entry(entry, &msk->pm.anno_list, list) {
		if (mptcp_addresses_equal(&entry->addr, &saddr, true)) {
			ret = true;
			goto out;
		}
	}

out:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}
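
/* ADD_ADDR retransmission timer. While another ADD_ADDR signal is
 * still pending, just poll again shortly; otherwise re-announce the
 * address and re-arm, up to ADD_ADDR_RETRANS_MAX attempts. After the
 * last attempt, let the PM try to create subflows instead.
 */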
static void mptcp_pm_add_timer(struct timer_list *timer)
{
	struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer);
	struct mptcp_sock *msk = entry->sock;
	struct sock *sk = (struct sock *)msk;

	pr_debug("msk=%p", msk);

	if (!msk)
		return;

	if (inet_sk_state_load(sk) == TCP_CLOSE)
		return;

	if (!entry->addr.id)
		return;

	if (mptcp_pm_should_add_signal_addr(msk)) {
		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
		goto out;
	}

	spin_lock_bh(&msk->pm.lock);

	if (!mptcp_pm_should_add_signal_addr(msk)) {
		pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
		mptcp_pm_announce_addr(msk, &entry->addr, false);
		mptcp_pm_add_addr_send_ack(msk);
		entry->retrans_times++;
	}

	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
		sk_reset_timer(sk, timer,
			       jiffies + mptcp_get_add_addr_timeout(sock_net(sk)));

	spin_unlock_bh(&msk->pm.lock);

	if (entry->retrans_times == ADD_ADDR_RETRANS_MAX)
		mptcp_pm_subflow_established(msk);

out:
	__sock_put(sk);
}

struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
		       const struct mptcp_addr_info *addr, bool check_id)
{
	struct mptcp_pm_add_entry *entry;
	struct sock *sk = (struct sock *)msk;

	spin_lock_bh(&msk->pm.lock);
	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
	if (entry && (!check_id || entry->addr.id == addr->id))
		entry->retrans_times = ADD_ADDR_RETRANS_MAX;
	spin_unlock_bh(&msk->pm.lock);

	if (entry && (!check_id || entry->addr.id == addr->id))
		sk_stop_timer_sync(sk, &entry->add_timer);

	return entry;
}

bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
			      const struct mptcp_pm_addr_entry *entry)
{
	struct mptcp_pm_add_entry *add_entry = NULL;
	struct sock *sk = (struct sock *)msk;
	struct net *net = sock_net(sk);

	lockdep_assert_held(&msk->pm.lock);

	add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);

	if (add_entry) {
		if (mptcp_pm_is_kernel(msk))
			return false;

		sk_reset_timer(sk, &add_entry->add_timer,
			       jiffies + mptcp_get_add_addr_timeout(net));
		return true;
	}

	add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
	if (!add_entry)
		return false;

	list_add(&add_entry->list, &msk->pm.anno_list);

	add_entry->addr = entry->addr;
	add_entry->sock = msk;
	add_entry->retrans_times = 0;

	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
	sk_reset_timer(sk, &add_entry->add_timer,
		       jiffies + mptcp_get_add_addr_timeout(net));

	return true;
}

void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
{
	struct mptcp_pm_add_entry *entry, *tmp;
	struct sock *sk = (struct sock *)msk;
	LIST_HEAD(free_list);

	pr_debug("msk=%p", msk);

	spin_lock_bh(&msk->pm.lock);
	list_splice_init(&msk->pm.anno_list, &free_list);
	spin_unlock_bh(&msk->pm.lock);

	list_for_each_entry_safe(entry, tmp, &free_list, list) {
		sk_stop_timer_sync(sk, &entry->add_timer);
		kfree(entry);
	}
}

static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
				  const struct mptcp_addr_info *addr)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (addrs[i].id == addr->id)
			return true;
	}

	return false;
}

/* Fill all the remote addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
					      struct mptcp_addr_info *addrs)
{
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
	struct sock *sk = (struct sock *)msk, *ssk;
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info remote = { 0 };
	unsigned int subflows_max;
	int i = 0;

	subflows_max = mptcp_pm_get_subflows_max(msk);
	remote_address((struct sock_common *)sk, &remote);

	/* Non-fullmesh endpoint, fill in the single entry
	 * corresponding to the primary MPC subflow remote address
	 */
	if (!fullmesh) {
		if (deny_id0)
			return 0;

		msk->pm.subflows++;
		addrs[i++] = remote;
	} else {
		mptcp_for_each_subflow(msk, subflow) {
			ssk = mptcp_subflow_tcp_sock(subflow);
			remote_address((struct sock_common *)ssk, &addrs[i]);
			addrs[i].id = subflow->remote_id;
			if (deny_id0 && !addrs[i].id)
				continue;

			if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
			    msk->pm.subflows < subflows_max) {
				msk->pm.subflows++;
				i++;
			}
		}
	}

	return i;
}
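
/* Send a bare ACK on one subflow so that the pending PM option
 * (ADD_ADDR, RM_ADDR, or MP_PRIO when 'prio' is set) reaches the peer.
 * mptcp_pm_send_ack() is the variant usable with the PM spinlock held:
 * it drops the lock around the subflow operation.
 */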
static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
				bool prio, bool backup)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
	bool slow;

	pr_debug("send ack for %s",
		 prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"));

	slow = lock_sock_fast(ssk);
	if (prio) {
		if (subflow->backup != backup)
			msk->last_snd = NULL;

		subflow->send_mp_prio = 1;
		subflow->backup = backup;
		subflow->request_bkup = backup;
	}

	__mptcp_subflow_send_ack(ssk);
	unlock_sock_fast(ssk, slow);
}

static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
			      bool prio, bool backup)
{
	spin_unlock_bh(&msk->pm.lock);
	__mptcp_pm_send_ack(msk, subflow, prio, backup);
	spin_lock_bh(&msk->pm.lock);
}

static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, &pernet->local_addr_list, list) {
		if (entry->addr.id == id)
			return entry;
	}
	return NULL;
}

static struct mptcp_pm_addr_entry *
__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
	      bool lookup_by_id)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, &pernet->local_addr_list, list) {
		if ((!lookup_by_id &&
		     mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) ||
		    (lookup_by_id && entry->addr.id == info->id))
			return entry;
	}
	return NULL;
}
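
/* Main in-kernel path-manager step: account the MPC subflow endpoint
 * on first use, then announce at most one pending 'signal' endpoint,
 * and finally create new subflows from 'subflow' endpoints while the
 * local address and subflow limits allow it.
 */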
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *local;
	unsigned int add_addr_signal_max;
	unsigned int local_addr_max;
	struct pm_nl_pernet *pernet;
	unsigned int subflows_max;

	pernet = pm_nl_get_pernet(sock_net(sk));

	add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
	local_addr_max = mptcp_pm_get_local_addr_max(msk);
	subflows_max = mptcp_pm_get_subflows_max(msk);

	/* do lazy endpoint usage accounting for the MPC subflows */
	if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
		struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first);
		struct mptcp_pm_addr_entry *entry;
		struct mptcp_addr_info mpc_addr;
		bool backup = false;

		local_address((struct sock_common *)msk->first, &mpc_addr);
		rcu_read_lock();
		entry = __lookup_addr(pernet, &mpc_addr, false);
		if (entry) {
			__clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
			msk->mpc_endpoint_id = entry->addr.id;
			backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
		}
		rcu_read_unlock();

		if (backup)
			mptcp_pm_send_ack(msk, subflow, true, backup);

		msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
	}

	pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
		 msk->pm.local_addr_used, local_addr_max,
		 msk->pm.add_addr_signaled, add_addr_signal_max,
		 msk->pm.subflows, subflows_max);

	/* check first for announce */
	if (msk->pm.add_addr_signaled < add_addr_signal_max) {
		local = select_signal_address(pernet, msk);

		/* due to racing events on both ends we can reach here while
		 * previous add address is still running: if we invoke now
		 * mptcp_pm_announce_addr(), that will fail and the
		 * corresponding id will be marked as used.
		 * Instead let the PM machinery reschedule us when the
		 * current address announce will be completed.
		 */
		if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
			return;

		if (local) {
			if (mptcp_pm_alloc_anno_list(msk, local)) {
				__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
				msk->pm.add_addr_signaled++;
				mptcp_pm_announce_addr(msk, &local->addr, false);
				mptcp_pm_nl_addr_send_ack(msk);
			}
		}
	}

	/* check if should create a new subflow */
	while (msk->pm.local_addr_used < local_addr_max &&
	       msk->pm.subflows < subflows_max) {
		struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
		bool fullmesh;
		int i, nr;

		local = select_local_address(pernet, msk);
		if (!local)
			break;

		fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

		msk->pm.local_addr_used++;
		nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
		if (nr)
			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
		spin_unlock_bh(&msk->pm.lock);
		for (i = 0; i < nr; i++)
			__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
		spin_lock_bh(&msk->pm.lock);
	}
	mptcp_pm_nl_check_work_pending(msk);
}

static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

/* Fill all the local addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
					     struct mptcp_addr_info *addrs)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info local;
	struct pm_nl_pernet *pernet;
	unsigned int subflows_max;
	int i = 0;

	pernet = pm_nl_get_pernet_from_msk(msk);
	subflows_max = mptcp_pm_get_subflows_max(msk);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
			continue;

		if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
			if ((entry->addr.family == AF_INET &&
			     !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
			    (sk->sk_family == AF_INET &&
			     !ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
				continue;
		}

		if (msk->pm.subflows < subflows_max) {
			msk->pm.subflows++;
			addrs[i++] = entry->addr;
		}
	}
	rcu_read_unlock();

	/* If the array is empty, fill in the single 'any' (INADDR_ANY /
	 * in6addr_any) local address and let routing pick the source
	 */
	if (!i) {
		memset(&local, 0, sizeof(local));
		local.family = msk->pm.remote.family;

		msk->pm.subflows++;
		addrs[i++] = local;
	}

	return i;
}
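
/* Handle a received ADD_ADDR: echo it back to the peer and, unless a
 * subflow towards that address already exists, connect to it from
 * every 'fullmesh' local endpoint, or from the routing-selected local
 * address when no fullmesh endpoint is configured.
 */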
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
	struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
	struct sock *sk = (struct sock *)msk;
	unsigned int add_addr_accept_max;
	struct mptcp_addr_info remote;
	unsigned int subflows_max;
	int i, nr;

	add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
	subflows_max = mptcp_pm_get_subflows_max(msk);

	pr_debug("accepted %d:%d remote family %d",
		 msk->pm.add_addr_accepted, add_addr_accept_max,
		 msk->pm.remote.family);

	remote = msk->pm.remote;
	mptcp_pm_announce_addr(msk, &remote, true);
	mptcp_pm_nl_addr_send_ack(msk);

	if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
		return;

	/* if the announced address carried no port, reuse the destination
	 * port of the id-0 (initial) subflow
	 */
	if (!remote.port)
		remote.port = sk->sk_dport;

	/* connect to the specified remote address, using whatever
	 * local address the routing configuration will pick.
	 */
	nr = fill_local_addresses_vec(msk, addrs);

	msk->pm.add_addr_accepted++;
	if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
	    msk->pm.subflows >= subflows_max)
		WRITE_ONCE(msk->pm.accept_addr, false);

	spin_unlock_bh(&msk->pm.lock);
	for (i = 0; i < nr; i++)
		__mptcp_subflow_connect(sk, &addrs[i], &remote);
	spin_lock_bh(&msk->pm.lock);
}

void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;

	msk_owned_by_me(msk);
	lockdep_assert_held(&msk->pm.lock);

	if (!mptcp_pm_should_add_signal(msk) &&
	    !mptcp_pm_should_rm_signal(msk))
		return;

	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
	if (subflow)
		mptcp_pm_send_ack(msk, subflow, false, false);
}

int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr,
				 struct mptcp_addr_info *rem,
				 u8 bkup)
{
	struct mptcp_subflow_context *subflow;

	pr_debug("bkup=%d", bkup);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_addr_info local, remote;

		local_address((struct sock_common *)ssk, &local);
		if (!mptcp_addresses_equal(&local, addr, addr->port))
			continue;

		if (rem && rem->family != AF_UNSPEC) {
			remote_address((struct sock_common *)ssk, &remote);
			if (!mptcp_addresses_equal(&remote, rem, rem->port))
				continue;
		}

		__mptcp_pm_send_ack(msk, subflow, true, bkup);
		return 0;
	}

	return -EINVAL;
}

static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id)
{
	return local_id == id || (!local_id && msk->mpc_endpoint_id == id);
}
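
/* Close the subflows matching an RM_ADDR / remove-subflow list:
 * RMADDR entries are matched on the remote address ID, RMSUBFLOW
 * entries on the local one (ID 0 aliases the MPC endpoint ID), and the
 * PM accounting is updated for each removed subflow.
 */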
static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
					   const struct mptcp_rm_list *rm_list,
					   enum linux_mptcp_mib_field rm_type)
{
	struct mptcp_subflow_context *subflow, *tmp;
	struct sock *sk = (struct sock *)msk;
	u8 i;

	pr_debug("%s rm_list_nr %d",
		 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);

	msk_owned_by_me(msk);

	if (sk->sk_state == TCP_LISTEN)
		return;

	if (!rm_list->nr)
		return;

	if (list_empty(&msk->conn_list))
		return;

	for (i = 0; i < rm_list->nr; i++) {
		u8 rm_id = rm_list->ids[i];
		bool removed = false;

		mptcp_for_each_subflow_safe(msk, subflow, tmp) {
			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
			u8 id = subflow->local_id;

			if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
				continue;
			if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
				continue;

			pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
				 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
				 i, rm_id, subflow->local_id, subflow->remote_id,
				 msk->mpc_endpoint_id);
			spin_unlock_bh(&msk->pm.lock);
			mptcp_subflow_shutdown(sk, ssk, how);

			/* the following takes care of updating the subflows counter */
			mptcp_close_ssk(sk, ssk, subflow);
			spin_lock_bh(&msk->pm.lock);

			removed = true;
			__MPTCP_INC_STATS(sock_net(sk), rm_type);
		}
		if (rm_type == MPTCP_MIB_RMSUBFLOW)
			__set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
		if (!removed)
			continue;

		if (!mptcp_pm_is_kernel(msk))
			continue;

		if (rm_type == MPTCP_MIB_RMADDR) {
			msk->pm.add_addr_accepted--;
			WRITE_ONCE(msk->pm.accept_addr, true);
		} else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
			msk->pm.local_addr_used--;
		}
	}
}

static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
	mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
}

void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
				     const struct mptcp_rm_list *rm_list)
{
	mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
}
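
/* PM worker entry point: consume the pending pm->status event bits
 * under the PM spinlock and dispatch to the handlers above.
 */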
void mptcp_pm_nl_work(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	msk_owned_by_me(msk);

	if (!(pm->status & MPTCP_PM_WORK_MASK))
		return;

	spin_lock_bh(&msk->pm.lock);

	pr_debug("msk=%p status=%x", msk, pm->status);
	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
		mptcp_pm_nl_add_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
		mptcp_pm_nl_addr_send_ack(msk);
	}
	if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
		mptcp_pm_nl_rm_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
		mptcp_pm_nl_fully_established(msk);
	}
	if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
		mptcp_pm_nl_subflow_established(msk);
	}

	spin_unlock_bh(&msk->pm.lock);
}
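
/* A configured port is honoured only for signal-only endpoints: those
 * are backed by a dedicated in-kernel listening socket, while plain
 * 'subflow' endpoints connect from a kernel-chosen source port.
 */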
static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
	return (entry->flags &
		(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
		MPTCP_PM_ADDR_FLAG_SIGNAL;
}

/* caller must ensure the RCU grace period is already elapsed */
static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
	if (entry->lsk)
		sock_release(entry->lsk);
	kfree(entry);
}
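
/* Register a new local endpoint, allocating the next free address ID
 * from id_bitmap when the caller did not provide one. An IMPLICIT
 * entry for the same address may be transparently replaced. Returns
 * the assigned ID, or -EINVAL on duplicates or ID/endpoint exhaustion.
 */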
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
					     struct mptcp_pm_addr_entry *entry)
{
	struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
	unsigned int addr_max;
	int ret = -EINVAL;

	spin_lock_bh(&pernet->lock);

	/* to keep the code simple, don't do IDR-like allocation for address ID,
	 * just bail when we exceed limits
	 */
	if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
		pernet->next_id = 1;
	if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
		goto out;
	if (test_bit(entry->addr.id, pernet->id_bitmap))
		goto out;

	/* do not insert duplicate addresses; differentiate on port only
	 * for signal endpoints
	 */
	if (!address_use_port(entry))
		entry->addr.port = 0;
	list_for_each_entry(cur, &pernet->local_addr_list, list) {
		if (mptcp_addresses_equal(&cur->addr, &entry->addr,
					  cur->addr.port || entry->addr.port)) {
			/* allow replacing the existing endpoint only if such
			 * endpoint is an implicit one and the user-space
			 * did not provide an endpoint id
			 */
			if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
				goto out;
			if (entry->addr.id)
				goto out;

			pernet->addrs--;
			entry->addr.id = cur->addr.id;
			list_del_rcu(&cur->list);
			del_entry = cur;
			break;
		}
	}

	if (!entry->addr.id) {
find_next:
		entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
						    MPTCP_PM_MAX_ADDR_ID + 1,
						    pernet->next_id);
		if (!entry->addr.id && pernet->next_id != 1) {
			pernet->next_id = 1;
			goto find_next;
		}
	}

	if (!entry->addr.id)
		goto out;

	__set_bit(entry->addr.id, pernet->id_bitmap);
	if (entry->addr.id > pernet->next_id)
		pernet->next_id = entry->addr.id;

	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->add_addr_signal_max;
		WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->local_addr_max;
		WRITE_ONCE(pernet->local_addr_max, addr_max + 1);
	}

	pernet->addrs++;
	if (!entry->addr.port)
		list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
	else
		list_add_rcu(&entry->list, &pernet->local_addr_list);
	ret = entry->addr.id;

out:
	spin_unlock_bh(&pernet->lock);

	/* just replaced an existing entry, free it */
	if (del_entry) {
		synchronize_rcu();
		__mptcp_pm_release_addr_entry(del_entry);
	}
	return ret;
}
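
/* Endpoints created with a port are backed by a kernel MPTCP listening
 * socket, so that connection attempts towards the announced
 * address/port can be accepted; the dedicated lock classes below avoid
 * false-positive lockdep reports on that kernel socket.
 */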
static struct lock_class_key mptcp_slock_keys[2];
static struct lock_class_key mptcp_keys[2];

static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
					    struct mptcp_pm_addr_entry *entry)
{
	bool is_ipv6 = sk->sk_family == AF_INET6;
	int addrlen = sizeof(struct sockaddr_in);
	struct sockaddr_storage addr;
	struct socket *ssock;
	struct sock *newsk;
	int backlog = 1024;
	int err;

	err = sock_create_kern(sock_net(sk), entry->addr.family,
			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
	if (err)
		return err;

	newsk = entry->lsk->sk;
	if (!newsk)
		return -EINVAL;

	/* The subflow socket lock is acquired nested within the msk one
	 * in several places, even by the TCP stack, and this msk is a kernel
	 * socket: lockdep complains. Instead of propagating the _nested
	 * modifiers in several places, re-init the lock class for the msk
	 * socket to an mptcp specific one.
	 */
	sock_lock_init_class_and_name(newsk,
				      is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
				      &mptcp_slock_keys[is_ipv6],
				      is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
				      &mptcp_keys[is_ipv6]);

	lock_sock(newsk);
	ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
	release_sock(newsk);
	if (!ssock)
		return -EINVAL;

	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (entry->addr.family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
	if (err) {
		pr_warn("kernel_bind error, err=%d", err);
		return err;
	}

	inet_sk_state_store(newsk, TCP_LISTEN);
	err = kernel_listen(ssock, backlog);
	if (err) {
		pr_warn("kernel_listen error, err=%d", err);
		return err;
	}

	return 0;
}

int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info skc_local;
	struct mptcp_addr_info msk_local;
	struct pm_nl_pernet *pernet;
	int ret = -1;

	if (WARN_ON_ONCE(!msk))
		return -1;

	/* The 0 ID mapping is defined by the first subflow, copied into the msk
	 * addr
	 */
	local_address((struct sock_common *)msk, &msk_local);
	local_address((struct sock_common *)skc, &skc_local);
	if (mptcp_addresses_equal(&msk_local, &skc_local, false))
		return 0;

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_get_local_id(msk, &skc_local);

	pernet = pm_nl_get_pernet_from_msk(msk);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
			ret = entry->addr.id;
			break;
		}
	}
	rcu_read_unlock();
	if (ret >= 0)
		return ret;

	/* address not found, add to local list */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;

	entry->addr = skc_local;
	entry->addr.id = 0;
	entry->addr.port = 0;
	entry->ifindex = 0;
	entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
	entry->lsk = NULL;
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
	if (ret < 0)
		kfree(entry);

	return ret;
}
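
/* Generic netlink glue: the multicast groups, attribute policies and
 * command handlers below implement the MPTCP_PM_CMD_* API used by
 * userspace (e.g. 'ip mptcp endpoint' from iproute2).
 */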
#define MPTCP_PM_CMD_GRP_OFFSET	0
#define MPTCP_PM_EV_GRP_OFFSET	1

static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
	[MPTCP_PM_CMD_GRP_OFFSET]	= { .name = MPTCP_PM_CMD_GRP_NAME, },
	[MPTCP_PM_EV_GRP_OFFSET]	= { .name = MPTCP_PM_EV_GRP_NAME,
					    .flags = GENL_UNS_ADMIN_PERM,
					  },
};

static const struct nla_policy
mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = {
	[MPTCP_PM_ADDR_ATTR_FAMILY]	= { .type = NLA_U16, },
	[MPTCP_PM_ADDR_ATTR_ID]		= { .type = NLA_U8, },
	[MPTCP_PM_ADDR_ATTR_ADDR4]	= { .type = NLA_U32, },
	[MPTCP_PM_ADDR_ATTR_ADDR6]	=
		NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[MPTCP_PM_ADDR_ATTR_PORT]	= { .type = NLA_U16 },
	[MPTCP_PM_ADDR_ATTR_FLAGS]	= { .type = NLA_U32 },
	[MPTCP_PM_ADDR_ATTR_IF_IDX]	= { .type = NLA_S32 },
};

static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
	[MPTCP_PM_ATTR_ADDR]		=
		NLA_POLICY_NESTED(mptcp_pm_addr_policy),
	[MPTCP_PM_ATTR_RCV_ADD_ADDRS]	= { .type = NLA_U32, },
	[MPTCP_PM_ATTR_SUBFLOWS]	= { .type = NLA_U32, },
	[MPTCP_PM_ATTR_TOKEN]		= { .type = NLA_U32, },
	[MPTCP_PM_ATTR_LOC_ID]		= { .type = NLA_U8, },
	[MPTCP_PM_ATTR_ADDR_REMOTE]	=
		NLA_POLICY_NESTED(mptcp_pm_addr_policy),
};
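
/* Mark a subflow as stale once its stale_count exceeds the per-netns
 * stale_loss_cnt threshold and at least one healthier active subflow
 * exists, then re-inject and push the pending data elsewhere.
 */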
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = (struct sock *)msk;
	unsigned int active_max_loss_cnt;
	struct net *net = sock_net(sk);
	unsigned int stale_loss_cnt;
	bool slow;

	stale_loss_cnt = mptcp_stale_loss_cnt(net);
	if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
		return;

	/* look for another available subflow not in loss state */
	active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
	mptcp_for_each_subflow(msk, iter) {
		if (iter != subflow && mptcp_subflow_active(iter) &&
		    iter->stale_count < active_max_loss_cnt) {
			/* we have some alternatives, try to mark this subflow as idle ...*/
			slow = lock_sock_fast(ssk);
			if (!tcp_rtx_and_write_queues_empty(ssk)) {
				subflow->stale = 1;
				__mptcp_retransmit_pending_data(sk);
				MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
			}
			unlock_sock_fast(ssk, slow);

			/* always try to push the pending data regardless of re-injections:
			 * we can possibly use backup subflows now, and subflow selection
			 * is cheap under the msk socket lock
			 */
			__mptcp_push_pending(sk, 0);
			return;
		}
	}
}

static int mptcp_pm_family_to_addr(int family)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (family == AF_INET6)
		return MPTCP_PM_ADDR_ATTR_ADDR6;
#endif
	return MPTCP_PM_ADDR_ATTR_ADDR4;
}

static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
				       const struct nlattr *attr,
				       struct genl_info *info,
				       struct mptcp_addr_info *addr,
				       bool require_family)
{
	int err, addr_addr;

	if (!attr) {
		GENL_SET_ERR_MSG(info, "missing address info");
		return -EINVAL;
	}

	/* no validation needed - was already done via nested policy */
	err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
					  mptcp_pm_addr_policy, info->extack);
	if (err)
		return err;

	if (tb[MPTCP_PM_ADDR_ATTR_ID])
		addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);

	if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
		if (!require_family)
			return err;

		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "missing family");
		return -EINVAL;
	}

	addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
	if (addr->family != AF_INET
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	    && addr->family != AF_INET6
#endif
	    ) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "unknown address family");
		return -EINVAL;
	}
	addr_addr = mptcp_pm_family_to_addr(addr->family);
	if (!tb[addr_addr]) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "missing address data");
		return -EINVAL;
	}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (addr->family == AF_INET6)
		addr->addr6 = nla_get_in6_addr(tb[addr_addr]);
	else
#endif
		addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]);

	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
		addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));

	return err;
}

int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
			struct mptcp_addr_info *addr)
{
	struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];

	memset(addr, 0, sizeof(*addr));

	return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true);
}

int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
			 bool require_family,
			 struct mptcp_pm_addr_entry *entry)
{
	struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
	int err;

	memset(entry, 0, sizeof(*entry));

	err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family);
	if (err)
		return err;

	if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
		u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);

		entry->ifindex = val;
	}

	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);

	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
		entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));

	return 0;
}

static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
	return pm_nl_get_pernet(genl_info_net(info));
}
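
/* Walk all MPTCP sockets in the netns via the token-hash iterator and
 * let the in-kernel PM re-evaluate endpoint usage on each established,
 * kernel-managed connection.
 */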
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
{
	struct mptcp_sock *msk;
	long s_slot = 0, s_num = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!READ_ONCE(msk->fully_established) ||
		    mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_create_subflow_or_signal_addr(msk);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}
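
/* MPTCP_PM_CMD_ADD_ADDR handler: a port requires the 'signal' flag,
 * 'signal' and 'fullmesh' are mutually exclusive and IMPLICIT is
 * reserved for kernel-created entries; on success the new endpoint is
 * immediately offered to the existing connections.
 */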
static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	int ret;

	ret = mptcp_pm_parse_entry(attr, info, true, &addr);
	if (ret < 0)
		return ret;

	if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
		GENL_SET_ERR_MSG(info, "flags must have signal when using port");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
	    addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
		GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
		return -EINVAL;
	}

	entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "can't allocate addr");
		return -ENOMEM;
	}

	*entry = addr;
	if (entry->addr.port) {
		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
		if (ret) {
			GENL_SET_ERR_MSG(info, "create listen socket error");
			goto out_free;
		}
	}
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
	if (ret < 0) {
		GENL_SET_ERR_MSG(info, "too many addresses or duplicate one");
		goto out_free;
	}

	mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk));
	return 0;

out_free:
	__mptcp_pm_release_addr_entry(entry);
	return ret;
}

int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
					 u8 *flags, int *ifindex)
{
	struct mptcp_pm_addr_entry *entry;
	struct sock *sk = (struct sock *)msk;
	struct net *net = sock_net(sk);

	*flags = 0;
	*ifindex = 0;

	if (id) {
		if (mptcp_pm_is_userspace(msk))
			return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
									      id,
									      flags,
									      ifindex);

		rcu_read_lock();
		entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
		if (entry) {
			*flags = entry->flags;
			*ifindex = entry->ifindex;
		}
		rcu_read_unlock();
	}

	return 0;
}
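
/* Removal helpers: drop a pending announce (stopping its retransmit
 * timer) and, where needed, signal RM_ADDR to the peer and tear down
 * the subflows bound to the removed local address on every msk.
 */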
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
				      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *entry;

	entry = mptcp_pm_del_add_timer(msk, addr, false);
	if (entry) {
		list_del(&entry->list);
		kfree(entry);
		return true;
	}

	return false;
}

static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
				      const struct mptcp_addr_info *addr,
				      bool force)
{
	struct mptcp_rm_list list = { .nr = 0 };
	bool ret;

	list.ids[list.nr++] = addr->id;

	ret = remove_anno_list_by_saddr(msk, addr);
	if (ret || force) {
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &list);
		spin_unlock_bh(&msk->pm.lock);
	}
	return ret;
}

static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
						   const struct mptcp_pm_addr_entry *entry)
{
	const struct mptcp_addr_info *addr = &entry->addr;
	struct mptcp_rm_list list = { .nr = 0 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	pr_debug("remove_id=%d", addr->id);

	list.ids[list.nr++] = addr->id;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		bool remove_subflow;

		if (mptcp_pm_is_userspace(msk))
			goto next;

		if (list_empty(&msk->conn_list)) {
			mptcp_pm_remove_anno_addr(msk, addr, false);
			goto next;
		}

		lock_sock(sk);
		remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
					  !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
		if (remove_subflow)
			mptcp_pm_remove_subflow(msk, &list);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static int mptcp_nl_remove_id_zero_address(struct net *net,
					   struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	list.ids[list.nr++] = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info msk_local;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		local_address((struct sock_common *)msk, &msk_local);
		if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
			goto next;

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &list);
		mptcp_pm_nl_rm_subflow_received(msk, &list);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	unsigned int addr_max;
	int ret;

	ret = mptcp_pm_parse_entry(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	/* the zero id address is special: the first address used by the msk
	 * always gets such an id, so different subflows can have different zero
	 * id addresses. Additionally zero id is not accounted for in id_bitmap.
	 * Let's use an 'mptcp_rm_list' instead of the common remove code.
	 */
	if (addr.addr.id == 0)
		return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);

	spin_lock_bh(&pernet->lock);
	entry = __lookup_addr_by_id(pernet, addr.addr.id);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "address not found");
		spin_unlock_bh(&pernet->lock);
		return -EINVAL;
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->add_addr_signal_max;
		WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->local_addr_max;
		WRITE_ONCE(pernet->local_addr_max, addr_max - 1);
	}

	pernet->addrs--;
	list_del_rcu(&entry->list);
	__clear_bit(entry->addr.id, pernet->id_bitmap);
	spin_unlock_bh(&pernet->lock);

	mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
	synchronize_rcu();
	__mptcp_pm_release_addr_entry(entry);

	return ret;
}
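
/* Batch-removal helpers: build RM_ADDR / remove-subflow ID lists from
 * a list of endpoint entries and apply them to the given msk; used by
 * the flush path below and by the userspace PM.
 */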
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
{
	struct mptcp_rm_list alist = { .nr = 0 };
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, rm_list, list) {
		if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
		     lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
		    alist.nr < MPTCP_RM_IDS_MAX)
			alist.ids[alist.nr++] = entry->addr.id;
	}

	if (alist.nr) {
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &alist);
		spin_unlock_bh(&msk->pm.lock);
	}
}

void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
					struct list_head *rm_list)
{
	struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, rm_list, list) {
		if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
		    slist.nr < MPTCP_RM_IDS_MAX)
			slist.ids[slist.nr++] = entry->addr.id;

		if (remove_anno_list_by_saddr(msk, &entry->addr) &&
		    alist.nr < MPTCP_RM_IDS_MAX)
			alist.ids[alist.nr++] = entry->addr.id;
	}

	if (alist.nr) {
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &alist);
		spin_unlock_bh(&msk->pm.lock);
	}
	if (slist.nr)
		mptcp_pm_remove_subflow(msk, &slist);
}

static void mptcp_nl_remove_addrs_list(struct net *net,
				       struct list_head *rm_list)
{
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	if (list_empty(rm_list))
		return;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!mptcp_pm_is_userspace(msk)) {
			lock_sock(sk);
			mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
			release_sock(sk);
		}

		sock_put(sk);
		cond_resched();
	}
}
  1327. /* caller must ensure the RCU grace period is already elapsed */
  1328. static void __flush_addrs(struct list_head *list)
  1329. {
  1330. while (!list_empty(list)) {
  1331. struct mptcp_pm_addr_entry *cur;
  1332. cur = list_entry(list->next,
  1333. struct mptcp_pm_addr_entry, list);
  1334. list_del_rcu(&cur->list);
  1335. __mptcp_pm_release_addr_entry(cur);
  1336. }
  1337. }
  1338. static void __reset_counters(struct pm_nl_pernet *pernet)
  1339. {
  1340. WRITE_ONCE(pernet->add_addr_signal_max, 0);
  1341. WRITE_ONCE(pernet->add_addr_accept_max, 0);
  1342. WRITE_ONCE(pernet->local_addr_max, 0);
  1343. pernet->addrs = 0;
  1344. }
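/* MPTCP_PM_CMD_FLUSH_ADDRS ("ip mptcp endpoint flush" in iproute2, assuming
 * the usual CLI mapping): the whole endpoint list is spliced onto a private
 * list under the pernet lock, counters and the id bitmap are reset, and the
 * detached entries are then used to tear down in-use addresses before being
 * freed after an RCU grace period -- the same publish/retire pattern as
 * single-endpoint removal above.
 */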
static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	LIST_HEAD(free_list);

	spin_lock_bh(&pernet->lock);
	list_splice_init(&pernet->local_addr_list, &free_list);
	__reset_counters(pernet);
	pernet->next_id = 1;
	bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	spin_unlock_bh(&pernet->lock);
	mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
	synchronize_rcu();
	__flush_addrs(&free_list);
	return 0;
}
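/* Serialize one endpoint as a nested MPTCP_PM_ATTR_ADDR attribute. Any
 * nla_put_*() failure means the skb tail is full; nla_nest_cancel() then
 * trims the partially written nest so the caller sees either a complete
 * attribute or none at all.
 */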
static int mptcp_nl_fill_addr(struct sk_buff *skb,
			      struct mptcp_pm_addr_entry *entry)
{
	struct mptcp_addr_info *addr = &entry->addr;
	struct nlattr *attr;

	attr = nla_nest_start(skb, MPTCP_PM_ATTR_ADDR);
	if (!attr)
		return -EMSGSIZE;

	if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family))
		goto nla_put_failure;
	if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port)))
		goto nla_put_failure;
	if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
		goto nla_put_failure;
	if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags))
		goto nla_put_failure;
	if (entry->ifindex &&
	    nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
		goto nla_put_failure;

	if (addr->family == AF_INET &&
	    nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4,
			    addr->addr.s_addr))
		goto nla_put_failure;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->family == AF_INET6 &&
		 nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6))
		goto nla_put_failure;
#endif
	nla_nest_end(skb, attr);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, attr);
	return -EMSGSIZE;
}

static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct sk_buff *msg;
	void *reply;
	int ret;

	ret = mptcp_pm_parse_entry(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
				  info->genlhdr->cmd);
	if (!reply) {
		GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
		ret = -EMSGSIZE;
		goto fail;
	}

	spin_lock_bh(&pernet->lock);
	entry = __lookup_addr_by_id(pernet, addr.addr.id);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "address not found");
		ret = -EINVAL;
		goto unlock_fail;
	}

	ret = mptcp_nl_fill_addr(msg, entry);
	if (ret)
		goto unlock_fail;

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	spin_unlock_bh(&pernet->lock);
	return ret;

unlock_fail:
	spin_unlock_bh(&pernet->lock);

fail:
	nlmsg_free(msg);
	return ret;
}
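/* Dump side of MPTCP_PM_CMD_GET_ADDR. Netlink dumps are restartable: this
 * handler may run several times for one "ip mptcp endpoint show", so the
 * last emitted id is parked in cb->args[0] and the id_bitmap scan resumes
 * from it on the next call, skipping any entry whose id is not above the
 * stored one.
 */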
static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
				   struct netlink_callback *cb)
{
	struct net *net = sock_net(msg->sk);
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int id = cb->args[0];
	void *hdr;
	int i;

	pernet = pm_nl_get_pernet(net);

	spin_lock_bh(&pernet->lock);
	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
		if (test_bit(i, pernet->id_bitmap)) {
			entry = __lookup_addr_by_id(pernet, i);
			if (!entry)
				break;

			if (entry->addr.id <= id)
				continue;

			hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, &mptcp_genl_family,
					  NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
			if (!hdr)
				break;

			if (mptcp_nl_fill_addr(msg, entry) < 0) {
				genlmsg_cancel(msg, hdr);
				break;
			}

			id = entry->addr.id;
			genlmsg_end(msg, hdr);
		}
	}
	spin_unlock_bh(&pernet->lock);
	cb->args[0] = id;

	return msg->len;
}
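/* Per-netns limits, MPTCP_PM_CMD_SET_LIMITS / _GET_LIMITS. With iproute2
 * this is e.g. "ip mptcp limits set subflow 2 add_addr_accepted 2" (command
 * syntax per iproute2, shown only for orientation). Each limit attribute
 * is optional: an absent attribute keeps the current value, and values
 * above MPTCP_PM_ADDR_MAX are rejected with an extack message.
 */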
static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
	struct nlattr *attr = info->attrs[id];

	if (!attr)
		return 0;

	*limit = nla_get_u32(attr);
	if (*limit > MPTCP_PM_ADDR_MAX) {
		GENL_SET_ERR_MSG(info, "limit greater than maximum");
		return -EINVAL;
	}
	return 0;
}

static int
mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	unsigned int rcv_addrs, subflows;
	int ret;

	spin_lock_bh(&pernet->lock);
	rcv_addrs = pernet->add_addr_accept_max;
	ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs);
	if (ret)
		goto unlock;

	subflows = pernet->subflows_max;
	ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows);
	if (ret)
		goto unlock;

	WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs);
	WRITE_ONCE(pernet->subflows_max, subflows);

unlock:
	spin_unlock_bh(&pernet->lock);
	return ret;
}

static int
mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct sk_buff *msg;
	void *reply;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
				  MPTCP_PM_CMD_GET_LIMITS);
	if (!reply)
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS,
			READ_ONCE(pernet->add_addr_accept_max)))
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS,
			READ_ONCE(pernet->subflows_max)))
		goto fail;

	genlmsg_end(msg, reply);
	return genlmsg_reply(msg, info);

fail:
	GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
	nlmsg_free(msg);
	return -EMSGSIZE;
}
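/* Flipping MPTCP_PM_ADDR_FLAG_FULLMESH on a live endpoint: the subflows
 * bound to this address id are first torn down, then
 * mptcp_pm_create_subflow_or_signal_addr() re-creates them according to
 * the new fullmesh setting, both steps under the msk pm lock.
 */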
static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };

	list.ids[list.nr++] = addr->id;

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_nl_rm_subflow_received(msk, &list);
	mptcp_pm_create_subflow_or_signal_addr(msk);
	spin_unlock_bh(&msk->pm.lock);
}

static int mptcp_nl_set_flags(struct net *net,
			      struct mptcp_addr_info *addr,
			      u8 bkup, u8 changed)
{
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;
	int ret = -EINVAL;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
			ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup);
		if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
			mptcp_pm_nl_fullmesh(msk, addr);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return ret;
}
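/* MPTCP_PM_CMD_SET_FLAGS, i.e. "ip mptcp endpoint change id 1 backup" or
 * "... fullmesh" with a recent iproute2 (the CLI spelling is iproute2's).
 * Only the backup and fullmesh bits may be changed here, fullmesh is
 * refused on signal endpoints, and when a token attribute is present the
 * request is routed to the userspace path manager instead of the
 * in-kernel one.
 */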
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
	struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
	struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
	struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
	struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
			   MPTCP_PM_ADDR_FLAG_FULLMESH;
	struct net *net = sock_net(skb->sk);
	u8 bkup = 0, lookup_by_id = 0;
	int ret;

	ret = mptcp_pm_parse_entry(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	if (attr_rem) {
		ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
		if (ret < 0)
			return ret;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
		bkup = 1;
	if (addr.addr.family == AF_UNSPEC) {
		lookup_by_id = 1;
		if (!addr.addr.id)
			return -EOPNOTSUPP;
	}

	if (token)
		return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
						    token, &addr, &remote, bkup);

	spin_lock_bh(&pernet->lock);
	entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
	if (!entry) {
		spin_unlock_bh(&pernet->lock);
		return -EINVAL;
	}
	if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
	    (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
		spin_unlock_bh(&pernet->lock);
		return -EINVAL;
	}

	changed = (addr.flags ^ entry->flags) & mask;
	entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
	addr = *entry;
	spin_unlock_bh(&pernet->lock);

	mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
	return 0;
}
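/* Event reporting to userspace. Listeners join the multicast group at
 * MPTCP_PM_EV_GRP_OFFSET ("ip mptcp monitor" is one such consumer,
 * assuming a recent iproute2); genl_has_listeners() is checked before each
 * event is built so idle systems pay no allocation cost. The same listener
 * check doubles as the "is a userspace PM active" test below.
 */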
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
{
	genlmsg_multicast_netns(&mptcp_genl_family, net,
				nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}

bool mptcp_userspace_pm_active(const struct mptcp_sock *msk)
{
	return genl_has_listeners(&mptcp_genl_family,
				  sock_net((const struct sock *)msk),
				  MPTCP_PM_EV_GRP_OFFSET);
}

static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
	const struct inet_sock *issk = inet_sk(ssk);
	const struct mptcp_subflow_context *sf;

	if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
		return -EMSGSIZE;

	switch (ssk->sk_family) {
	case AF_INET:
		if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
			return -EMSGSIZE;
		if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr))
			return -EMSGSIZE;
		break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	case AF_INET6: {
		const struct ipv6_pinfo *np = inet6_sk(ssk);

		if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
			return -EMSGSIZE;
		if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr))
			return -EMSGSIZE;
		break;
	}
#endif
	default:
		WARN_ON_ONCE(1);
		return -EMSGSIZE;
	}

	if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
		return -EMSGSIZE;
	if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport))
		return -EMSGSIZE;

	sf = mptcp_subflow_ctx(ssk);
	if (WARN_ON_ONCE(!sf))
		return -EINVAL;

	if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
		return -EMSGSIZE;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
					 const struct mptcp_sock *msk,
					 const struct sock *ssk)
{
	const struct sock *sk = (const struct sock *)msk;
	const struct mptcp_subflow_context *sf;
	u8 sk_err;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		return -EMSGSIZE;

	if (mptcp_event_add_subflow(skb, ssk))
		return -EMSGSIZE;

	sf = mptcp_subflow_ctx(ssk);
	if (WARN_ON_ONCE(!sf))
		return -EINVAL;

	if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup))
		return -EMSGSIZE;

	if (ssk->sk_bound_dev_if &&
	    nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
		return -EMSGSIZE;

	sk_err = READ_ONCE(ssk->sk_err);
	if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
	    nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_sub_established(struct sk_buff *skb,
				       const struct mptcp_sock *msk,
				       const struct sock *ssk)
{
	return mptcp_event_put_token_and_ssk(skb, msk, ssk);
}

static int mptcp_event_sub_closed(struct sk_buff *skb,
				  const struct mptcp_sock *msk,
				  const struct sock *ssk)
{
	const struct mptcp_subflow_context *sf;

	if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
		return -EMSGSIZE;

	sf = mptcp_subflow_ctx(ssk);
	if (!sf->reset_seen)
		return 0;

	if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason))
		return -EMSGSIZE;

	if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_created(struct sk_buff *skb,
			       const struct mptcp_sock *msk,
			       const struct sock *ssk)
{
	int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);

	if (err)
		return err;

	if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
		return -EMSGSIZE;

	return mptcp_event_add_subflow(skb, ssk);
}
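/* The two address events below may fire from atomic context (e.g. while an
 * ADD_ADDR or RM_ADDR option is processed in softirq), hence GFP_ATOMIC.
 * Delivery is best-effort: if the skb cannot be allocated or filled, the
 * event is silently dropped rather than retried.
 */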
void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
{
	struct net *net = sock_net((const struct sock *)msk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED);
	if (!nlh)
		goto nla_put_failure;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		goto nla_put_failure;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
		goto nla_put_failure;

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
	return;

nla_put_failure:
	kfree_skb(skb);
}

void mptcp_event_addr_announced(const struct sock *ssk,
				const struct mptcp_addr_info *info)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct net *net = sock_net(ssk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0,
			  MPTCP_EVENT_ANNOUNCED);
	if (!nlh)
		goto nla_put_failure;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		goto nla_put_failure;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
		goto nla_put_failure;

	if (nla_put_be16(skb, MPTCP_ATTR_DPORT,
			 info->port == 0 ?
			 inet_sk(ssk)->inet_dport :
			 info->port))
		goto nla_put_failure;

	switch (info->family) {
	case AF_INET:
		if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr))
			goto nla_put_failure;
		break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	case AF_INET6:
		if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6))
			goto nla_put_failure;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		goto nla_put_failure;
	}

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
	return;

nla_put_failure:
	kfree_skb(skb);
}

void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
		 const struct sock *ssk, gfp_t gfp)
{
	struct net *net = sock_net((const struct sock *)msk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type);
	if (!nlh)
		goto nla_put_failure;

	switch (type) {
	case MPTCP_EVENT_UNSPEC:
		WARN_ON_ONCE(1);
		break;
	case MPTCP_EVENT_CREATED:
	case MPTCP_EVENT_ESTABLISHED:
		if (mptcp_event_created(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_CLOSED:
		if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_ANNOUNCED:
	case MPTCP_EVENT_REMOVED:
		/* call mptcp_event_addr_announced()/removed instead */
		WARN_ON_ONCE(1);
		break;
	case MPTCP_EVENT_SUB_ESTABLISHED:
	case MPTCP_EVENT_SUB_PRIORITY:
		if (mptcp_event_sub_established(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_SUB_CLOSED:
		if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	}

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, gfp);
	return;

nla_put_failure:
	kfree_skb(skb);
}
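/* Command table. GENL_UNS_ADMIN_PERM requires CAP_NET_ADMIN in the user
 * namespace owning the socket's netns, so an unprivileged container admin
 * can manage its own MPTCP endpoints; the read-only GET_ADDR and
 * GET_LIMITS commands are deliberately left unprivileged.
 */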
static const struct genl_small_ops mptcp_pm_ops[] = {
	{
		.cmd    = MPTCP_PM_CMD_ADD_ADDR,
		.doit   = mptcp_nl_cmd_add_addr,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_DEL_ADDR,
		.doit   = mptcp_nl_cmd_del_addr,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_FLUSH_ADDRS,
		.doit   = mptcp_nl_cmd_flush_addrs,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_GET_ADDR,
		.doit   = mptcp_nl_cmd_get_addr,
		.dumpit = mptcp_nl_cmd_dump_addrs,
	},
	{
		.cmd    = MPTCP_PM_CMD_SET_LIMITS,
		.doit   = mptcp_nl_cmd_set_limits,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_GET_LIMITS,
		.doit   = mptcp_nl_cmd_get_limits,
	},
	{
		.cmd    = MPTCP_PM_CMD_SET_FLAGS,
		.doit   = mptcp_nl_cmd_set_flags,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_ANNOUNCE,
		.doit   = mptcp_nl_cmd_announce,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_REMOVE,
		.doit   = mptcp_nl_cmd_remove,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_SUBFLOW_CREATE,
		.doit   = mptcp_nl_cmd_sf_create,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
	{
		.cmd    = MPTCP_PM_CMD_SUBFLOW_DESTROY,
		.doit   = mptcp_nl_cmd_sf_destroy,
		.flags  = GENL_UNS_ADMIN_PERM,
	},
};

static struct genl_family mptcp_genl_family __ro_after_init = {
	.name		= MPTCP_PM_NAME,
	.version	= MPTCP_PM_VER,
	.maxattr	= MPTCP_PM_ATTR_MAX,
	.policy		= mptcp_pm_policy,
	.netnsok	= true,
	.module		= THIS_MODULE,
	.small_ops	= mptcp_pm_ops,
	.n_small_ops	= ARRAY_SIZE(mptcp_pm_ops),
	.resv_start_op	= MPTCP_PM_CMD_SUBFLOW_DESTROY + 1,
	.mcgrps		= mptcp_pm_mcgrps,
	.n_mcgrps	= ARRAY_SIZE(mptcp_pm_mcgrps),
};
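/* Per-netns setup and teardown. Each network namespace gets its own
 * endpoint list and limits (netnsok above); the pernet core zeroes the
 * struct at allocation, so only non-zero defaults are written here.
 */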
static int __net_init pm_nl_init_net(struct net *net)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

	INIT_LIST_HEAD_RCU(&pernet->local_addr_list);

	/* Cit. 2 subflows ought to be enough for anybody. */
	pernet->subflows_max = 2;
	pernet->next_id = 1;
	pernet->stale_loss_cnt = 4;
	spin_lock_init(&pernet->lock);

	/* No need to initialize other pernet fields, the struct is zeroed at
	 * allocation time.
	 */

	return 0;
}

static void __net_exit pm_nl_exit_net(struct list_head *net_list)
{
	struct net *net;

	list_for_each_entry(net, net_list, exit_list) {
		struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

		/* net is removed from namespace list, can't race with
		 * other modifiers, also netns core already waited for a
		 * RCU grace period.
		 */
		__flush_addrs(&pernet->local_addr_list);
	}
}

static struct pernet_operations mptcp_pm_pernet_ops = {
	.init = pm_nl_init_net,
	.exit_batch = pm_nl_exit_net,
	.id = &pm_nl_pernet_id,
	.size = sizeof(struct pm_nl_pernet),
};

void __init mptcp_pm_nl_init(void)
{
	if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
		panic("Failed to register MPTCP PM pernet subsystem.\n");

	if (genl_register_family(&mptcp_genl_family))
		panic("Failed to register MPTCP PM netlink family\n");
}