/* net_namespace.c - management of BPF programs attached to network namespaces */
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/bpf.h>
  3. #include <linux/bpf-netns.h>
  4. #include <linux/filter.h>
  5. #include <net/net_namespace.h>
/*
 * Functions to manage BPF programs attached to netns
 */

/* Per-attachment state tying a bpf_link to the netns it is attached to
 * and to the per-attach-type list it lives on.
 */
struct bpf_netns_link {
	struct bpf_link link;
	/* Attach type as passed by userspace at link creation. */
	enum bpf_attach_type type;
	/* Index into net->bpf per-attach-type arrays. */
	enum netns_bpf_attach_type netns_type;

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};

/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
  23. static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
  24. {
  25. switch (type) {
  26. #ifdef CONFIG_INET
  27. case NETNS_BPF_SK_LOOKUP:
  28. static_branch_dec(&bpf_sk_lookup_enabled);
  29. break;
  30. #endif
  31. default:
  32. break;
  33. }
  34. }
  35. static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
  36. {
  37. switch (type) {
  38. #ifdef CONFIG_INET
  39. case NETNS_BPF_SK_LOOKUP:
  40. static_branch_inc(&bpf_sk_lookup_enabled);
  41. break;
  42. #endif
  43. default:
  44. break;
  45. }
  46. }
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
				       enum netns_bpf_attach_type type)
{
	struct bpf_prog_array *run_array;

	/* Unpublish the run array for this attach type and release it. */
	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);
}
  56. static int link_index(struct net *net, enum netns_bpf_attach_type type,
  57. struct bpf_netns_link *link)
  58. {
  59. struct bpf_netns_link *pos;
  60. int i = 0;
  61. list_for_each_entry(pos, &net->bpf.links[type], node) {
  62. if (pos == link)
  63. return i;
  64. i++;
  65. }
  66. return -ENOENT;
  67. }
  68. static int link_count(struct net *net, enum netns_bpf_attach_type type)
  69. {
  70. struct list_head *pos;
  71. int i = 0;
  72. list_for_each(pos, &net->bpf.links[type])
  73. i++;
  74. return i;
  75. }
  76. static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
  77. struct bpf_prog_array *prog_array)
  78. {
  79. struct bpf_netns_link *pos;
  80. unsigned int i = 0;
  81. list_for_each_entry(pos, &net->bpf.links[type], node) {
  82. prog_array->items[i].prog = pos->link.prog;
  83. i++;
  84. }
  85. }
/* Detach the link from its netns: remove it from the link list and
 * republish the run array without its program.
 */
static void bpf_netns_link_release(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *old_array, *new_array;
	struct net *net;
	int cnt, idx;

	mutex_lock(&netns_bpf_mutex);

	/* We can race with cleanup_net, but if we see a non-NULL
	 * struct net pointer, pre_exit has not run yet and wait for
	 * netns_bpf_mutex.
	 */
	net = net_link->net;
	if (!net)
		goto out_unlock;

	/* Mark attach point as unused */
	netns_bpf_attach_type_unneed(type);

	/* Remember link position in case of safe delete */
	idx = link_index(net, type, net_link);
	list_del(&net_link->node);

	cnt = link_count(net, type);
	if (!cnt) {
		/* Last link for this type - drop the whole run array. */
		netns_bpf_run_array_detach(net, type);
		goto out_unlock;
	}

	old_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!new_array) {
		/* Allocation failed - delete the slot in place in the
		 * old array instead of republishing a rebuilt one.
		 */
		WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
		goto out_unlock;
	}
	fill_prog_array(net, type, new_array);
	rcu_assign_pointer(net->bpf.run_array[type], new_array);
	bpf_prog_array_free(old_array);

out_unlock:
	net_link->net = NULL;
	mutex_unlock(&netns_bpf_mutex);
}
/* Forced detach (.detach link op) is the same operation as release
 * and always succeeds.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
	bpf_netns_link_release(link);
	return 0;
}
  131. static void bpf_netns_link_dealloc(struct bpf_link *link)
  132. {
  133. struct bpf_netns_link *net_link =
  134. container_of(link, struct bpf_netns_link, link);
  135. kfree(net_link);
  136. }
/* Swap the link's program for @new_prog, updating the published run
 * array slot in place.
 */
static int bpf_netns_link_update_prog(struct bpf_link *link,
				      struct bpf_prog *new_prog,
				      struct bpf_prog *old_prog)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *run_array;
	struct net *net;
	int idx, ret;

	/* If the caller named an expected old prog, it must match. */
	if (old_prog && old_prog != link->prog)
		return -EPERM;
	/* Replacement must be of the same program type. */
	if (new_prog->type != link->prog->type)
		return -EINVAL;

	mutex_lock(&netns_bpf_mutex);

	net = net_link->net;
	if (!net || !check_net(net)) {
		/* Link auto-detached or netns dying */
		ret = -ENOLINK;
		goto out_unlock;
	}

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	/* The link's slot in the run array is its list position. */
	idx = link_index(net, type, net_link);
	ret = bpf_prog_array_update_at(run_array, idx, new_prog);
	if (ret)
		goto out_unlock;

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return ret;
}
  170. static int bpf_netns_link_fill_info(const struct bpf_link *link,
  171. struct bpf_link_info *info)
  172. {
  173. const struct bpf_netns_link *net_link =
  174. container_of(link, struct bpf_netns_link, link);
  175. unsigned int inum = 0;
  176. struct net *net;
  177. mutex_lock(&netns_bpf_mutex);
  178. net = net_link->net;
  179. if (net && check_net(net))
  180. inum = net->ns.inum;
  181. mutex_unlock(&netns_bpf_mutex);
  182. info->netns.netns_ino = inum;
  183. info->netns.attach_type = net_link->type;
  184. return 0;
  185. }
  186. static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
  187. struct seq_file *seq)
  188. {
  189. struct bpf_link_info info = {};
  190. bpf_netns_link_fill_info(link, &info);
  191. seq_printf(seq,
  192. "netns_ino:\t%u\n"
  193. "attach_type:\t%u\n",
  194. info.netns.netns_ino,
  195. info.netns.attach_type);
  196. }
/* Link operations for BPF_LINK_TYPE_NETNS links. */
static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.detach = bpf_netns_link_detach,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};
/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  struct net *net,
				  enum netns_bpf_attach_type type)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	struct bpf_prog_array *run_array;
	u32 prog_cnt = 0, flags = 0;

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array)
		prog_cnt = bpf_prog_array_length(run_array);

	/* Always report flags (currently always 0) and the count, even
	 * when the caller provided no prog_ids buffer.
	 */
	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;

	/* Done if the caller asked only for the count, passed no buffer,
	 * or nothing is attached.
	 */
	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	return bpf_prog_array_copy_to_user(run_array, prog_ids,
					   attr->query.prog_cnt);
}
  227. int netns_bpf_prog_query(const union bpf_attr *attr,
  228. union bpf_attr __user *uattr)
  229. {
  230. enum netns_bpf_attach_type type;
  231. struct net *net;
  232. int ret;
  233. if (attr->query.query_flags)
  234. return -EINVAL;
  235. type = to_netns_bpf_attach_type(attr->query.attach_type);
  236. if (type < 0)
  237. return -EINVAL;
  238. net = get_net_ns_by_fd(attr->query.target_fd);
  239. if (IS_ERR(net))
  240. return PTR_ERR(net);
  241. mutex_lock(&netns_bpf_mutex);
  242. ret = __netns_bpf_prog_query(attr, uattr, net, type);
  243. mutex_unlock(&netns_bpf_mutex);
  244. put_net(net);
  245. return ret;
  246. }
/* Attach @prog directly (without a link) to the current task's netns.
 * Only one directly-attached program per attach type; replacing an
 * existing one swaps the run array slot in place.
 */
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_prog_array *run_array;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
	int ret;

	if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	net = current->nsproxy->net_ns;
	mutex_lock(&netns_bpf_mutex);

	/* Attaching prog directly is not compatible with links */
	if (!list_empty(&net->bpf.links[type])) {
		ret = -EEXIST;
		goto out_unlock;
	}

	/* Only flow dissector supports direct attach here. */
	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		ret = flow_dissector_bpf_prog_attach_check(net, prog);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	if (ret)
		goto out_unlock;

	attached = net->bpf.progs[type];
	if (attached == prog) {
		/* The same program cannot be attached twice */
		ret = -EINVAL;
		goto out_unlock;
	}

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array) {
		/* Replace the prog in the already-published array. */
		WRITE_ONCE(run_array->items[0].prog, prog);
	} else {
		/* First attachment - publish a fresh one-slot array. */
		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
		if (!run_array) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		run_array->items[0].prog = prog;
		rcu_assign_pointer(net->bpf.run_array[type], run_array);
	}

	net->bpf.progs[type] = prog;
	if (attached)
		bpf_prog_put(attached);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return ret;
}
  302. /* Must be called with netns_bpf_mutex held. */
  303. static int __netns_bpf_prog_detach(struct net *net,
  304. enum netns_bpf_attach_type type,
  305. struct bpf_prog *old)
  306. {
  307. struct bpf_prog *attached;
  308. /* Progs attached via links cannot be detached */
  309. if (!list_empty(&net->bpf.links[type]))
  310. return -EINVAL;
  311. attached = net->bpf.progs[type];
  312. if (!attached || attached != old)
  313. return -ENOENT;
  314. netns_bpf_run_array_detach(net, type);
  315. net->bpf.progs[type] = NULL;
  316. bpf_prog_put(attached);
  317. return 0;
  318. }
  319. int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
  320. {
  321. enum netns_bpf_attach_type type;
  322. struct bpf_prog *prog;
  323. int ret;
  324. if (attr->target_fd)
  325. return -EINVAL;
  326. type = to_netns_bpf_attach_type(attr->attach_type);
  327. if (type < 0)
  328. return -EINVAL;
  329. prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
  330. if (IS_ERR(prog))
  331. return PTR_ERR(prog);
  332. mutex_lock(&netns_bpf_mutex);
  333. ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
  334. mutex_unlock(&netns_bpf_mutex);
  335. bpf_prog_put(prog);
  336. return ret;
  337. }
  338. static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
  339. {
  340. switch (type) {
  341. case NETNS_BPF_FLOW_DISSECTOR:
  342. return 1;
  343. case NETNS_BPF_SK_LOOKUP:
  344. return 64;
  345. default:
  346. return 0;
  347. }
  348. }
/* Attach @link to @net: append it to the per-type link list and
 * publish a run array rebuilt to include its program.
 */
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
				 enum netns_bpf_attach_type type)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	struct bpf_prog_array *run_array;
	int cnt, err;

	mutex_lock(&netns_bpf_mutex);

	cnt = link_count(net, type);
	if (cnt >= netns_bpf_max_progs(type)) {
		err = -E2BIG;
		goto out_unlock;
	}
	/* Links are not compatible with attaching prog directly */
	if (net->bpf.progs[type]) {
		err = -EEXIST;
		goto out_unlock;
	}

	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
		break;
	case NETNS_BPF_SK_LOOKUP:
		err = 0; /* nothing to check */
		break;
	default:
		err = -EINVAL;
		break;
	}
	if (err)
		goto out_unlock;

	run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
	if (!run_array) {
		err = -ENOMEM;
		goto out_unlock;
	}

	/* Add the link before filling so fill_prog_array() sees it. */
	list_add_tail(&net_link->node, &net->bpf.links[type]);

	fill_prog_array(net, type, run_array);
	run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);

	/* Mark attach point as used */
	netns_bpf_attach_type_need(type);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return err;
}
/* Create a netns link for @prog and attach it to the netns named by
 * link_create.target_fd. On success the settled link fd is returned.
 */
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type netns_type;
	struct bpf_link_primer link_primer;
	struct bpf_netns_link *net_link;
	enum bpf_attach_type type;
	struct net *net;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	type = attr->link_create.attach_type;
	netns_type = to_netns_bpf_attach_type(type);
	if (netns_type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->link_create.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	net_link = kzalloc(sizeof(*net_link), GFP_USER);
	if (!net_link) {
		err = -ENOMEM;
		goto out_put_net;
	}
	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
		      &bpf_netns_link_ops, prog);
	net_link->net = net;
	net_link->type = type;
	net_link->netns_type = netns_type;

	err = bpf_link_prime(&net_link->link, &link_primer);
	if (err) {
		/* Priming failed - the link was never exposed; free it. */
		kfree(net_link);
		goto out_put_net;
	}

	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
	if (err) {
		/* Undo bpf_link_prime(). */
		bpf_link_cleanup(&link_primer);
		goto out_put_net;
	}

	/* The link holds no ref on net (see struct bpf_netns_link). */
	put_net(net);
	return bpf_link_settle(&link_primer);

out_put_net:
	put_net(net);
	return err;
}
  439. static int __net_init netns_bpf_pernet_init(struct net *net)
  440. {
  441. int type;
  442. for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
  443. INIT_LIST_HEAD(&net->bpf.links[type]);
  444. return 0;
  445. }
/* Pernet pre_exit: the netns is going away. Unpublish every run array,
 * auto-detach every attached link by clearing its net pointer, and put
 * any directly-attached programs.
 */
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
	enum netns_bpf_attach_type type;
	struct bpf_netns_link *net_link;

	mutex_lock(&netns_bpf_mutex);
	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
		netns_bpf_run_array_detach(net, type);
		list_for_each_entry(net_link, &net->bpf.links[type], node) {
			net_link->net = NULL; /* auto-detach link */
			netns_bpf_attach_type_unneed(type);
		}
		if (net->bpf.progs[type])
			bpf_prog_put(net->bpf.progs[type]);
	}
	mutex_unlock(&netns_bpf_mutex);
}
/* Pernet hooks: init list heads on netns creation, auto-detach on exit. */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};

/* Register the pernet operations at boot. */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);