bpf_struct_ops.c

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/numa.h>
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/btf_ids.h>

enum bpf_struct_ops_state {
	BPF_STRUCT_OPS_STATE_INIT,
	BPF_STRUCT_OPS_STATE_INUSE,
	BPF_STRUCT_OPS_STATE_TOBEFREE,
};

#define BPF_STRUCT_OPS_COMMON_VALUE			\
	refcount_t refcnt;				\
	enum bpf_struct_ops_state state

struct bpf_struct_ops_value {
	BPF_STRUCT_OPS_COMMON_VALUE;
	char data[] ____cacheline_aligned_in_smp;
};

struct bpf_struct_ops_map {
	struct bpf_map map;
	struct rcu_head rcu;
	const struct bpf_struct_ops *st_ops;
	/* protect map_update */
	struct mutex lock;
	/* links holds the bpf_links that are populated
	 * to the func ptrs of the kernel's struct
	 * (in kvalue.data).
	 */
	struct bpf_link **links;
	/* image is a page that holds all the trampolines
	 * that store the func args before calling the bpf_prog.
	 * A PAGE_SIZE "image" is enough to store all the
	 * trampolines for "links[]".
	 */
	void *image;
	/* uvalue->data stores the kernel struct
	 * (e.g. tcp_congestion_ops) that is more useful
	 * to userspace than the kvalue.  For example,
	 * the bpf_prog's id is stored instead of the kernel
	 * address of a func ptr.
	 */
	struct bpf_struct_ops_value *uvalue;
	/* kvalue.data stores the actual kernel's struct
	 * (e.g. tcp_congestion_ops) that will be
	 * registered to the kernel subsystem.
	 */
	struct bpf_struct_ops_value kvalue;
};

#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)

/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
 * the map's value exposed to userspace and its btf-type-id is
 * stored at map->btf_vmlinux_value_type_id.
 */
#define BPF_STRUCT_OPS_TYPE(_name)				\
extern struct bpf_struct_ops bpf_##_name;			\
								\
struct bpf_struct_ops_##_name {					\
	BPF_STRUCT_OPS_COMMON_VALUE;				\
	struct _name data ____cacheline_aligned_in_smp;		\
};
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

enum {
#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
	__NR_BPF_STRUCT_OPS_TYPE,
};

static struct bpf_struct_ops * const bpf_struct_ops[] = {
#define BPF_STRUCT_OPS_TYPE(_name)				\
	[BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
};

const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};

const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_struct_ops_test_run,
#endif
};

static const struct btf_type *module_type;

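/* Walk every struct_ops in bpf_struct_ops[]: resolve the BTF ids of the
 * kernel struct (e.g. tcp_congestion_ops) and of its userspace-facing
 * value wrapper (bpf_struct_ops_<name>), distill a func model for each
 * func-ptr member, and then let the subsystem's ->init() finish setup.
 */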
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
{
	s32 type_id, value_id, module_id;
	const struct btf_member *member;
	struct bpf_struct_ops *st_ops;
	const struct btf_type *t;
	char value_name[128];
	const char *mname;
	u32 i, j;

	/* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

	module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
	if (module_id < 0) {
		pr_warn("Cannot find struct module in btf_vmlinux\n");
		return;
	}
	module_type = btf_type_by_id(btf, module_id);

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		st_ops = bpf_struct_ops[i];

		if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
		    sizeof(value_name)) {
			pr_warn("struct_ops name %s is too long\n",
				st_ops->name);
			continue;
		}
		sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);

		value_id = btf_find_by_name_kind(btf, value_name,
						 BTF_KIND_STRUCT);
		if (value_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				value_name);
			continue;
		}

		type_id = btf_find_by_name_kind(btf, st_ops->name,
						BTF_KIND_STRUCT);
		if (type_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				st_ops->name);
			continue;
		}
		t = btf_type_by_id(btf, type_id);
		if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
			pr_warn("Cannot support #%u members in struct %s\n",
				btf_type_vlen(t), st_ops->name);
			continue;
		}

		for_each_member(j, t, member) {
			const struct btf_type *func_proto;

			mname = btf_name_by_offset(btf, member->name_off);
			if (!*mname) {
				pr_warn("anon member in struct %s is not supported\n",
					st_ops->name);
				break;
			}

			if (__btf_member_bitfield_size(t, member)) {
				pr_warn("bit field member %s in struct %s is not supported\n",
					mname, st_ops->name);
				break;
			}

			func_proto = btf_type_resolve_func_ptr(btf,
							       member->type,
							       NULL);
			if (func_proto &&
			    btf_distill_func_proto(log, btf,
						   func_proto, mname,
						   &st_ops->func_models[j])) {
				pr_warn("Error in parsing func ptr %s in struct %s\n",
					mname, st_ops->name);
				break;
			}
		}

		if (j == btf_type_vlen(t)) {
			if (st_ops->init(btf)) {
				pr_warn("Error in init bpf_struct_ops %s\n",
					st_ops->name);
			} else {
				st_ops->type_id = type_id;
				st_ops->type = t;
				st_ops->value_id = value_id;
				st_ops->value_type = btf_type_by_id(btf,
								    value_id);
			}
		}
	}
}

extern struct btf *btf_vmlinux;

static const struct bpf_struct_ops *
bpf_struct_ops_find_value(u32 value_id)
{
	unsigned int i;

	if (!value_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->value_id == value_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}

const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
	unsigned int i;

	if (!type_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->type_id == type_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}

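/* A struct_ops map holds exactly one element at key 0, so the only
 * possible "next" key is 0 itself when no key is given.
 */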
static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
					   void *next_key)
{
	if (key && *(u32 *)key == 0)
		return -ENOENT;

	*(u32 *)next_key = 0;
	return 0;
}

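/* Syscall-side lookup: copy out the userspace-facing uvalue (prog ids
 * instead of kernel func-ptr addresses) together with the current state
 * and refcnt of the kernel-side kvalue.
 */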
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
				       void *value)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	enum bpf_struct_ops_state state;

	if (unlikely(*(u32 *)key != 0))
		return -ENOENT;

	kvalue = &st_map->kvalue;
	/* Pair with smp_store_release() during map_update */
	state = smp_load_acquire(&kvalue->state);
	if (state == BPF_STRUCT_OPS_STATE_INIT) {
		memset(value, 0, map->value_size);
		return 0;
	}

	/* No lock is needed.  state and refcnt do not need
	 * to be updated together under atomic context.
	 */
	uvalue = value;
	memcpy(uvalue, st_map->uvalue, map->value_size);
	uvalue->state = state;
	refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));

	return 0;
}

static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EINVAL);
}

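/* Drop the bpf_link (and with it the bpf_prog) attached to every
 * func-ptr member that was populated during map_update.
 */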
static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
	const struct btf_type *t = st_map->st_ops->type;
	u32 i;

	for (i = 0; i < btf_type_vlen(t); i++) {
		if (st_map->links[i]) {
			bpf_link_put(st_map->links[i]);
			st_map->links[i] = NULL;
		}
	}
}

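/* Reject a value whose padding (the "holes" between and after members)
 * is not zeroed, so no unverified bytes can be smuggled in.
 */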
static int check_zero_holes(const struct btf_type *t, void *data)
{
	const struct btf_member *member;
	u32 i, moff, msize, prev_mend = 0;
	const struct btf_type *mtype;

	for_each_member(i, t, member) {
		moff = __btf_member_bit_offset(t, member) / 8;
		if (moff > prev_mend &&
		    memchr_inv(data + prev_mend, 0, moff - prev_mend))
			return -EINVAL;

		mtype = btf_type_by_id(btf_vmlinux, member->type);
		mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
		if (IS_ERR(mtype))
			return PTR_ERR(mtype);
		prev_mend = moff + msize;
	}

	if (t->size > prev_mend &&
	    memchr_inv(data + prev_mend, 0, t->size - prev_mend))
		return -EINVAL;

	return 0;
}

static void bpf_struct_ops_link_release(struct bpf_link *link)
{
}

static void bpf_struct_ops_link_dealloc(struct bpf_link *link)
{
	struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link);

	kfree(tlink);
}

const struct bpf_link_ops bpf_struct_ops_link_lops = {
	.release = bpf_struct_ops_link_release,
	.dealloc = bpf_struct_ops_link_dealloc,
};

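/* Emit one trampoline for a single func-ptr member: it saves the
 * function arguments per "model" and invokes the prog behind "link" as
 * an FENTRY-style attachment.
 */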
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
				      struct bpf_tramp_link *link,
				      const struct btf_func_model *model,
				      void *image, void *image_end)
{
	u32 flags;

	tlinks[BPF_TRAMP_FENTRY].links[0] = link;
	tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
	/* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
	 * and it must be used alone.
	 */
	flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
	return arch_prepare_bpf_trampoline(NULL, image, image_end,
					   model, flags, tlinks, NULL);
}

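/* Update is the "register" path: copy the user-supplied value, turn every
 * func-ptr member into a trampoline that calls the corresponding bpf_prog,
 * make the trampoline page read-only and executable, and finally hand the
 * filled-in kvalue.data to the subsystem via st_ops->reg().
 */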
static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 flags)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	const struct bpf_struct_ops *st_ops = st_map->st_ops;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_member *member;
	const struct btf_type *t = st_ops->type;
	struct bpf_tramp_links *tlinks = NULL;
	void *udata, *kdata;
	int prog_fd, err = 0;
	void *image, *image_end;
	u32 i;

	if (flags)
		return -EINVAL;

	if (*(u32 *)key != 0)
		return -E2BIG;

	err = check_zero_holes(st_ops->value_type, value);
	if (err)
		return err;

	uvalue = value;
	err = check_zero_holes(t, uvalue->data);
	if (err)
		return err;

	if (uvalue->state || refcount_read(&uvalue->refcnt))
		return -EINVAL;

	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

	mutex_lock(&st_map->lock);

	if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
		err = -EBUSY;
		goto unlock;
	}

	memcpy(uvalue, value, map->value_size);

	udata = &uvalue->data;
	kdata = &kvalue->data;
	image = st_map->image;
	image_end = st_map->image + PAGE_SIZE;

	for_each_member(i, t, member) {
		const struct btf_type *mtype, *ptype;
		struct bpf_prog *prog;
		struct bpf_tramp_link *link;
		u32 moff;

		moff = __btf_member_bit_offset(t, member) / 8;
		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
		if (ptype == module_type) {
			if (*(void **)(udata + moff))
				goto reset_unlock;
			*(void **)(kdata + moff) = BPF_MODULE_OWNER;
			continue;
		}

		err = st_ops->init_member(t, member, kdata, udata);
		if (err < 0)
			goto reset_unlock;

		/* The ->init_member() has handled this member */
		if (err > 0)
			continue;

		/* If st_ops->init_member does not handle it,
		 * we will only handle func ptrs and zero-ed members
		 * here.  Reject everything else.
		 */

		/* All non func ptr members must be 0 */
		if (!ptype || !btf_type_is_func_proto(ptype)) {
			u32 msize;

			mtype = btf_type_by_id(btf_vmlinux, member->type);
			mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
			if (IS_ERR(mtype)) {
				err = PTR_ERR(mtype);
				goto reset_unlock;
			}

			if (memchr_inv(udata + moff, 0, msize)) {
				err = -EINVAL;
				goto reset_unlock;
			}

			continue;
		}

		prog_fd = (int)(*(unsigned long *)(udata + moff));
		/* Similar check as the attr->attach_prog_fd */
		if (!prog_fd)
			continue;

		prog = bpf_prog_get(prog_fd);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto reset_unlock;
		}

		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
		    prog->aux->attach_btf_id != st_ops->type_id ||
		    prog->expected_attach_type != i) {
			bpf_prog_put(prog);
			err = -EINVAL;
			goto reset_unlock;
		}

		link = kzalloc(sizeof(*link), GFP_USER);
		if (!link) {
			bpf_prog_put(prog);
			err = -ENOMEM;
			goto reset_unlock;
		}
		bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
			      &bpf_struct_ops_link_lops, prog);
		st_map->links[i] = &link->link;

		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
							&st_ops->func_models[i],
							image, image_end);
		if (err < 0)
			goto reset_unlock;

		*(void **)(kdata + moff) = image;
		image += err;

		/* put prog_id to udata */
		*(unsigned long *)(udata + moff) = prog->aux->id;
	}

	refcount_set(&kvalue->refcnt, 1);
	bpf_map_inc(map);

	set_memory_ro((long)st_map->image, 1);
	set_memory_x((long)st_map->image, 1);
	err = st_ops->reg(kdata);
	if (likely(!err)) {
		/* Pair with smp_load_acquire() during lookup_elem().
		 * It ensures the above udata updates (e.g. prog->aux->id)
		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
		 */
		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
		goto unlock;
	}

	/* Error during st_ops->reg().  Can happen if this struct_ops needs to be
	 * verified as a whole, after all init_member() calls.  Can also happen if
	 * there was a race in registering the struct_ops (under the same name) to
	 * a sub-system through different struct_ops's maps.
	 */
	set_memory_nx((long)st_map->image, 1);
	set_memory_rw((long)st_map->image, 1);
	bpf_map_put(map);

reset_unlock:
	bpf_struct_ops_map_put_progs(st_map);
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
unlock:
	kfree(tlinks);
	mutex_unlock(&st_map->lock);
	return err;
}

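/* Delete is the "unregister" path: atomically flip the state from INUSE
 * to TOBEFREE so only one caller unregisters the kvalue from the
 * subsystem and drops the registration's reference on the map.
 */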
static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
	enum bpf_struct_ops_state prev_state;
	struct bpf_struct_ops_map *st_map;

	st_map = (struct bpf_struct_ops_map *)map;
	prev_state = cmpxchg(&st_map->kvalue.state,
			     BPF_STRUCT_OPS_STATE_INUSE,
			     BPF_STRUCT_OPS_STATE_TOBEFREE);
	switch (prev_state) {
	case BPF_STRUCT_OPS_STATE_INUSE:
		st_map->st_ops->unreg(&st_map->kvalue.data);
		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
			bpf_map_put(map);
		return 0;
	case BPF_STRUCT_OPS_STATE_TOBEFREE:
		return -EINPROGRESS;
	case BPF_STRUCT_OPS_STATE_INIT:
		return -ENOENT;
	default:
		WARN_ON_ONCE(1);
		/* Should never happen.  Treat it as not found. */
		return -ENOENT;
	}
}

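/* Pretty-print the single element through its vmlinux BTF type for
 * seq_file consumers (e.g. reading a pinned map through bpffs).
 */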
static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
					     struct seq_file *m)
{
	void *value;
	int err;

	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		return;

	err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	if (!err) {
		btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
				  value, m);
		seq_puts(m, "\n");
	}

	kfree(value);
}

static void bpf_struct_ops_map_free(struct bpf_map *map)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

	if (st_map->links)
		bpf_struct_ops_map_put_progs(st_map);
	bpf_map_area_free(st_map->links);
	bpf_jit_free_exec(st_map->image);
	bpf_map_area_free(st_map->uvalue);
	bpf_map_area_free(st_map);
}

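/* A struct_ops map is a single-entry map keyed by a u32 and must name
 * the kernel struct it implements through btf_vmlinux_value_type_id.
 */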
static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
		return -EINVAL;
	return 0;
}

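/* Allocate the map together with its kvalue tail, the userspace-facing
 * uvalue, one bpf_link slot per member and a PAGE_SIZE executable image
 * for the trampolines.
 */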
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
	const struct bpf_struct_ops *st_ops;
	size_t st_map_size;
	struct bpf_struct_ops_map *st_map;
	const struct btf_type *t, *vt;
	struct bpf_map *map;

	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
	if (!st_ops)
		return ERR_PTR(-ENOTSUPP);

	vt = st_ops->value_type;
	if (attr->value_size != vt->size)
		return ERR_PTR(-EINVAL);

	t = st_ops->type;

	st_map_size = sizeof(*st_map) +
		/* kvalue stores the
		 * struct bpf_struct_ops_tcp_congestion_ops
		 */
		(vt->size - sizeof(struct bpf_struct_ops_value));

	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
	if (!st_map)
		return ERR_PTR(-ENOMEM);

	st_map->st_ops = st_ops;
	map = &st_map->map;

	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
	st_map->links =
		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_link *),
				   NUMA_NO_NODE);
	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!st_map->uvalue || !st_map->links || !st_map->image) {
		bpf_struct_ops_map_free(map);
		return ERR_PTR(-ENOMEM);
	}

	mutex_init(&st_map->lock);
	set_vm_flush_reset_perms(st_map->image);
	bpf_map_init_from_attr(map, attr);

	return map;
}

BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
const struct bpf_map_ops bpf_struct_ops_map_ops = {
	.map_alloc_check = bpf_struct_ops_map_alloc_check,
	.map_alloc = bpf_struct_ops_map_alloc,
	.map_free = bpf_struct_ops_map_free,
	.map_get_next_key = bpf_struct_ops_map_get_next_key,
	.map_lookup_elem = bpf_struct_ops_map_lookup_elem,
	.map_delete_elem = bpf_struct_ops_map_delete_elem,
	.map_update_elem = bpf_struct_ops_map_update_elem,
	.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
	.map_btf_id = &bpf_struct_ops_map_btf_ids[0],
};

  550. /* "const void *" because some subsystem is
  551. * passing a const (e.g. const struct tcp_congestion_ops *)
  552. */
  553. bool bpf_struct_ops_get(const void *kdata)
  554. {
  555. struct bpf_struct_ops_value *kvalue;
  556. kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
  557. return refcount_inc_not_zero(&kvalue->refcnt);
  558. }
  559. static void bpf_struct_ops_put_rcu(struct rcu_head *head)
  560. {
  561. struct bpf_struct_ops_map *st_map;
  562. st_map = container_of(head, struct bpf_struct_ops_map, rcu);
  563. bpf_map_put(&st_map->map);
  564. }
  565. void bpf_struct_ops_put(const void *kdata)
  566. {
  567. struct bpf_struct_ops_value *kvalue;
  568. kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
  569. if (refcount_dec_and_test(&kvalue->refcnt)) {
  570. struct bpf_struct_ops_map *st_map;
  571. st_map = container_of(kvalue, struct bpf_struct_ops_map,
  572. kvalue);
  573. /* The struct_ops's function may switch to another struct_ops.
  574. *
  575. * For example, bpf_tcp_cc_x->init() may switch to
  576. * another tcp_cc_y by calling
  577. * setsockopt(TCP_CONGESTION, "tcp_cc_y").
  578. * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
  579. * and its map->refcnt may reach 0 which then free its
  580. * trampoline image while tcp_cc_x is still running.
  581. *
  582. * Thus, a rcu grace period is needed here.
  583. */
  584. call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
  585. }
  586. }