// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	/* avoid overflow on round_up(map->value_size) */
	if (attr->value_size > INT_MAX)
		return -E2BIG;

	return 0;
}
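
/* Layout and sizing notes for array_map_alloc() below:
 *
 * - index_mask is max_entries rounded up to a power of two, minus one
 *   (e.g. max_entries == 5 gives index_mask == 7). Unless Spectre v1
 *   mitigations may be bypassed, max_entries itself is rounded up to
 *   index_mask + 1 so that a speculatively out-of-bounds index masked
 *   with index_mask still lands inside the allocation.
 *
 * - For BPF_F_MMAPABLE maps the element area must be page-aligned for
 *   remap_vmalloc_range(), so the struct bpf_array header is carved out
 *   of the page(s) placed in front of array->value.
 */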

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
{
	return array->value + (u64)array->elem_size * index;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
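/* The sequence emitted below is roughly:
 *
 *   r1 += offsetof(struct bpf_array, value)   // r1 = &array->value[0]
 *   r0 = *(u32 *)(r2 + 0)                     // r0 = *(u32 *)key
 *   if r0 >= map->max_entries goto miss
 *   r0 &= array->index_mask                   // only if !bypass_spec_v1
 *   r0 <<= ilog2(elem_size)                   // or r0 *= elem_size
 *   r0 += r1                                  // r0 = &array->value[index]
 *   goto out
 * miss:
 *   r0 = 0                                    // NULL
 * out:
 */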
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = array->elem_size;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (cpu >= nr_cpu_ids)
		return NULL;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}
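
/* Syscall-side lookup for per-cpu arrays: flatten the element's value from
 * every possible CPU into one user-supplied buffer, round_up(value_size, 8)
 * bytes per CPU, in for_each_possible_cpu() order.
 */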
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
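
/* Release the special fields left behind in an element that was just
 * overwritten: cancel and free a pending bpf_timer and drop any referenced
 * kptrs, since those fields are not copied by the value copy helpers.
 */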
static void check_and_free_fields(struct bpf_array *arr, void *val)
{
	if (map_value_has_timer(&arr->map))
		bpf_timer_cancel_and_free(val + arr->map.timer_off);
	if (map_value_has_kptrs(&arr->map))
		bpf_map_free_kptrs(&arr->map, val);
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
		copy_map_value(map, val, value);
		check_and_free_fields(array, val);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		check_and_free_fields(array, val);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
		check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
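
/* All array elements are pre-allocated; they can be overwritten but never
 * removed, so delete always fails.
 */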
/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We don't reset or free kptr on uref dropping to zero. */
	if (!map_value_has_timer(map))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (map_value_has_kptrs(map)) {
		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			for (i = 0; i < array->map.max_entries; i++) {
				void __percpu *pptr = array->pptrs[i & array->index_mask];
				int cpu;

				for_each_possible_cpu(cpu) {
					bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
					cond_resched();
				}
			}
		} else {
			for (i = 0; i < array->map.max_entries; i++)
				bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
		}
		bpf_map_free_kptr_off_tab(map);
	}

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}
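
/* mmap() support for BPF_F_MMAPABLE arrays: vma->vm_pgoff is an offset into
 * the value area; the pages occupied by the struct bpf_array header in
 * front of array->value are added on top (pgoff), so only the element data
 * can ever be mapped, and never more than the page-aligned value area.
 */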
static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = array->elem_size;
			for_each_possible_cpu(cpu) {
				copy_map_value_long(map, info->percpu_value_buf + off,
						    per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(map, info->percpu_value_buf + off);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = array->elem_size * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
	 * released before or in the middle of iterating map elements, so
	 * acquire an extra map uref for iterator.
	 */
	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
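
/* Walk every element and invoke callback_fn(map, &key, value, callback_ctx, 0).
 * A non-zero return from the callback stops the walk early; the number of
 * elements visited is returned either way. For per-cpu arrays migration is
 * disabled so that this_cpu_ptr() refers to one CPU for the whole walk.
 */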
static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array_map_elem_ptr(array, i);
		num_elems++;
		key = i;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_map_compatible(map, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
				      struct bpf_prog *new, struct bpf_prog *old)
{
	WARN_ON_ONCE(1);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) Also programs reaching refcount of zero while patching
			 *    is in progress is okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			bpf_arch_poke_desc_update(poke, new, old);
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_id = &array_map_btf_ids[0],
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}
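
/* Validate and wrap a perf event fd for storage in the map: hold a
 * reference on the perf file, reject events that perf_event_read_local()
 * cannot read, and store them as bpf_event_entry objects which are
 * released after an RCU grace period.
 */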
static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}
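
/* Same sequence as array_map_gen_lookup(), with one extra load to fetch the
 * inner map pointer out of the slot and a check that turns an empty slot
 * into a NULL result.
 */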
static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};