/* test_lru_dist.c */
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2016 Facebook
  4. */
  5. #define _GNU_SOURCE
  6. #include <linux/types.h>
  7. #include <stdio.h>
  8. #include <unistd.h>
  9. #include <linux/bpf.h>
  10. #include <errno.h>
  11. #include <string.h>
  12. #include <assert.h>
  13. #include <sched.h>
  14. #include <sys/wait.h>
  15. #include <sys/stat.h>
  16. #include <fcntl.h>
  17. #include <stdlib.h>
  18. #include <time.h>
  19. #include <bpf/bpf.h>
  20. #include "bpf_util.h"
/* Smaller of two values; both arguments are evaluated twice. */
#define min(a, b) ((a) < (b) ? (a) : (b))

#ifndef offsetof
/* Byte offset of MEMBER within TYPE (fallback if <stddef.h> didn't define it). */
# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif

/* Recover the enclosing struct pointer from a pointer to one of its members
 * (kernel-style, uses the GCC/Clang statement-expression extension).
 */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})
/* Number of possible CPUs; set once in main() via bpf_num_possible_cpus(). */
static int nr_cpus;
/* Key trace loaded from the dist file by read_keys(); freed in main(). */
static unsigned long long *dist_keys;
/* Number of entries in dist_keys. */
static unsigned int dist_key_counts;
/* Minimal kernel-style circular doubly-linked list node/head. */
struct list_head {
	struct list_head *next, *prev;
};
  34. static inline void INIT_LIST_HEAD(struct list_head *list)
  35. {
  36. list->next = list;
  37. list->prev = list;
  38. }
  39. static inline int list_empty(const struct list_head *head)
  40. {
  41. return head->next == head;
  42. }
  43. static inline void __list_add(struct list_head *new,
  44. struct list_head *prev,
  45. struct list_head *next)
  46. {
  47. next->prev = new;
  48. new->next = next;
  49. new->prev = prev;
  50. prev->next = new;
  51. }
  52. static inline void list_add(struct list_head *new, struct list_head *head)
  53. {
  54. __list_add(new, head, head->next);
  55. }
  56. static inline void __list_del(struct list_head *prev, struct list_head *next)
  57. {
  58. next->prev = prev;
  59. prev->next = next;
  60. }
  61. static inline void __list_del_entry(struct list_head *entry)
  62. {
  63. __list_del(entry->prev, entry->next);
  64. }
  65. static inline void list_move(struct list_head *list, struct list_head *head)
  66. {
  67. __list_del_entry(list);
  68. list_add(list, head);
  69. }
/* Map a struct list_head pointer back to its containing entry. */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)
/* Last entry on the list headed at @ptr (the least recently moved-in). */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
/* One slot of the perfect-LRU node pool: recency-list linkage plus the key. */
struct pfect_lru_node {
	struct list_head list;
	unsigned long long key;
};
/*
 * Userspace "perfect" LRU used as a reference implementation to compare
 * against the kernel's BPF LRU map behaviour.
 */
struct pfect_lru {
	struct list_head list;		/* recency order; most recent at head */
	struct pfect_lru_node *free_nodes;	/* pre-allocated node pool */
	unsigned int cur_size;		/* pool nodes handed out so far */
	unsigned int lru_size;		/* LRU capacity */
	unsigned int nr_unique;		/* distinct keys ever inserted */
	unsigned int nr_misses;		/* lookups that missed */
	unsigned int total;		/* total lookups */
	int map_fd;			/* BPF hash map: key -> node pointer */
};
  88. static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
  89. unsigned int nr_possible_elems)
  90. {
  91. lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
  92. sizeof(unsigned long long),
  93. sizeof(struct pfect_lru_node *),
  94. nr_possible_elems, NULL);
  95. assert(lru->map_fd != -1);
  96. lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
  97. assert(lru->free_nodes);
  98. INIT_LIST_HEAD(&lru->list);
  99. lru->cur_size = 0;
  100. lru->lru_size = lru_size;
  101. lru->nr_unique = lru->nr_misses = lru->total = 0;
  102. }
  103. static void pfect_lru_destroy(struct pfect_lru *lru)
  104. {
  105. close(lru->map_fd);
  106. free(lru->free_nodes);
  107. }
/*
 * Look @key up in the perfect LRU, inserting it (and evicting the least
 * recently used entry when full) on a miss.  Returns 1 on a hit; otherwise
 * returns whether the key had ever been seen before (0 or 1).  Counters
 * total/nr_misses/nr_unique are updated accordingly.
 */
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
				      unsigned long long key)
{
	struct pfect_lru_node *node = NULL;
	int seen = 0;

	lru->total++;
	if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
		if (node) {
			/* Hit: bump the node to the front of the recency list */
			list_move(&node->list, &lru->list);
			return 1;
		}
		/* Key known, but its node was evicted (NULL placeholder) */
		seen = 1;
	}

	if (lru->cur_size < lru->lru_size) {
		/* Pool not exhausted yet: hand out a fresh node */
		node = &lru->free_nodes[lru->cur_size++];
		INIT_LIST_HEAD(&node->list);
	} else {
		struct pfect_lru_node *null_node = NULL;

		/* Evict the tail (LRU) entry; keep its key in the map with a
		 * NULL node so a future lookup still reports "seen".
		 */
		node = list_last_entry(&lru->list,
				       struct pfect_lru_node,
				       list);
		bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
	}

	node->key = key;
	list_move(&node->list, &lru->list);

	lru->nr_misses++;
	if (seen) {
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
	} else {
		lru->nr_unique++;
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
	}

	return seen;
}
  142. static unsigned int read_keys(const char *dist_file,
  143. unsigned long long **keys)
  144. {
  145. struct stat fst;
  146. unsigned long long *retkeys;
  147. unsigned int counts = 0;
  148. int dist_fd;
  149. char *b, *l;
  150. int i;
  151. dist_fd = open(dist_file, 0);
  152. assert(dist_fd != -1);
  153. assert(fstat(dist_fd, &fst) == 0);
  154. b = malloc(fst.st_size);
  155. assert(b);
  156. assert(read(dist_fd, b, fst.st_size) == fst.st_size);
  157. close(dist_fd);
  158. for (i = 0; i < fst.st_size; i++) {
  159. if (b[i] == '\n')
  160. counts++;
  161. }
  162. counts++; /* in case the last line has no \n */
  163. retkeys = malloc(counts * sizeof(unsigned long long));
  164. assert(retkeys);
  165. counts = 0;
  166. for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
  167. retkeys[counts++] = strtoull(l, NULL, 10);
  168. free(b);
  169. *keys = retkeys;
  170. return counts;
  171. }
  172. static int create_map(int map_type, int map_flags, unsigned int size)
  173. {
  174. LIBBPF_OPTS(bpf_map_create_opts, opts,
  175. .map_flags = map_flags,
  176. );
  177. int map_fd;
  178. map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
  179. sizeof(unsigned long long), size, &opts);
  180. if (map_fd == -1)
  181. perror("bpf_create_map");
  182. return map_fd;
  183. }
  184. static int sched_next_online(int pid, int next_to_try)
  185. {
  186. cpu_set_t cpuset;
  187. if (next_to_try == nr_cpus)
  188. return -1;
  189. while (next_to_try < nr_cpus) {
  190. CPU_ZERO(&cpuset);
  191. CPU_SET(next_to_try++, &cpuset);
  192. if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
  193. break;
  194. }
  195. return next_to_try;
  196. }
/*
 * Fork @tasks children, pinning each to its own CPU, and run fn(i, data)
 * in child i.  The parent then waits for every child and asserts a clean
 * (status 0) exit.
 */
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	int next_sched_cpu = 0;
	pid_t pid[tasks];
	int i;

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			/* Child: pin to the next available CPU, run the work */
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			fn(i, data);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		}
		/* It is mostly redundant and just allow the parent
		 * process to update next_sched_cpu for the next child
		 * process
		 */
		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
	}

	for (i = 0; i < tasks; i++) {
		int status;

		assert(waitpid(pid[i], &status, 0) == pid[i]);
		assert(status == 0);
	}
}
/*
 * Child worker for test_parallel_lru_dist(): replay the dist-file key
 * trace (offset so each task touches a disjoint key range) against both
 * the shared BPF LRU map and a local "perfect LRU", then print the miss
 * statistics of each for comparison.
 */
static void do_test_lru_dist(int task, void *data)
{
	unsigned int nr_misses = 0;
	struct pfect_lru pfect_lru;
	unsigned long long key, value = 1234;
	unsigned int i;
	unsigned int lru_map_fd = ((unsigned int *)data)[0];
	unsigned int lru_size = ((unsigned int *)data)[1];
	/* Disjoint key space per task so parallel children don't collide */
	unsigned long long key_offset = task * dist_key_counts;

	pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);

	for (i = 0; i < dist_key_counts; i++) {
		key = dist_keys[i] + key_offset;

		pfect_lru_lookup_or_insert(&pfect_lru, key);

		/* Hit in the BPF LRU map: nothing more to do */
		if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
			continue;

		/* Miss: insert the key and count the miss */
		if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
			printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
			       key, errno);
			assert(0);
		}

		nr_misses++;
	}

	printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
	       dist_key_counts);
	printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, pfect_lru.total,
	       pfect_lru.nr_misses, pfect_lru.total);

	pfect_lru_destroy(&pfect_lru);
	close(lru_map_fd);
}
  256. static void test_parallel_lru_dist(int map_type, int map_flags,
  257. int nr_tasks, unsigned int lru_size)
  258. {
  259. int child_data[2];
  260. int lru_map_fd;
  261. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  262. map_flags);
  263. if (map_flags & BPF_F_NO_COMMON_LRU)
  264. lru_map_fd = create_map(map_type, map_flags,
  265. nr_cpus * lru_size);
  266. else
  267. lru_map_fd = create_map(map_type, map_flags,
  268. nr_tasks * lru_size);
  269. assert(lru_map_fd != -1);
  270. child_data[0] = lru_map_fd;
  271. child_data[1] = lru_size;
  272. run_parallel(nr_tasks, do_test_lru_dist, child_data);
  273. close(lru_map_fd);
  274. }
  275. static void test_lru_loss0(int map_type, int map_flags)
  276. {
  277. unsigned long long key, value[nr_cpus];
  278. unsigned int old_unused_losses = 0;
  279. unsigned int new_unused_losses = 0;
  280. unsigned int used_losses = 0;
  281. int map_fd;
  282. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  283. map_flags);
  284. assert(sched_next_online(0, 0) != -1);
  285. if (map_flags & BPF_F_NO_COMMON_LRU)
  286. map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
  287. else
  288. map_fd = create_map(map_type, map_flags, 900);
  289. assert(map_fd != -1);
  290. value[0] = 1234;
  291. for (key = 1; key <= 1000; key++) {
  292. int start_key, end_key;
  293. assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
  294. start_key = 101;
  295. end_key = min(key, 900);
  296. while (start_key <= end_key) {
  297. bpf_map_lookup_elem(map_fd, &start_key, value);
  298. start_key++;
  299. }
  300. }
  301. for (key = 1; key <= 1000; key++) {
  302. if (bpf_map_lookup_elem(map_fd, &key, value)) {
  303. if (key <= 100)
  304. old_unused_losses++;
  305. else if (key <= 900)
  306. used_losses++;
  307. else
  308. new_unused_losses++;
  309. }
  310. }
  311. close(map_fd);
  312. printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
  313. "newer-elem-losses:%d(/100)\n",
  314. old_unused_losses, used_losses, new_unused_losses);
  315. }
  316. static void test_lru_loss1(int map_type, int map_flags)
  317. {
  318. unsigned long long key, value[nr_cpus];
  319. int map_fd;
  320. unsigned int nr_losses = 0;
  321. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  322. map_flags);
  323. assert(sched_next_online(0, 0) != -1);
  324. if (map_flags & BPF_F_NO_COMMON_LRU)
  325. map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
  326. else
  327. map_fd = create_map(map_type, map_flags, 1000);
  328. assert(map_fd != -1);
  329. value[0] = 1234;
  330. for (key = 1; key <= 1000; key++)
  331. assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
  332. for (key = 1; key <= 1000; key++) {
  333. if (bpf_map_lookup_elem(map_fd, &key, value))
  334. nr_losses++;
  335. }
  336. close(map_fd);
  337. printf("nr_losses:%d(/1000)\n", nr_losses);
  338. }
/*
 * Child worker for test_parallel_lru_loss(): install 1000 "stable" keys,
 * then run 100k random operations that are ~90% lookups of stable keys
 * (keeping them warm) and ~10% insertions of fresh churn keys.  Finally
 * count how many stable keys the LRU evicted.
 */
static void do_test_parallel_lru_loss(int task, void *data)
{
	const unsigned int nr_stable_elems = 1000;
	const unsigned int nr_repeats = 100000;

	int map_fd = *(int *)data;
	unsigned long long stable_base;
	unsigned long long key, value[nr_cpus];
	unsigned long long next_ins_key;
	unsigned int nr_losses = 0;
	unsigned int i;

	/* Disjoint key range per task: stable keys plus churn headroom */
	stable_base = task * nr_repeats * 2 + 1;
	next_ins_key = stable_base;
	value[0] = 1234;

	/* Fill in the stable elements that should stay resident */
	for (i = 0; i < nr_stable_elems; i++) {
		assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
					   BPF_NOEXIST) == 0);
		next_ins_key++;
	}

	for (i = 0; i < nr_repeats; i++) {
		int rn;

		rn = rand();

		if (rn % 10) {
			/* ~90%: touch a random stable key to keep it warm */
			key = rn % nr_stable_elems + stable_base;
			bpf_map_lookup_elem(map_fd, &key, value);
		} else {
			/* ~10%: insert a brand-new key to churn the LRU */
			bpf_map_update_elem(map_fd, &next_ins_key, value,
					    BPF_NOEXIST);
			next_ins_key++;
		}
	}

	/* Count the stable elements the churn managed to evict */
	key = stable_base;
	for (i = 0; i < nr_stable_elems; i++) {
		if (bpf_map_lookup_elem(map_fd, &key, value))
			nr_losses++;

		key++;
	}

	printf(" task:%d nr_losses:%u\n", task, nr_losses);
}
  377. static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
  378. {
  379. int map_fd;
  380. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  381. map_flags);
  382. /* Give 20% more than the active working set */
  383. if (map_flags & BPF_F_NO_COMMON_LRU)
  384. map_fd = create_map(map_type, map_flags,
  385. nr_cpus * (1000 + 200));
  386. else
  387. map_fd = create_map(map_type, map_flags,
  388. nr_tasks * (1000 + 200));
  389. assert(map_fd != -1);
  390. run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
  391. close(map_fd);
  392. }
  393. int main(int argc, char **argv)
  394. {
  395. int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
  396. const char *dist_file;
  397. int nr_tasks = 1;
  398. int lru_size;
  399. int f;
  400. if (argc < 4) {
  401. printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
  402. argv[0]);
  403. return -1;
  404. }
  405. dist_file = argv[1];
  406. lru_size = atoi(argv[2]);
  407. nr_tasks = atoi(argv[3]);
  408. setbuf(stdout, NULL);
  409. srand(time(NULL));
  410. nr_cpus = bpf_num_possible_cpus();
  411. assert(nr_cpus != -1);
  412. printf("nr_cpus:%d\n\n", nr_cpus);
  413. nr_tasks = min(nr_tasks, nr_cpus);
  414. dist_key_counts = read_keys(dist_file, &dist_keys);
  415. if (!dist_key_counts) {
  416. printf("%s has no key\n", dist_file);
  417. return -1;
  418. }
  419. for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
  420. test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  421. test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  422. test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  423. nr_tasks);
  424. test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  425. nr_tasks, lru_size);
  426. printf("\n");
  427. }
  428. free(dist_keys);
  429. return 0;
  430. }