// SPDX-License-Identifier: GPL-2.0
/*
 * Test module for stress and analyze performance of vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/rwsem.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
  19. #define __param(type, name, init, msg) \
  20. static type name = init; \
  21. module_param(name, type, 0444); \
  22. MODULE_PARM_DESC(name, msg) \
  23. __param(int, nr_threads, 0,
  24. "Number of workers to perform tests(min: 1 max: USHRT_MAX)");
  25. __param(bool, sequential_test_order, false,
  26. "Use sequential stress tests order");
  27. __param(int, test_repeat_count, 1,
  28. "Set test repeat counter");
  29. __param(int, test_loop_count, 1000000,
  30. "Set test loop counter");
  31. __param(int, nr_pages, 0,
  32. "Set number of pages for fix_size_alloc_test(default: 1)");
  33. __param(int, run_test_mask, INT_MAX,
  34. "Set tests specified in the mask.\n\n"
  35. "\t\tid: 1, name: fix_size_alloc_test\n"
  36. "\t\tid: 2, name: full_fit_alloc_test\n"
  37. "\t\tid: 4, name: long_busy_list_alloc_test\n"
  38. "\t\tid: 8, name: random_size_alloc_test\n"
  39. "\t\tid: 16, name: fix_align_alloc_test\n"
  40. "\t\tid: 32, name: random_size_align_alloc_test\n"
  41. "\t\tid: 64, name: align_shift_alloc_test\n"
  42. "\t\tid: 128, name: pcpu_alloc_test\n"
  43. "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n"
  44. "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n"
  45. /* Add a new test case description here. */
  46. );
/*
 * Read write semaphore for synchronization of setup
 * phase that is done in main thread and workers.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);

/* Number of workers that have not yet finished; see test_report_one_done(). */
static atomic_t test_n_undone = ATOMIC_INIT(0);
  57. static inline void
  58. test_report_one_done(void)
  59. {
  60. if (atomic_dec_and_test(&test_n_undone))
  61. complete(&test_all_done_comp);
  62. }
  63. static int random_size_align_alloc_test(void)
  64. {
  65. unsigned long size, align;
  66. unsigned int rnd;
  67. void *ptr;
  68. int i;
  69. for (i = 0; i < test_loop_count; i++) {
  70. rnd = get_random_u8();
  71. /*
  72. * Maximum 1024 pages, if PAGE_SIZE is 4096.
  73. */
  74. align = 1 << (rnd % 23);
  75. /*
  76. * Maximum 10 pages.
  77. */
  78. size = ((rnd % 10) + 1) * PAGE_SIZE;
  79. ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
  80. __builtin_return_address(0));
  81. if (!ptr)
  82. return -1;
  83. vfree(ptr);
  84. }
  85. return 0;
  86. }
  87. /*
  88. * This test case is supposed to be failed.
  89. */
  90. static int align_shift_alloc_test(void)
  91. {
  92. unsigned long align;
  93. void *ptr;
  94. int i;
  95. for (i = 0; i < BITS_PER_LONG; i++) {
  96. align = ((unsigned long) 1) << i;
  97. ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
  98. __builtin_return_address(0));
  99. if (!ptr)
  100. return -1;
  101. vfree(ptr);
  102. }
  103. return 0;
  104. }
  105. static int fix_align_alloc_test(void)
  106. {
  107. void *ptr;
  108. int i;
  109. for (i = 0; i < test_loop_count; i++) {
  110. ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
  111. GFP_KERNEL | __GFP_ZERO, 0,
  112. __builtin_return_address(0));
  113. if (!ptr)
  114. return -1;
  115. vfree(ptr);
  116. }
  117. return 0;
  118. }
  119. static int random_size_alloc_test(void)
  120. {
  121. unsigned int n;
  122. void *p;
  123. int i;
  124. for (i = 0; i < test_loop_count; i++) {
  125. n = prandom_u32_max(100) + 1;
  126. p = vmalloc(n * PAGE_SIZE);
  127. if (!p)
  128. return -1;
  129. *((__u8 *)p) = 1;
  130. vfree(p);
  131. }
  132. return 0;
  133. }
  134. static int long_busy_list_alloc_test(void)
  135. {
  136. void *ptr_1, *ptr_2;
  137. void **ptr;
  138. int rv = -1;
  139. int i;
  140. ptr = vmalloc(sizeof(void *) * 15000);
  141. if (!ptr)
  142. return rv;
  143. for (i = 0; i < 15000; i++)
  144. ptr[i] = vmalloc(1 * PAGE_SIZE);
  145. for (i = 0; i < test_loop_count; i++) {
  146. ptr_1 = vmalloc(100 * PAGE_SIZE);
  147. if (!ptr_1)
  148. goto leave;
  149. ptr_2 = vmalloc(1 * PAGE_SIZE);
  150. if (!ptr_2) {
  151. vfree(ptr_1);
  152. goto leave;
  153. }
  154. *((__u8 *)ptr_1) = 0;
  155. *((__u8 *)ptr_2) = 1;
  156. vfree(ptr_1);
  157. vfree(ptr_2);
  158. }
  159. /* Success */
  160. rv = 0;
  161. leave:
  162. for (i = 0; i < 15000; i++)
  163. vfree(ptr[i]);
  164. vfree(ptr);
  165. return rv;
  166. }
  167. static int full_fit_alloc_test(void)
  168. {
  169. void **ptr, **junk_ptr, *tmp;
  170. int junk_length;
  171. int rv = -1;
  172. int i;
  173. junk_length = fls(num_online_cpus());
  174. junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);
  175. ptr = vmalloc(sizeof(void *) * junk_length);
  176. if (!ptr)
  177. return rv;
  178. junk_ptr = vmalloc(sizeof(void *) * junk_length);
  179. if (!junk_ptr) {
  180. vfree(ptr);
  181. return rv;
  182. }
  183. for (i = 0; i < junk_length; i++) {
  184. ptr[i] = vmalloc(1 * PAGE_SIZE);
  185. junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
  186. }
  187. for (i = 0; i < junk_length; i++)
  188. vfree(junk_ptr[i]);
  189. for (i = 0; i < test_loop_count; i++) {
  190. tmp = vmalloc(1 * PAGE_SIZE);
  191. if (!tmp)
  192. goto error;
  193. *((__u8 *)tmp) = 1;
  194. vfree(tmp);
  195. }
  196. /* Success */
  197. rv = 0;
  198. error:
  199. for (i = 0; i < junk_length; i++)
  200. vfree(ptr[i]);
  201. vfree(ptr);
  202. vfree(junk_ptr);
  203. return rv;
  204. }
  205. static int fix_size_alloc_test(void)
  206. {
  207. void *ptr;
  208. int i;
  209. for (i = 0; i < test_loop_count; i++) {
  210. ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE);
  211. if (!ptr)
  212. return -1;
  213. *((__u8 *)ptr) = 0;
  214. vfree(ptr);
  215. }
  216. return 0;
  217. }
  218. static int
  219. pcpu_alloc_test(void)
  220. {
  221. int rv = 0;
  222. #ifndef CONFIG_NEED_PER_CPU_KM
  223. void __percpu **pcpu;
  224. size_t size, align;
  225. int i;
  226. pcpu = vmalloc(sizeof(void __percpu *) * 35000);
  227. if (!pcpu)
  228. return -1;
  229. for (i = 0; i < 35000; i++) {
  230. size = prandom_u32_max(PAGE_SIZE / 4) + 1;
  231. /*
  232. * Maximum PAGE_SIZE
  233. */
  234. align = 1 << (prandom_u32_max(11) + 1);
  235. pcpu[i] = __alloc_percpu(size, align);
  236. if (!pcpu[i])
  237. rv = -1;
  238. }
  239. for (i = 0; i < 35000; i++)
  240. free_percpu(pcpu[i]);
  241. vfree(pcpu);
  242. #endif
  243. return rv;
  244. }
/*
 * Payload used by the kvfree_rcu() test cases below. The embedded
 * rcu_head is what the two-argument kvfree_rcu(p, rcu) form uses.
 */
struct test_kvfree_rcu {
	struct rcu_head rcu;
	unsigned char array[20];
};
  249. static int
  250. kvfree_rcu_1_arg_vmalloc_test(void)
  251. {
  252. struct test_kvfree_rcu *p;
  253. int i;
  254. for (i = 0; i < test_loop_count; i++) {
  255. p = vmalloc(1 * PAGE_SIZE);
  256. if (!p)
  257. return -1;
  258. p->array[0] = 'a';
  259. kvfree_rcu(p);
  260. }
  261. return 0;
  262. }
  263. static int
  264. kvfree_rcu_2_arg_vmalloc_test(void)
  265. {
  266. struct test_kvfree_rcu *p;
  267. int i;
  268. for (i = 0; i < test_loop_count; i++) {
  269. p = vmalloc(1 * PAGE_SIZE);
  270. if (!p)
  271. return -1;
  272. p->array[0] = 'a';
  273. kvfree_rcu(p, rcu);
  274. }
  275. return 0;
  276. }
/* Name/function pair describing one test case. */
struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);	/* returns 0 on pass, non-zero on fail */
};
/*
 * Table of all test cases. The position in this array is the bit
 * index checked against run_test_mask (1 << index), so the order
 * here must stay in sync with the ids listed in the run_test_mask
 * parameter description.
 */
static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test },
	{ "full_fit_alloc_test", full_fit_alloc_test },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
	{ "random_size_alloc_test", random_size_alloc_test },
	{ "fix_align_alloc_test", fix_align_alloc_test },
	{ "random_size_align_alloc_test", random_size_align_alloc_test },
	{ "align_shift_alloc_test", align_shift_alloc_test },
	{ "pcpu_alloc_test", pcpu_alloc_test },
	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
	/* Add a new test case here. */
};
/* Per-test-case counters accumulated by one worker thread. */
struct test_case_data {
	int test_failed;
	int test_passed;
	u64 time;	/* average duration of one repeat, in usec */
};

/*
 * One instance per worker thread; the array is allocated in
 * init_test_configurtion(). start/stop are cycle counters taken
 * around the whole test run of that worker.
 */
static struct test_driver {
	struct task_struct *task;
	struct test_case_data data[ARRAY_SIZE(test_case_array)];
	unsigned long start;
	unsigned long stop;
} *tdriver;
  305. static void shuffle_array(int *arr, int n)
  306. {
  307. int i, j;
  308. for (i = n - 1; i > 0; i--) {
  309. /* Cut the range. */
  310. j = prandom_u32_max(i);
  311. /* Swap indexes. */
  312. swap(arr[i], arr[j]);
  313. }
  314. }
  315. static int test_func(void *private)
  316. {
  317. struct test_driver *t = private;
  318. int random_array[ARRAY_SIZE(test_case_array)];
  319. int index, i, j;
  320. ktime_t kt;
  321. u64 delta;
  322. for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
  323. random_array[i] = i;
  324. if (!sequential_test_order)
  325. shuffle_array(random_array, ARRAY_SIZE(test_case_array));
  326. /*
  327. * Block until initialization is done.
  328. */
  329. down_read(&prepare_for_test_rwsem);
  330. t->start = get_cycles();
  331. for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
  332. index = random_array[i];
  333. /*
  334. * Skip tests if run_test_mask has been specified.
  335. */
  336. if (!((run_test_mask & (1 << index)) >> index))
  337. continue;
  338. kt = ktime_get();
  339. for (j = 0; j < test_repeat_count; j++) {
  340. if (!test_case_array[index].test_func())
  341. t->data[index].test_passed++;
  342. else
  343. t->data[index].test_failed++;
  344. }
  345. /*
  346. * Take an average time that test took.
  347. */
  348. delta = (u64) ktime_us_delta(ktime_get(), kt);
  349. do_div(delta, (u32) test_repeat_count);
  350. t->data[index].time = delta;
  351. }
  352. t->stop = get_cycles();
  353. up_read(&prepare_for_test_rwsem);
  354. test_report_one_done();
  355. /*
  356. * Wait for the kthread_stop() call.
  357. */
  358. while (!kthread_should_stop())
  359. msleep(10);
  360. return 0;
  361. }
  362. static int
  363. init_test_configurtion(void)
  364. {
  365. /*
  366. * A maximum number of workers is defined as hard-coded
  367. * value and set to USHRT_MAX. We add such gap just in
  368. * case and for potential heavy stressing.
  369. */
  370. nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);
  371. /* Allocate the space for test instances. */
  372. tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
  373. if (tdriver == NULL)
  374. return -1;
  375. if (test_repeat_count <= 0)
  376. test_repeat_count = 1;
  377. if (test_loop_count <= 0)
  378. test_loop_count = 1;
  379. return 0;
  380. }
  381. static void do_concurrent_test(void)
  382. {
  383. int i, ret;
  384. /*
  385. * Set some basic configurations plus sanity check.
  386. */
  387. ret = init_test_configurtion();
  388. if (ret < 0)
  389. return;
  390. /*
  391. * Put on hold all workers.
  392. */
  393. down_write(&prepare_for_test_rwsem);
  394. for (i = 0; i < nr_threads; i++) {
  395. struct test_driver *t = &tdriver[i];
  396. t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);
  397. if (!IS_ERR(t->task))
  398. /* Success. */
  399. atomic_inc(&test_n_undone);
  400. else
  401. pr_err("Failed to start %d kthread\n", i);
  402. }
  403. /*
  404. * Now let the workers do their job.
  405. */
  406. up_write(&prepare_for_test_rwsem);
  407. /*
  408. * Sleep quiet until all workers are done with 1 second
  409. * interval. Since the test can take a lot of time we
  410. * can run into a stack trace of the hung task. That is
  411. * why we go with completion_timeout and HZ value.
  412. */
  413. do {
  414. ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
  415. } while (!ret);
  416. for (i = 0; i < nr_threads; i++) {
  417. struct test_driver *t = &tdriver[i];
  418. int j;
  419. if (!IS_ERR(t->task))
  420. kthread_stop(t->task);
  421. for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
  422. if (!((run_test_mask & (1 << j)) >> j))
  423. continue;
  424. pr_info(
  425. "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
  426. test_case_array[j].test_name,
  427. t->data[j].test_passed,
  428. t->data[j].test_failed,
  429. test_repeat_count, test_loop_count,
  430. t->data[j].time);
  431. }
  432. pr_info("All test took worker%d=%lu cycles\n",
  433. i, t->stop - t->start);
  434. }
  435. kvfree(tdriver);
  436. }
/*
 * Module entry point: run the whole concurrent test once, then
 * deliberately fail the init so the module is unloaded again.
 */
static int vmalloc_test_init(void)
{
	do_concurrent_test();
	return -EAGAIN; /* Fail will directly unload the module */
}
/* Nothing to clean up: init already freed everything and failed on purpose. */
static void vmalloc_test_exit(void)
{
}
module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");