test_kmem.c

// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"

/*
 * Memory cgroup charging is performed using percpu batches 64 pages
 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
 * the maximum discrepancy between charge and vmstat entries is number
 * of cpus multiplied by 64 pages.
 */
#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())

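/*
 * stat() lookups of paths that don't exist leave negative dentries behind;
 * their memory is charged to the cgroup as (reclaimable) slab.
 */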
static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long i;
	struct stat st;
	char buf[128];

	for (i = 0; i < (unsigned long)arg; i++) {
		snprintf(buf, sizeof(buf),
			"/something-non-existent-with-a-long-name-%64lu-%d",
			i, getpid());
		stat(buf, &st);
	}

	return 0;
}

/*
 * This test allocates 100000 negative dentries with long names.
 * Then it checks that "slab" in memory.stat is larger than 1M.
 * Then it sets memory.high to 1M and checks that at least 1/2
 * of slab memory has been reclaimed.
 */
static int test_kmem_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;
	long slab0, slab1, current;

	cg = cg_name(root, "kmem_basic_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, alloc_dcache, (void *)100000))
		goto cleanup;

	slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab0 < (1 << 20))
		goto cleanup;

	cg_write(cg, "memory.high", "1M");
	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
	if (slab1 <= 0)
		goto cleanup;

	current = cg_read_long(cg, "memory.current");
	if (current <= 0)
		goto cleanup;

	if (slab1 < slab0 / 2 && current < slab0 / 2)
		ret = KSFT_PASS;

cleanup:
	cg_destroy(cg);
	free(cg);
	return ret;
}

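/*
 * Allocate negative dentries from 2 * NR_CPUS threads in parallel, so that
 * the allocations are spread over all CPUs and the per-cpu charge batches
 * and vmstat deltas get populated everywhere.
 */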
static void *alloc_kmem_fn(void *arg)
{
	alloc_dcache(NULL, (void *)100);
	return NULL;
}

static int alloc_kmem_smp(const char *cgroup, void *arg)
{
	int nr_threads = 2 * get_nprocs();
	pthread_t *tinfo;
	unsigned long i;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	for (i = 0; i < nr_threads; i++) {
		ret = pthread_join(tinfo[i], NULL);
		if (ret)
			break;
	}

	free(tinfo);
	return ret;
}

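/*
 * Create a child cgroup under @parent, run @fn inside it and destroy it
 * again, repeated @times times.
 */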
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		if (cg_run(child, fn, arg)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		cg_destroy(child);
		free(child);
	}

	return 0;
}

/*
 * The test creates and destroys a large number of cgroups. In each cgroup it
 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 * threads. Then it checks the sanity of numbers on the parent level:
 * the total size of the cgroups should be roughly equal to
 * anon + file + slab + kernel_stack + pagetables + percpu + sock.
 */
static int test_kmem_memcg_deletion(const char *root)
{
	long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum;
	int ret = KSFT_FAIL;
	char *parent;

	parent = cg_name(root, "kmem_memcg_deletion_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
		goto cleanup;

	current = cg_read_long(parent, "memory.current");
	slab = cg_read_key_long(parent, "memory.stat", "slab ");
	anon = cg_read_key_long(parent, "memory.stat", "anon ");
	file = cg_read_key_long(parent, "memory.stat", "file ");
	kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
	pagetables = cg_read_key_long(parent, "memory.stat", "pagetables ");
	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
	sock = cg_read_key_long(parent, "memory.stat", "sock ");
	if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
	    kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0)
		goto cleanup;

	sum = slab + anon + file + kernel_stack + pagetables + percpu + sock;
	if (labs(sum - current) < MAX_VMSTAT_ERROR) {
		ret = KSFT_PASS;
	} else {
		printf("memory.current = %ld\n", current);
		printf("slab + anon + file + kernel_stack + pagetables + percpu + sock = %ld\n",
		       sum);
		printf("slab = %ld\n", slab);
		printf("anon = %ld\n", anon);
		printf("file = %ld\n", file);
		printf("kernel_stack = %ld\n", kernel_stack);
		printf("pagetables = %ld\n", pagetables);
		printf("percpu = %ld\n", percpu);
		printf("sock = %ld\n", sock);
	}

cleanup:
	cg_destroy(parent);
	free(parent);
	return ret;
}

/*
 * The test reads the entire /proc/kpagecgroup. If the read completes
 * successfully (and the kernel doesn't panic), the test is treated as passed.
 */
static int test_kmem_proc_kpagecgroup(const char *root)
{
	unsigned long buf[128];
	int ret = KSFT_FAIL;
	ssize_t len;
	int fd;

	fd = open("/proc/kpagecgroup", O_RDONLY);
	if (fd < 0)
		return ret;

	do {
		len = read(fd, buf, sizeof(buf));
	} while (len > 0);

	if (len == 0)
		ret = KSFT_PASS;

	close(fd);
	return ret;
}

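/*
 * Park a thread in sleep() so that its kernel stack stays allocated while
 * the spawning process samples kernel_stack in memory.stat.
 */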
static void *pthread_wait_fn(void *arg)
{
	sleep(100);
	return NULL;
}

static int spawn_1000_threads(const char *cgroup, void *arg)
{
	int nr_threads = 1000;
	pthread_t *tinfo;
	unsigned long i;
	long stack;
	int ret = -1;

	tinfo = calloc(nr_threads, sizeof(pthread_t));
	if (tinfo == NULL)
		return -1;

	for (i = 0; i < nr_threads; i++) {
		if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
				   (void *)i)) {
			free(tinfo);
			return -1;
		}
	}

	stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
	if (stack >= 4096 * 1000)
		ret = 0;

	free(tinfo);
	return ret;
}

/*
 * The test spawns a process, which spawns 1000 threads. Then it checks
 * that memory.stat's kernel_stack is at least 1000 pages large.
 */
static int test_kmem_kernel_stacks(const char *root)
{
	int ret = KSFT_FAIL;
	char *cg = NULL;

	cg = cg_name(root, "kmem_kernel_stacks_test");
	if (!cg)
		goto cleanup;

	if (cg_create(cg))
		goto cleanup;

	if (cg_run(cg, spawn_1000_threads, NULL))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	cg_destroy(cg);
	free(cg);
	return ret;
}

/*
 * This test sequentially creates 30 child cgroups, allocates some
 * kernel memory in each of them, and deletes them. Then it checks
 * that the number of dying cgroups on the parent level is 0.
 */
static int test_kmem_dead_cgroups(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent;
	long dead;
	int i;

	parent = cg_name(root, "kmem_dead_cgroups_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
		goto cleanup;

	for (i = 0; i < 5; i++) {
		dead = cg_read_key_long(parent, "cgroup.stat",
					"nr_dying_descendants ");
		if (dead == 0) {
			ret = KSFT_PASS;
			break;
		}
		/*
		 * Reclaiming cgroups might take some time,
		 * let's wait a bit and repeat.
		 */
		sleep(1);
	}

cleanup:
	cg_destroy(parent);
	free(parent);
	return ret;
}

/*
 * This test creates a sub-tree with 1000 memory cgroups.
 * Then it checks that the memory.current on the parent level
 * is greater than 0 and approximately matches the percpu value
 * from memory.stat.
 */
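/*
 * Note: every memory cgroup allocates some per-cpu data (e.g. vmstat
 * counters) at creation time, so even empty child cgroups contribute
 * to the "percpu" entry.
 */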
static int test_percpu_basic(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long current, percpu;
	int i;

	parent = cg_name(root, "percpu_basic_test");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child))
			goto cleanup_children;

		free(child);
	}

	current = cg_read_long(parent, "memory.current");
	percpu = cg_read_key_long(parent, "memory.stat", "percpu ");

	if (current > 0 && percpu > 0 && labs(current - percpu) <
	    MAX_VMSTAT_ERROR)
		ret = KSFT_PASS;
	else
		printf("memory.current %ld\npercpu %ld\n",
		       current, percpu);

cleanup_children:
	for (i = 0; i < 1000; i++) {
		child = cg_name_indexed(parent, "child", i);
		cg_destroy(child);
		free(child);
	}

cleanup:
	cg_destroy(parent);
	free(parent);
	return ret;
}

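/*
 * T() pairs a test function with its stringified name, so main() can
 * print per-test results.
 */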
#define T(x) { x, #x }
struct kmem_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_kmem_basic),
	T(test_kmem_memcg_deletion),
	T(test_kmem_proc_kpagecgroup),
	T(test_kmem_kernel_stacks),
	T(test_kmem_dead_cgroups),
	T(test_percpu_basic),
};
#undef T

int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}