cgroup_util.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #define _GNU_SOURCE
  3. #include <errno.h>
  4. #include <fcntl.h>
  5. #include <linux/limits.h>
  6. #include <poll.h>
  7. #include <signal.h>
  8. #include <stdio.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <sys/inotify.h>
  12. #include <sys/stat.h>
  13. #include <sys/types.h>
  14. #include <sys/wait.h>
  15. #include <unistd.h>
  16. #include "cgroup_util.h"
  17. #include "../clone3/clone3_selftests.h"
  18. /* Returns read len on success, or -errno on failure. */
  19. static ssize_t read_text(const char *path, char *buf, size_t max_len)
  20. {
  21. ssize_t len;
  22. int fd;
  23. fd = open(path, O_RDONLY);
  24. if (fd < 0)
  25. return -errno;
  26. len = read(fd, buf, max_len - 1);
  27. if (len >= 0)
  28. buf[len] = 0;
  29. close(fd);
  30. return len < 0 ? -errno : len;
  31. }
  32. /* Returns written len on success, or -errno on failure. */
  33. static ssize_t write_text(const char *path, char *buf, ssize_t len)
  34. {
  35. int fd;
  36. fd = open(path, O_WRONLY | O_APPEND);
  37. if (fd < 0)
  38. return -errno;
  39. len = write(fd, buf, len);
  40. close(fd);
  41. return len < 0 ? -errno : len;
  42. }
  43. char *cg_name(const char *root, const char *name)
  44. {
  45. size_t len = strlen(root) + strlen(name) + 2;
  46. char *ret = malloc(len);
  47. snprintf(ret, len, "%s/%s", root, name);
  48. return ret;
  49. }
  50. char *cg_name_indexed(const char *root, const char *name, int index)
  51. {
  52. size_t len = strlen(root) + strlen(name) + 10;
  53. char *ret = malloc(len);
  54. snprintf(ret, len, "%s/%s_%d", root, name, index);
  55. return ret;
  56. }
  57. char *cg_control(const char *cgroup, const char *control)
  58. {
  59. size_t len = strlen(cgroup) + strlen(control) + 2;
  60. char *ret = malloc(len);
  61. snprintf(ret, len, "%s/%s", cgroup, control);
  62. return ret;
  63. }
  64. /* Returns 0 on success, or -errno on failure. */
  65. int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
  66. {
  67. char path[PATH_MAX];
  68. ssize_t ret;
  69. snprintf(path, sizeof(path), "%s/%s", cgroup, control);
  70. ret = read_text(path, buf, len);
  71. return ret >= 0 ? 0 : ret;
  72. }
  73. int cg_read_strcmp(const char *cgroup, const char *control,
  74. const char *expected)
  75. {
  76. size_t size;
  77. char *buf;
  78. int ret;
  79. /* Handle the case of comparing against empty string */
  80. if (!expected)
  81. return -1;
  82. else
  83. size = strlen(expected) + 1;
  84. buf = malloc(size);
  85. if (!buf)
  86. return -1;
  87. if (cg_read(cgroup, control, buf, size)) {
  88. free(buf);
  89. return -1;
  90. }
  91. ret = strcmp(expected, buf);
  92. free(buf);
  93. return ret;
  94. }
  95. int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
  96. {
  97. char buf[PAGE_SIZE];
  98. if (cg_read(cgroup, control, buf, sizeof(buf)))
  99. return -1;
  100. return strstr(buf, needle) ? 0 : -1;
  101. }
  102. long cg_read_long(const char *cgroup, const char *control)
  103. {
  104. char buf[128];
  105. if (cg_read(cgroup, control, buf, sizeof(buf)))
  106. return -1;
  107. return atol(buf);
  108. }
  109. long cg_read_key_long(const char *cgroup, const char *control, const char *key)
  110. {
  111. char buf[PAGE_SIZE];
  112. char *ptr;
  113. if (cg_read(cgroup, control, buf, sizeof(buf)))
  114. return -1;
  115. ptr = strstr(buf, key);
  116. if (!ptr)
  117. return -1;
  118. return atol(ptr + strlen(key));
  119. }
  120. long cg_read_lc(const char *cgroup, const char *control)
  121. {
  122. char buf[PAGE_SIZE];
  123. const char delim[] = "\n";
  124. char *line;
  125. long cnt = 0;
  126. if (cg_read(cgroup, control, buf, sizeof(buf)))
  127. return -1;
  128. for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
  129. cnt++;
  130. return cnt;
  131. }
  132. /* Returns 0 on success, or -errno on failure. */
  133. int cg_write(const char *cgroup, const char *control, char *buf)
  134. {
  135. char path[PATH_MAX];
  136. ssize_t len = strlen(buf), ret;
  137. snprintf(path, sizeof(path), "%s/%s", cgroup, control);
  138. ret = write_text(path, buf, len);
  139. return ret == len ? 0 : ret;
  140. }
  141. int cg_write_numeric(const char *cgroup, const char *control, long value)
  142. {
  143. char buf[64];
  144. int ret;
  145. ret = sprintf(buf, "%lu", value);
  146. if (ret < 0)
  147. return ret;
  148. return cg_write(cgroup, control, buf);
  149. }
  150. int cg_find_unified_root(char *root, size_t len)
  151. {
  152. char buf[10 * PAGE_SIZE];
  153. char *fs, *mount, *type;
  154. const char delim[] = "\n\t ";
  155. if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
  156. return -1;
  157. /*
  158. * Example:
  159. * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
  160. */
  161. for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
  162. mount = strtok(NULL, delim);
  163. type = strtok(NULL, delim);
  164. strtok(NULL, delim);
  165. strtok(NULL, delim);
  166. strtok(NULL, delim);
  167. if (strcmp(type, "cgroup2") == 0) {
  168. strncpy(root, mount, len);
  169. return 0;
  170. }
  171. }
  172. return -1;
  173. }
  174. int cg_create(const char *cgroup)
  175. {
  176. return mkdir(cgroup, 0755);
  177. }
  178. int cg_wait_for_proc_count(const char *cgroup, int count)
  179. {
  180. char buf[10 * PAGE_SIZE] = {0};
  181. int attempts;
  182. char *ptr;
  183. for (attempts = 10; attempts >= 0; attempts--) {
  184. int nr = 0;
  185. if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
  186. break;
  187. for (ptr = buf; *ptr; ptr++)
  188. if (*ptr == '\n')
  189. nr++;
  190. if (nr >= count)
  191. return 0;
  192. usleep(100000);
  193. }
  194. return -1;
  195. }
/*
 * Kill every process listed in @cgroup.
 *
 * First tries writing "1" to cgroup.kill; if that write succeeds nothing
 * else is done. Otherwise the PIDs in cgroup.procs are parsed one after
 * another and each is sent SIGKILL.
 *
 * Returns 0 on success, or -1 if cgroup.procs cannot be read or a kill()
 * call fails.
 */
int cg_killall(const char *cgroup)
{
	char buf[PAGE_SIZE];
	char *ptr = buf;

	/* If cgroup.kill exists use it. */
	if (!cg_write(cgroup, "cgroup.kill", "1"))
		return 0;

	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
		return -1;

	while (ptr < buf + sizeof(buf)) {
		/* strtol() advances ptr past the characters it consumed. */
		int pid = strtol(ptr, &ptr, 10);

		/* Nothing parsed (or a literal 0): end of the list. */
		if (pid == 0)
			break;
		/*
		 * Step over the separator that follows the PID. A number that
		 * runs straight into the terminating NUL is not signalled,
		 * because the loop stops before reaching kill().
		 */
		if (*ptr)
			ptr++;
		else
			break;
		if (kill(pid, SIGKILL))
			return -1;
	}

	return 0;
}
  218. int cg_destroy(const char *cgroup)
  219. {
  220. int ret;
  221. retry:
  222. ret = rmdir(cgroup);
  223. if (ret && errno == EBUSY) {
  224. cg_killall(cgroup);
  225. usleep(100);
  226. goto retry;
  227. }
  228. if (ret && errno == ENOENT)
  229. ret = 0;
  230. return ret;
  231. }
  232. int cg_enter(const char *cgroup, int pid)
  233. {
  234. char pidbuf[64];
  235. snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
  236. return cg_write(cgroup, "cgroup.procs", pidbuf);
  237. }
  238. int cg_enter_current(const char *cgroup)
  239. {
  240. return cg_write(cgroup, "cgroup.procs", "0");
  241. }
  242. int cg_enter_current_thread(const char *cgroup)
  243. {
  244. return cg_write(cgroup, "cgroup.threads", "0");
  245. }
  246. int cg_run(const char *cgroup,
  247. int (*fn)(const char *cgroup, void *arg),
  248. void *arg)
  249. {
  250. int pid, retcode;
  251. pid = fork();
  252. if (pid < 0) {
  253. return pid;
  254. } else if (pid == 0) {
  255. char buf[64];
  256. snprintf(buf, sizeof(buf), "%d", getpid());
  257. if (cg_write(cgroup, "cgroup.procs", buf))
  258. exit(EXIT_FAILURE);
  259. exit(fn(cgroup, arg));
  260. } else {
  261. waitpid(pid, &retcode, 0);
  262. if (WIFEXITED(retcode))
  263. return WEXITSTATUS(retcode);
  264. else
  265. return -1;
  266. }
  267. }
/*
 * Create a child process with sys_clone3() using CLONE_INTO_CGROUP, with
 * @cgroup_fd naming the target cgroup and SIGCHLD as the exit signal.
 *
 * Returns whatever sys_clone3() returned, except when that call fails
 * with ENOSYS or E2BIG: then errno is set to ENOSYS and -ENOSYS is
 * returned. The same -ENOSYS result is produced unconditionally when
 * CLONE_ARGS_SIZE_VER2 is not defined at build time.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	pid_t pid;

	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};

	pid = sys_clone3(&args, sizeof(struct __clone_args));
	/*
	 * Verify that this is a genuine test failure:
	 * ENOSYS -> clone3() not available
	 * E2BIG -> CLONE_INTO_CGROUP not available
	 */
	if (pid < 0 && (errno == ENOSYS || errno == E2BIG))
		goto pretend_enosys;

	return pid;

	/* Both "not available" cases are reported to callers as ENOSYS. */
pretend_enosys:
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
  291. int clone_reap(pid_t pid, int options)
  292. {
  293. int ret;
  294. siginfo_t info = {
  295. .si_signo = 0,
  296. };
  297. again:
  298. ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
  299. if (ret < 0) {
  300. if (errno == EINTR)
  301. goto again;
  302. return -1;
  303. }
  304. if (options & WEXITED) {
  305. if (WIFEXITED(info.si_status))
  306. return WEXITSTATUS(info.si_status);
  307. }
  308. if (options & WSTOPPED) {
  309. if (WIFSTOPPED(info.si_status))
  310. return WSTOPSIG(info.si_status);
  311. }
  312. if (options & WCONTINUED) {
  313. if (WIFCONTINUED(info.si_status))
  314. return 0;
  315. }
  316. return -1;
  317. }
  318. int dirfd_open_opath(const char *dir)
  319. {
  320. return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
  321. }
  322. #define close_prot_errno(fd) \
  323. if (fd >= 0) { \
  324. int _e_ = errno; \
  325. close(fd); \
  326. errno = _e_; \
  327. }
  328. static int clone_into_cgroup_run_nowait(const char *cgroup,
  329. int (*fn)(const char *cgroup, void *arg),
  330. void *arg)
  331. {
  332. int cgroup_fd;
  333. pid_t pid;
  334. cgroup_fd = dirfd_open_opath(cgroup);
  335. if (cgroup_fd < 0)
  336. return -1;
  337. pid = clone_into_cgroup(cgroup_fd);
  338. close_prot_errno(cgroup_fd);
  339. if (pid == 0)
  340. exit(fn(cgroup, arg));
  341. return pid;
  342. }
  343. int cg_run_nowait(const char *cgroup,
  344. int (*fn)(const char *cgroup, void *arg),
  345. void *arg)
  346. {
  347. int pid;
  348. pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
  349. if (pid > 0)
  350. return pid;
  351. /* Genuine test failure. */
  352. if (pid < 0 && errno != ENOSYS)
  353. return -1;
  354. pid = fork();
  355. if (pid == 0) {
  356. char buf[64];
  357. snprintf(buf, sizeof(buf), "%d", getpid());
  358. if (cg_write(cgroup, "cgroup.procs", buf))
  359. exit(EXIT_FAILURE);
  360. exit(fn(cgroup, arg));
  361. }
  362. return pid;
  363. }
  364. int get_temp_fd(void)
  365. {
  366. return open(".", O_TMPFILE | O_RDWR | O_EXCL);
  367. }
  368. int alloc_pagecache(int fd, size_t size)
  369. {
  370. char buf[PAGE_SIZE];
  371. struct stat st;
  372. int i;
  373. if (fstat(fd, &st))
  374. goto cleanup;
  375. size += st.st_size;
  376. if (ftruncate(fd, size))
  377. goto cleanup;
  378. for (i = 0; i < size; i += sizeof(buf))
  379. read(fd, buf, sizeof(buf));
  380. return 0;
  381. cleanup:
  382. return -1;
  383. }
  384. int alloc_anon(const char *cgroup, void *arg)
  385. {
  386. size_t size = (unsigned long)arg;
  387. char *buf, *ptr;
  388. buf = malloc(size);
  389. for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
  390. *ptr = 0;
  391. free(buf);
  392. return 0;
  393. }
  394. int is_swap_enabled(void)
  395. {
  396. char buf[PAGE_SIZE];
  397. const char delim[] = "\n";
  398. int cnt = 0;
  399. char *line;
  400. if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
  401. return -1;
  402. for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
  403. cnt++;
  404. return cnt > 1;
  405. }
  406. int set_oom_adj_score(int pid, int score)
  407. {
  408. char path[PATH_MAX];
  409. int fd, len;
  410. sprintf(path, "/proc/%d/oom_score_adj", pid);
  411. fd = open(path, O_WRONLY | O_APPEND);
  412. if (fd < 0)
  413. return fd;
  414. len = dprintf(fd, "%d", score);
  415. if (len < 0) {
  416. close(fd);
  417. return len;
  418. }
  419. close(fd);
  420. return 0;
  421. }
  422. int proc_mount_contains(const char *option)
  423. {
  424. char buf[4 * PAGE_SIZE];
  425. ssize_t read;
  426. read = read_text("/proc/mounts", buf, sizeof(buf));
  427. if (read < 0)
  428. return read;
  429. return strstr(buf, option) != NULL;
  430. }
  431. ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
  432. {
  433. char path[PATH_MAX];
  434. ssize_t ret;
  435. if (!pid)
  436. snprintf(path, sizeof(path), "/proc/%s/%s",
  437. thread ? "thread-self" : "self", item);
  438. else
  439. snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
  440. ret = read_text(path, buf, size);
  441. return ret < 0 ? -1 : ret;
  442. }
  443. int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
  444. {
  445. char buf[PAGE_SIZE];
  446. if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
  447. return -1;
  448. return strstr(buf, needle) ? 0 : -1;
  449. }
  450. int clone_into_cgroup_run_wait(const char *cgroup)
  451. {
  452. int cgroup_fd;
  453. pid_t pid;
  454. cgroup_fd = dirfd_open_opath(cgroup);
  455. if (cgroup_fd < 0)
  456. return -1;
  457. pid = clone_into_cgroup(cgroup_fd);
  458. close_prot_errno(cgroup_fd);
  459. if (pid < 0)
  460. return -1;
  461. if (pid == 0)
  462. exit(EXIT_SUCCESS);
  463. /*
  464. * We don't care whether this fails. We only care whether the initial
  465. * clone succeeded.
  466. */
  467. (void)clone_reap(pid, WEXITED);
  468. return 0;
  469. }
  470. static int __prepare_for_wait(const char *cgroup, const char *filename)
  471. {
  472. int fd, ret = -1;
  473. fd = inotify_init1(0);
  474. if (fd == -1)
  475. return fd;
  476. ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
  477. if (ret == -1) {
  478. close(fd);
  479. fd = -1;
  480. }
  481. return fd;
  482. }
  483. int cg_prepare_for_wait(const char *cgroup)
  484. {
  485. return __prepare_for_wait(cgroup, "cgroup.events");
  486. }
  487. int memcg_prepare_for_wait(const char *cgroup)
  488. {
  489. return __prepare_for_wait(cgroup, "memory.events");
  490. }
  491. int cg_wait_for(int fd)
  492. {
  493. int ret = -1;
  494. struct pollfd fds = {
  495. .fd = fd,
  496. .events = POLLIN,
  497. };
  498. while (true) {
  499. ret = poll(&fds, 1, 10000);
  500. if (ret == -1) {
  501. if (errno == EINTR)
  502. continue;
  503. break;
  504. }
  505. if (ret > 0 && fds.revents & POLLIN) {
  506. ret = 0;
  507. break;
  508. }
  509. }
  510. return ret;
  511. }