fuse_test.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * memfd GUP test-case
  4. * This tests memfd interactions with get_user_pages(). We require the
  5. * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
  6. * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
  7. * read() on files in that file-system will pin the receive-buffer pages for at
  8. * least 1s via get_user_pages().
  9. *
  10. * We use this trick to race ADD_SEALS against a write on a memfd object. The
  11. * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
  12. * the read() syscall with our memory-mapped memfd object as receive buffer to
  13. * force the kernel to write into our memfd object.
  14. */
  15. #define _GNU_SOURCE
  16. #define __EXPORTED_HEADERS__
  17. #include <errno.h>
  18. #include <inttypes.h>
  19. #include <limits.h>
  20. #include <linux/falloc.h>
  21. #include <fcntl.h>
  22. #include <linux/memfd.h>
  23. #include <linux/types.h>
  24. #include <sched.h>
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <signal.h>
  28. #include <string.h>
  29. #include <sys/mman.h>
  30. #include <sys/stat.h>
  31. #include <sys/syscall.h>
  32. #include <sys/wait.h>
  33. #include <unistd.h>
  34. #include "common.h"
  /* Initial value of mfd_def_size. */
  #define MFD_DEF_SIZE 8192
  /* Byte size of the stack allocated for the clone()d sealing thread. */
  #define STACK_SIZE 65536

  /* Size in bytes used for the memfd object, its mappings and the read(). */
  static size_t mfd_def_size = MFD_DEF_SIZE;

  /*
   * Create a memfd called @name with @flags and resize it to @sz bytes.
   * Any failure prints a diagnostic and aborts the test.
   * Returns the file descriptor of the new memfd.
   */
  static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
  {
      int memfd = sys_memfd_create(name, flags);

      if (memfd < 0) {
          printf("memfd_create(\"%s\", %u) failed: %m\n",
                 name, flags);
          abort();
      }

      if (ftruncate(memfd, sz) < 0) {
          printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
          abort();
      }

      return memfd;
  }
  /*
   * Query the seals of @fd with F_GET_SEALS and return them.
   * Prints a diagnostic and aborts if the fcntl() call fails.
   */
  static __u64 mfd_assert_get_seals(int fd)
  {
      long seals = fcntl(fd, F_GET_SEALS);

      if (seals >= 0)
          return seals;

      printf("GET_SEALS(%d) failed: %m\n", fd);
      abort();
  }
  /*
   * Abort with a diagnostic unless the seals currently reported for @fd
   * are exactly @seals.
   */
  static void mfd_assert_has_seals(int fd, __u64 seals)
  {
      __u64 current = mfd_assert_get_seals(fd);

      if (current == seals)
          return;

      printf("%llu != %llu = GET_SEALS(%d)\n",
             (unsigned long long)seals, (unsigned long long)current, fd);
      abort();
  }
  /*
   * Add @seals to @fd with F_ADD_SEALS; abort with a diagnostic if the
   * kernel refuses. The seals present beforehand are fetched only so the
   * failure message can show them.
   */
  static void mfd_assert_add_seals(int fd, __u64 seals)
  {
      __u64 before = mfd_assert_get_seals(fd);

      if (fcntl(fd, F_ADD_SEALS, seals) >= 0)
          return;

      printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
             fd, (unsigned long long)before, (unsigned long long)seals);
      abort();
  }
  /*
   * Try to add @seals to @fd while its pages may still be pinned.
   * Success and an EBUSY failure are both tolerated; any other failure
   * prints a diagnostic and aborts.
   * Returns the result of the F_ADD_SEALS fcntl() (negative on EBUSY).
   */
  static int mfd_busy_add_seals(int fd, __u64 seals)
  {
      long ret;
      __u64 before;

      /* current seals are only needed for the diagnostic below */
      ret = fcntl(fd, F_GET_SEALS);
      before = ret < 0 ? 0 : ret;

      ret = fcntl(fd, F_ADD_SEALS, seals);
      if (ret >= 0 || errno == EBUSY)
          return ret;

      printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
             fd, (unsigned long long)before, (unsigned long long)seals);
      abort();
  }
  103. static void *mfd_assert_mmap_shared(int fd)
  104. {
  105. void *p;
  106. p = mmap(NULL,
  107. mfd_def_size,
  108. PROT_READ | PROT_WRITE,
  109. MAP_SHARED,
  110. fd,
  111. 0);
  112. if (p == MAP_FAILED) {
  113. printf("mmap() failed: %m\n");
  114. abort();
  115. }
  116. return p;
  117. }
  118. static void *mfd_assert_mmap_private(int fd)
  119. {
  120. void *p;
  121. p = mmap(NULL,
  122. mfd_def_size,
  123. PROT_READ | PROT_WRITE,
  124. MAP_PRIVATE,
  125. fd,
  126. 0);
  127. if (p == MAP_FAILED) {
  128. printf("mmap() failed: %m\n");
  129. abort();
  130. }
  131. return p;
  132. }
  133. static int global_mfd = -1;
  134. static void *global_p = NULL;
  135. static int sealing_thread_fn(void *arg)
  136. {
  137. int sig, r;
  138. /*
  139. * This thread first waits 200ms so any pending operation in the parent
  140. * is correctly started. After that, it tries to seal @global_mfd as
  141. * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
  142. * that memory mapped object still ongoing.
  143. * We then wait one more second and try sealing again. This time it
  144. * must succeed as there shouldn't be anyone else pinning the pages.
  145. */
  146. /* wait 200ms for FUSE-request to be active */
  147. usleep(200000);
  148. /* unmount mapping before sealing to avoid i_mmap_writable failures */
  149. munmap(global_p, mfd_def_size);
  150. /* Try sealing the global file; expect EBUSY or success. Current
  151. * kernels will never succeed, but in the future, kernels might
  152. * implement page-replacements or other fancy ways to avoid racing
  153. * writes. */
  154. r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
  155. if (r >= 0) {
  156. printf("HURRAY! This kernel fixed GUP races!\n");
  157. } else {
  158. /* wait 1s more so the FUSE-request is done */
  159. sleep(1);
  160. /* try sealing the global file again */
  161. mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
  162. }
  163. return 0;
  164. }
  165. static pid_t spawn_sealing_thread(void)
  166. {
  167. uint8_t *stack;
  168. pid_t pid;
  169. stack = malloc(STACK_SIZE);
  170. if (!stack) {
  171. printf("malloc(STACK_SIZE) failed: %m\n");
  172. abort();
  173. }
  174. pid = clone(sealing_thread_fn,
  175. stack + STACK_SIZE,
  176. SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
  177. NULL);
  178. if (pid < 0) {
  179. printf("clone() failed: %m\n");
  180. abort();
  181. }
  182. return pid;
  183. }
  /*
   * Wait for the sealing thread @pid to terminate.
   *
   * The wait is restarted when interrupted by a signal. Any other
   * waitpid() failure, or a child that did not exit normally with
   * status 0, prints a diagnostic and aborts so that a crashed sealing
   * thread cannot go unnoticed.
   */
  static void join_sealing_thread(pid_t pid)
  {
      int status = 0;
      pid_t r;

      do {
          r = waitpid(pid, &status, 0);
      } while (r < 0 && errno == EINTR);

      if (r < 0) {
          printf("waitpid(%d) failed: %m\n", (int)pid);
          abort();
      }

      if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
          printf("sealing thread terminated abnormally (status 0x%x)\n",
                 (unsigned int)status);
          abort();
      }
  }
  188. int main(int argc, char **argv)
  189. {
  190. char *zero;
  191. int fd, mfd, r;
  192. void *p;
  193. int was_sealed;
  194. pid_t pid;
  195. if (argc < 2) {
  196. printf("error: please pass path to file in fuse_mnt mount-point\n");
  197. abort();
  198. }
  199. if (argc >= 3) {
  200. if (!strcmp(argv[2], "hugetlbfs")) {
  201. unsigned long hpage_size = default_huge_page_size();
  202. if (!hpage_size) {
  203. printf("Unable to determine huge page size\n");
  204. abort();
  205. }
  206. hugetlbfs_test = 1;
  207. mfd_def_size = hpage_size * 2;
  208. } else {
  209. printf("Unknown option: %s\n", argv[2]);
  210. abort();
  211. }
  212. }
  213. zero = calloc(sizeof(*zero), mfd_def_size);
  214. /* open FUSE memfd file for GUP testing */
  215. printf("opening: %s\n", argv[1]);
  216. fd = open(argv[1], O_RDONLY | O_CLOEXEC);
  217. if (fd < 0) {
  218. printf("cannot open(\"%s\"): %m\n", argv[1]);
  219. abort();
  220. }
  221. /* create new memfd-object */
  222. mfd = mfd_assert_new("kern_memfd_fuse",
  223. mfd_def_size,
  224. MFD_CLOEXEC | MFD_ALLOW_SEALING);
  225. /* mmap memfd-object for writing */
  226. p = mfd_assert_mmap_shared(mfd);
  227. /* pass mfd+mapping to a separate sealing-thread which tries to seal
  228. * the memfd objects with SEAL_WRITE while we write into it */
  229. global_mfd = mfd;
  230. global_p = p;
  231. pid = spawn_sealing_thread();
  232. /* Use read() on the FUSE file to read into our memory-mapped memfd
  233. * object. This races the other thread which tries to seal the
  234. * memfd-object.
  235. * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
  236. * This guarantees that the receive-buffer is pinned for 1s until the
  237. * data is written into it. The racing ADD_SEALS should thus fail as
  238. * the pages are still pinned. */
  239. r = read(fd, p, mfd_def_size);
  240. if (r < 0) {
  241. printf("read() failed: %m\n");
  242. abort();
  243. } else if (!r) {
  244. printf("unexpected EOF on read()\n");
  245. abort();
  246. }
  247. was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
  248. /* Wait for sealing-thread to finish and verify that it
  249. * successfully sealed the file after the second try. */
  250. join_sealing_thread(pid);
  251. mfd_assert_has_seals(mfd, F_SEAL_WRITE);
  252. /* *IF* the memfd-object was sealed at the time our read() returned,
  253. * then the kernel did a page-replacement or canceled the read() (or
  254. * whatever magic it did..). In that case, the memfd object is still
  255. * all zero.
  256. * In case the memfd-object was *not* sealed, the read() was successfull
  257. * and the memfd object must *not* be all zero.
  258. * Note that in real scenarios, there might be a mixture of both, but
  259. * in this test-cases, we have explicit 200ms delays which should be
  260. * enough to avoid any in-flight writes. */
  261. p = mfd_assert_mmap_private(mfd);
  262. if (was_sealed && memcmp(p, zero, mfd_def_size)) {
  263. printf("memfd sealed during read() but data not discarded\n");
  264. abort();
  265. } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
  266. printf("memfd sealed after read() but data discarded\n");
  267. abort();
  268. }
  269. close(mfd);
  270. close(fd);
  271. printf("fuse: DONE\n");
  272. free(zero);
  273. return 0;
  274. }