userfaultfd.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Stress userfaultfd syscall.
  4. *
  5. * Copyright (C) 2015 Red Hat, Inc.
  6. *
  7. * This test allocates two virtual areas and bounces the physical
  8. * memory across the two virtual areas (from area_src to area_dst)
  9. * using userfaultfd.
  10. *
  11. * There are three threads running per CPU:
  12. *
  13. * 1) one per-CPU thread takes a per-page pthread_mutex in a random
  14. * page of the area_dst (while the physical page may still be in
  15. * area_src), and increments a per-page counter in the same page,
  16. * and checks its value against a verification region.
  17. *
  18. * 2) another per-CPU thread handles the userfaults generated by
  19. * thread 1 above. userfaultfd blocking reads or poll() modes are
  20. * exercised interleaved.
  21. *
  22. * 3) one last per-CPU thread transfers the memory in the background
  23. * at maximum bandwidth (if not already transferred by thread
  24. * 2). Each CPU thread takes care of transferring a portion of the
  25. * area.
  26. *
  27. * When all threads of type 3 completed the transfer, one bounce is
  28. * complete. area_src and area_dst are then swapped. All threads are
  29. * respawned and so the bounce is immediately restarted in the
  30. * opposite direction.
  31. *
  32. * The per-CPU threads of type 1, by triggering userfaults inside
  33. * pthread_mutex_lock, also verify the atomicity of the memory
  34. * transfer (UFFDIO_COPY).
  35. */
  36. #define _GNU_SOURCE
  37. #include <stdio.h>
  38. #include <errno.h>
  39. #include <unistd.h>
  40. #include <stdlib.h>
  41. #include <sys/types.h>
  42. #include <sys/stat.h>
  43. #include <fcntl.h>
  44. #include <time.h>
  45. #include <signal.h>
  46. #include <poll.h>
  47. #include <string.h>
  48. #include <linux/mman.h>
  49. #include <sys/mman.h>
  50. #include <sys/syscall.h>
  51. #include <sys/ioctl.h>
  52. #include <sys/wait.h>
  53. #include <pthread.h>
  54. #include <linux/userfaultfd.h>
  55. #include <setjmp.h>
  56. #include <stdbool.h>
  57. #include <assert.h>
  58. #include <inttypes.h>
  59. #include <stdint.h>
  60. #include <sys/random.h>
  61. #include "../kselftest.h"
  62. #include "vm_util.h"
  63. #ifdef __NR_userfaultfd
  64. static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size;
  65. #define BOUNCE_RANDOM (1<<0)
  66. #define BOUNCE_RACINGFAULTS (1<<1)
  67. #define BOUNCE_VERIFY (1<<2)
  68. #define BOUNCE_POLL (1<<3)
  69. static int bounces;
  70. #define TEST_ANON 1
  71. #define TEST_HUGETLB 2
  72. #define TEST_SHMEM 3
  73. static int test_type;
  74. #define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
  75. #define BASE_PMD_ADDR ((void *)(1UL << 30))
  76. /* test using /dev/userfaultfd, instead of userfaultfd(2) */
  77. static bool test_dev_userfaultfd;
  78. /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
  79. #define ALARM_INTERVAL_SECS 10
  80. static volatile bool test_uffdio_copy_eexist = true;
  81. static volatile bool test_uffdio_zeropage_eexist = true;
  82. /* Whether to test uffd write-protection */
  83. static bool test_uffdio_wp = true;
  84. /* Whether to test uffd minor faults */
  85. static bool test_uffdio_minor = false;
  86. static bool map_shared;
  87. static int shm_fd;
  88. static int huge_fd;
  89. static unsigned long long *count_verify;
  90. static int uffd = -1;
  91. static int uffd_flags, finished, *pipefd;
  92. static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
  93. static char *zeropage;
  94. pthread_attr_t attr;
  95. static bool test_collapse;
  96. /* Userfaultfd test statistics */
  97. struct uffd_stats {
  98. int cpu;
  99. unsigned long missing_faults;
  100. unsigned long wp_faults;
  101. unsigned long minor_faults;
  102. };
  103. /* pthread_mutex_t starts at page offset 0 */
  104. #define area_mutex(___area, ___nr) \
  105. ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
  106. /*
  107. * count is placed in the page after pthread_mutex_t, naturally aligned
  108. * to avoid alignment faults on non-x86 archs.
  109. */
  110. #define area_count(___area, ___nr) \
  111. ((volatile unsigned long long *) ((unsigned long) \
  112. ((___area) + (___nr)*page_size + \
  113. sizeof(pthread_mutex_t) + \
  114. sizeof(unsigned long long) - 1) & \
  115. ~(unsigned long)(sizeof(unsigned long long) \
  116. - 1)))
  117. #define swap(a, b) \
  118. do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
  119. #define factor_of_2(x) ((x) ^ ((x) & ((x) - 1)))
  120. const char *examples =
  121. "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
  122. "./userfaultfd anon 100 99999\n\n"
  123. "# Run the same anonymous memory test, but using /dev/userfaultfd:\n"
  124. "./userfaultfd anon:dev 100 99999\n\n"
  125. "# Run share memory test on 1GiB region with 99 bounces:\n"
  126. "./userfaultfd shmem 1000 99\n\n"
  127. "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
  128. "./userfaultfd hugetlb 256 50\n\n"
  129. "# Run the same hugetlb test but using shared file:\n"
  130. "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
  131. "# 10MiB-~6GiB 999 bounces anonymous test, "
  132. "continue forever unless an error triggers\n"
  133. "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
  134. static void usage(void)
  135. {
  136. fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
  137. "[hugetlbfs_file]\n\n");
  138. fprintf(stderr, "Supported <test type>: anon, hugetlb, "
  139. "hugetlb_shared, shmem\n\n");
  140. fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. "
  141. "Supported mods:\n");
  142. fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n");
  143. fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n");
  144. fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n"
  145. "memory\n");
  146. fprintf(stderr, "\nExample test mod usage:\n");
  147. fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n");
  148. fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n");
  149. fprintf(stderr, "Examples:\n\n");
  150. fprintf(stderr, "%s", examples);
  151. exit(1);
  152. }
  153. #define _err(fmt, ...) \
  154. do { \
  155. int ret = errno; \
  156. fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
  157. fprintf(stderr, " (errno=%d, line=%d)\n", \
  158. ret, __LINE__); \
  159. } while (0)
  160. #define errexit(exitcode, fmt, ...) \
  161. do { \
  162. _err(fmt, ##__VA_ARGS__); \
  163. exit(exitcode); \
  164. } while (0)
  165. #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
  166. static void uffd_stats_reset(struct uffd_stats *uffd_stats,
  167. unsigned long n_cpus)
  168. {
  169. int i;
  170. for (i = 0; i < n_cpus; i++) {
  171. uffd_stats[i].cpu = i;
  172. uffd_stats[i].missing_faults = 0;
  173. uffd_stats[i].wp_faults = 0;
  174. uffd_stats[i].minor_faults = 0;
  175. }
  176. }
  177. static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
  178. {
  179. int i;
  180. unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
  181. for (i = 0; i < n_cpus; i++) {
  182. miss_total += stats[i].missing_faults;
  183. wp_total += stats[i].wp_faults;
  184. minor_total += stats[i].minor_faults;
  185. }
  186. printf("userfaults: ");
  187. if (miss_total) {
  188. printf("%llu missing (", miss_total);
  189. for (i = 0; i < n_cpus; i++)
  190. printf("%lu+", stats[i].missing_faults);
  191. printf("\b) ");
  192. }
  193. if (wp_total) {
  194. printf("%llu wp (", wp_total);
  195. for (i = 0; i < n_cpus; i++)
  196. printf("%lu+", stats[i].wp_faults);
  197. printf("\b) ");
  198. }
  199. if (minor_total) {
  200. printf("%llu minor (", minor_total);
  201. for (i = 0; i < n_cpus; i++)
  202. printf("%lu+", stats[i].minor_faults);
  203. printf("\b)");
  204. }
  205. printf("\n");
  206. }
  207. static void anon_release_pages(char *rel_area)
  208. {
  209. if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
  210. err("madvise(MADV_DONTNEED) failed");
  211. }
  212. static void anon_allocate_area(void **alloc_area, bool is_src)
  213. {
  214. *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
  215. MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  216. }
  217. static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
  218. {
  219. }
  220. static void hugetlb_release_pages(char *rel_area)
  221. {
  222. if (!map_shared) {
  223. if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
  224. err("madvise(MADV_DONTNEED) failed");
  225. } else {
  226. if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
  227. err("madvise(MADV_REMOVE) failed");
  228. }
  229. }
  230. static void hugetlb_allocate_area(void **alloc_area, bool is_src)
  231. {
  232. void *area_alias = NULL;
  233. char **alloc_area_alias;
  234. if (!map_shared)
  235. *alloc_area = mmap(NULL,
  236. nr_pages * page_size,
  237. PROT_READ | PROT_WRITE,
  238. MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
  239. (is_src ? 0 : MAP_NORESERVE),
  240. -1,
  241. 0);
  242. else
  243. *alloc_area = mmap(NULL,
  244. nr_pages * page_size,
  245. PROT_READ | PROT_WRITE,
  246. MAP_SHARED |
  247. (is_src ? 0 : MAP_NORESERVE),
  248. huge_fd,
  249. is_src ? 0 : nr_pages * page_size);
  250. if (*alloc_area == MAP_FAILED)
  251. err("mmap of hugetlbfs file failed");
  252. if (map_shared) {
  253. area_alias = mmap(NULL,
  254. nr_pages * page_size,
  255. PROT_READ | PROT_WRITE,
  256. MAP_SHARED,
  257. huge_fd,
  258. is_src ? 0 : nr_pages * page_size);
  259. if (area_alias == MAP_FAILED)
  260. err("mmap of hugetlb file alias failed");
  261. }
  262. if (is_src) {
  263. alloc_area_alias = &area_src_alias;
  264. } else {
  265. alloc_area_alias = &area_dst_alias;
  266. }
  267. if (area_alias)
  268. *alloc_area_alias = area_alias;
  269. }
  270. static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
  271. {
  272. if (!map_shared)
  273. return;
  274. *start = (unsigned long) area_dst_alias + offset;
  275. }
  276. static void shmem_release_pages(char *rel_area)
  277. {
  278. if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
  279. err("madvise(MADV_REMOVE) failed");
  280. }
  281. static void shmem_allocate_area(void **alloc_area, bool is_src)
  282. {
  283. void *area_alias = NULL;
  284. size_t bytes = nr_pages * page_size;
  285. unsigned long offset = is_src ? 0 : bytes;
  286. char *p = NULL, *p_alias = NULL;
  287. if (test_collapse) {
  288. p = BASE_PMD_ADDR;
  289. if (!is_src)
  290. /* src map + alias + interleaved hpages */
  291. p += 2 * (bytes + hpage_size);
  292. p_alias = p;
  293. p_alias += bytes;
  294. p_alias += hpage_size; /* Prevent src/dst VMA merge */
  295. }
  296. *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
  297. shm_fd, offset);
  298. if (*alloc_area == MAP_FAILED)
  299. err("mmap of memfd failed");
  300. if (test_collapse && *alloc_area != p)
  301. err("mmap of memfd failed at %p", p);
  302. area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
  303. shm_fd, offset);
  304. if (area_alias == MAP_FAILED)
  305. err("mmap of memfd alias failed");
  306. if (test_collapse && area_alias != p_alias)
  307. err("mmap of anonymous memory failed at %p", p_alias);
  308. if (is_src)
  309. area_src_alias = area_alias;
  310. else
  311. area_dst_alias = area_alias;
  312. }
  313. static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
  314. {
  315. *start = (unsigned long)area_dst_alias + offset;
  316. }
  317. static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages)
  318. {
  319. if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size))
  320. err("Did not find expected %d number of hugepages",
  321. expect_nr_hpages);
  322. }
  323. struct uffd_test_ops {
  324. void (*allocate_area)(void **alloc_area, bool is_src);
  325. void (*release_pages)(char *rel_area);
  326. void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
  327. void (*check_pmd_mapping)(void *p, int expect_nr_hpages);
  328. };
  329. static struct uffd_test_ops anon_uffd_test_ops = {
  330. .allocate_area = anon_allocate_area,
  331. .release_pages = anon_release_pages,
  332. .alias_mapping = noop_alias_mapping,
  333. .check_pmd_mapping = NULL,
  334. };
  335. static struct uffd_test_ops shmem_uffd_test_ops = {
  336. .allocate_area = shmem_allocate_area,
  337. .release_pages = shmem_release_pages,
  338. .alias_mapping = shmem_alias_mapping,
  339. .check_pmd_mapping = shmem_check_pmd_mapping,
  340. };
  341. static struct uffd_test_ops hugetlb_uffd_test_ops = {
  342. .allocate_area = hugetlb_allocate_area,
  343. .release_pages = hugetlb_release_pages,
  344. .alias_mapping = hugetlb_alias_mapping,
  345. .check_pmd_mapping = NULL,
  346. };
  347. static struct uffd_test_ops *uffd_test_ops;
  348. static inline uint64_t uffd_minor_feature(void)
  349. {
  350. if (test_type == TEST_HUGETLB && map_shared)
  351. return UFFD_FEATURE_MINOR_HUGETLBFS;
  352. else if (test_type == TEST_SHMEM)
  353. return UFFD_FEATURE_MINOR_SHMEM;
  354. else
  355. return 0;
  356. }
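/*
 * Compute the ioctl mask we expect UFFDIO_REGISTER to report for a given
 * registration mode: start from UFFD_API_RANGE_IOCTLS and drop ZEROPAGE on
 * hugetlb, WRITEPROTECT when uffd-wp is not being tested, and CONTINUE when
 * minor faults are not being tested.
 */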
  357. static uint64_t get_expected_ioctls(uint64_t mode)
  358. {
  359. uint64_t ioctls = UFFD_API_RANGE_IOCTLS;
  360. if (test_type == TEST_HUGETLB)
  361. ioctls &= ~(1 << _UFFDIO_ZEROPAGE);
  362. if (!((mode & UFFDIO_REGISTER_MODE_WP) && test_uffdio_wp))
  363. ioctls &= ~(1 << _UFFDIO_WRITEPROTECT);
  364. if (!((mode & UFFDIO_REGISTER_MODE_MINOR) && test_uffdio_minor))
  365. ioctls &= ~(1 << _UFFDIO_CONTINUE);
  366. return ioctls;
  367. }
  368. static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls)
  369. {
  370. uint64_t expected = get_expected_ioctls(mode);
  371. uint64_t actual = ioctls & expected;
  372. if (actual != expected) {
  373. err("missing ioctl(s): expected %"PRIx64" actual: %"PRIx64,
  374. expected, actual);
  375. }
  376. }
  377. static int __userfaultfd_open_dev(void)
  378. {
  379. int fd, _uffd;
  380. fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
  381. if (fd < 0)
  382. errexit(KSFT_SKIP, "opening /dev/userfaultfd failed");
  383. _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS);
  384. if (_uffd < 0)
  385. errexit(errno == ENOTTY ? KSFT_SKIP : 1,
  386. "creating userfaultfd failed");
  387. close(fd);
  388. return _uffd;
  389. }
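/*
 * Create the userfaultfd, either via the userfaultfd(2) syscall or through
 * /dev/userfaultfd, then perform the UFFDIO_API handshake requesting the
 * features in *features; the features actually granted are written back.
 */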
  390. static void userfaultfd_open(uint64_t *features)
  391. {
  392. struct uffdio_api uffdio_api;
  393. if (test_dev_userfaultfd)
  394. uffd = __userfaultfd_open_dev();
  395. else {
  396. uffd = syscall(__NR_userfaultfd, UFFD_FLAGS);
  397. if (uffd < 0)
  398. errexit(errno == ENOSYS ? KSFT_SKIP : 1,
  399. "creating userfaultfd failed");
  400. }
  401. uffd_flags = fcntl(uffd, F_GETFD, NULL);
  402. uffdio_api.api = UFFD_API;
  403. uffdio_api.features = *features;
  404. if (ioctl(uffd, UFFDIO_API, &uffdio_api))
  405. err("UFFDIO_API failed.\nPlease make sure to "
  406. "run with either root or ptrace capability.");
  407. if (uffdio_api.api != UFFD_API)
  408. err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
  409. *features = uffdio_api.features;
  410. }
  411. static inline void munmap_area(void **area)
  412. {
  413. if (*area)
  414. if (munmap(*area, nr_pages * page_size))
  415. err("munmap");
  416. *area = NULL;
  417. }
  418. static void uffd_test_ctx_clear(void)
  419. {
  420. size_t i;
  421. if (pipefd) {
  422. for (i = 0; i < nr_cpus * 2; ++i) {
  423. if (close(pipefd[i]))
  424. err("close pipefd");
  425. }
  426. free(pipefd);
  427. pipefd = NULL;
  428. }
  429. if (count_verify) {
  430. free(count_verify);
  431. count_verify = NULL;
  432. }
  433. if (uffd != -1) {
  434. if (close(uffd))
  435. err("close uffd");
  436. uffd = -1;
  437. }
  438. munmap_area((void **)&area_src);
  439. munmap_area((void **)&area_src_alias);
  440. munmap_area((void **)&area_dst);
  441. munmap_area((void **)&area_dst_alias);
  442. munmap_area((void **)&area_remap);
  443. }
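/*
 * Tear down any previous context, allocate fresh area_src/area_dst, open the
 * userfaultfd, and seed every page of area_src with its mutex and a count of
 * 1 (mirrored in count_verify[]). area_dst is then explicitly released so it
 * starts out empty; see the in-function comment about THP merging.
 */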
  444. static void uffd_test_ctx_init(uint64_t features)
  445. {
  446. unsigned long nr, cpu;
  447. uffd_test_ctx_clear();
  448. uffd_test_ops->allocate_area((void **)&area_src, true);
  449. uffd_test_ops->allocate_area((void **)&area_dst, false);
  450. userfaultfd_open(&features);
  451. count_verify = malloc(nr_pages * sizeof(unsigned long long));
  452. if (!count_verify)
  453. err("count_verify");
  454. for (nr = 0; nr < nr_pages; nr++) {
  455. *area_mutex(area_src, nr) =
  456. (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
  457. count_verify[nr] = *area_count(area_src, nr) = 1;
  458. /*
  459. * In the transition from 255 to 256, powerpc can
  460. * read out of order in my_bcmp and see both bytes as
  461. * zero, so leave a placeholder below that is always
  462. * non-zero after the count, to keep my_bcmp from
  463. * triggering false positives.
  464. */
  465. *(area_count(area_src, nr) + 1) = 1;
  466. }
  467. /*
  468. * After initialization of area_src, we must explicitly release pages
  469. * for area_dst to make sure it's fully empty. Otherwise we could have
  470. * some area_dst pages be erroneously initialized with zero pages,
  471. * hence we could hit memory corruption later in the test.
  472. *
  473. * One example is when THP is globally enabled, above allocate_area()
  474. * calls could have the two areas merged into a single VMA (as they
  475. * will have the same VMA flags so they're mergeable). When we
  476. * initialize the area_src above, it's possible that some part of
  477. * area_dst could have been faulted in via one huge THP that will be
  478. * shared between area_src and area_dst. It could cause some of the
  479. * area_dst pages to not be trapped by missing userfaults.
  480. *
  481. * This release_pages() guarantees that even if that happened, we'll
  482. * proactively split the THP and drop any accidentally initialized
  483. * pages within area_dst.
  484. */
  485. uffd_test_ops->release_pages(area_dst);
  486. pipefd = malloc(sizeof(int) * nr_cpus * 2);
  487. if (!pipefd)
  488. err("pipefd");
  489. for (cpu = 0; cpu < nr_cpus; cpu++)
  490. if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
  491. err("pipe");
  492. }
  493. static int my_bcmp(char *str1, char *str2, size_t n)
  494. {
  495. unsigned long i;
  496. for (i = 0; i < n; i++)
  497. if (str1[i] != str2[i])
  498. return 1;
  499. return 0;
  500. }
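/*
 * Set or clear write-protection on [start, start + len) via
 * UFFDIO_WRITEPROTECT; clearing also wakes up any waiters on the range.
 */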
  501. static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
  502. {
  503. struct uffdio_writeprotect prms;
  504. /* Write protection page faults */
  505. prms.range.start = start;
  506. prms.range.len = len;
  507. /* Undo write-protect, do wakeup after that */
  508. prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
  509. if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
  510. err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
  511. }
  512. static void continue_range(int ufd, __u64 start, __u64 len)
  513. {
  514. struct uffdio_continue req;
  515. int ret;
  516. req.range.start = start;
  517. req.range.len = len;
  518. req.mode = 0;
  519. if (ioctl(ufd, UFFDIO_CONTINUE, &req))
  520. err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
  521. (uint64_t)start);
  522. /*
  523. * Error handling within the kernel for continue is subtly different
  524. * from copy or zeropage, so it may be a source of bugs. Trigger an
  525. * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
  526. */
  527. req.mapped = 0;
  528. ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
  529. if (ret >= 0 || req.mapped != -EEXIST)
  530. err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
  531. ret, (int64_t) req.mapped);
  532. }
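/*
 * Thread type 1: repeatedly pick a page of area_dst (randomly or
 * sequentially, depending on the bounce mode), take its per-page mutex,
 * verify and increment its counter, and update count_verify[]. The first
 * touch of a not-yet-copied page is what generates the userfaults.
 */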
  533. static void *locking_thread(void *arg)
  534. {
  535. unsigned long cpu = (unsigned long) arg;
  536. unsigned long page_nr;
  537. unsigned long long count;
  538. if (!(bounces & BOUNCE_RANDOM)) {
  539. page_nr = -bounces;
  540. if (!(bounces & BOUNCE_RACINGFAULTS))
  541. page_nr += cpu * nr_pages_per_cpu;
  542. }
  543. while (!finished) {
  544. if (bounces & BOUNCE_RANDOM) {
  545. if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr))
  546. err("getrandom failed");
  547. } else
  548. page_nr += 1;
  549. page_nr %= nr_pages;
  550. pthread_mutex_lock(area_mutex(area_dst, page_nr));
  551. count = *area_count(area_dst, page_nr);
  552. if (count != count_verify[page_nr])
  553. err("page_nr %lu memory corruption %llu %llu",
  554. page_nr, count, count_verify[page_nr]);
  555. count++;
  556. *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
  557. pthread_mutex_unlock(area_mutex(area_dst, page_nr));
  558. }
  559. return NULL;
  560. }
  561. static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
  562. unsigned long offset)
  563. {
  564. uffd_test_ops->alias_mapping(&uffdio_copy->dst,
  565. uffdio_copy->len,
  566. offset);
  567. if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
  568. /* real retval in uffdio_copy.copy */
  569. if (uffdio_copy->copy != -EEXIST)
  570. err("UFFDIO_COPY retry error: %"PRId64,
  571. (int64_t)uffdio_copy->copy);
  572. } else {
  573. err("UFFDIO_COPY retry unexpected: %"PRId64,
  574. (int64_t)uffdio_copy->copy);
  575. }
  576. }
  577. static void wake_range(int ufd, unsigned long addr, unsigned long len)
  578. {
  579. struct uffdio_range uffdio_wake;
  580. uffdio_wake.start = addr;
  581. uffdio_wake.len = len;
  582. if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
  583. fprintf(stderr, "error waking %lu\n",
  584. addr), exit(1);
  585. }
  586. static int __copy_page(int ufd, unsigned long offset, bool retry)
  587. {
  588. struct uffdio_copy uffdio_copy;
  589. if (offset >= nr_pages * page_size)
  590. err("unexpected offset %lu\n", offset);
  591. uffdio_copy.dst = (unsigned long) area_dst + offset;
  592. uffdio_copy.src = (unsigned long) area_src + offset;
  593. uffdio_copy.len = page_size;
  594. if (test_uffdio_wp)
  595. uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
  596. else
  597. uffdio_copy.mode = 0;
  598. uffdio_copy.copy = 0;
  599. if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
  600. /* real retval in uffdio_copy.copy */
  601. if (uffdio_copy.copy != -EEXIST)
  602. err("UFFDIO_COPY error: %"PRId64,
  603. (int64_t)uffdio_copy.copy);
  604. wake_range(ufd, uffdio_copy.dst, page_size);
  605. } else if (uffdio_copy.copy != page_size) {
  606. err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
  607. } else {
  608. if (test_uffdio_copy_eexist && retry) {
  609. test_uffdio_copy_eexist = false;
  610. retry_copy_page(ufd, &uffdio_copy, offset);
  611. }
  612. return 1;
  613. }
  614. return 0;
  615. }
  616. static int copy_page_retry(int ufd, unsigned long offset)
  617. {
  618. return __copy_page(ufd, offset, true);
  619. }
  620. static int copy_page(int ufd, unsigned long offset)
  621. {
  622. return __copy_page(ufd, offset, false);
  623. }
  624. static int uffd_read_msg(int ufd, struct uffd_msg *msg)
  625. {
  626. int ret = read(uffd, msg, sizeof(*msg));
  627. if (ret != sizeof(*msg)) {
  628. if (ret < 0) {
  629. if (errno == EAGAIN || errno == EINTR)
  630. return 1;
  631. err("blocking read error");
  632. } else {
  633. err("short read");
  634. }
  635. }
  636. return 0;
  637. }
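/*
 * Dispatch one userfault message: resolve write-protect faults by clearing
 * the protection, minor faults by bit-flipping the page through area_dst and
 * issuing UFFDIO_CONTINUE, and missing faults with UFFDIO_COPY. Per-type
 * counters are accumulated in *stats.
 */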
  638. static void uffd_handle_page_fault(struct uffd_msg *msg,
  639. struct uffd_stats *stats)
  640. {
  641. unsigned long offset;
  642. if (msg->event != UFFD_EVENT_PAGEFAULT)
  643. err("unexpected msg event %u", msg->event);
  644. if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
  645. /* Write protect page faults */
  646. wp_range(uffd, msg->arg.pagefault.address, page_size, false);
  647. stats->wp_faults++;
  648. } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
  649. uint8_t *area;
  650. int b;
  651. /*
  652. * Minor page faults
  653. *
  654. * To prove we can modify the original range for testing
  655. * purposes, we're going to bit flip this range before
  656. * continuing.
  657. *
  658. * Note that this requires all minor page fault tests operate on
  659. * area_dst (non-UFFD-registered) and area_dst_alias
  660. * (UFFD-registered).
  661. */
  662. area = (uint8_t *)(area_dst +
  663. ((char *)msg->arg.pagefault.address -
  664. area_dst_alias));
  665. for (b = 0; b < page_size; ++b)
  666. area[b] = ~area[b];
  667. continue_range(uffd, msg->arg.pagefault.address, page_size);
  668. stats->minor_faults++;
  669. } else {
  670. /*
  671. * Missing page faults.
  672. *
  673. * Here we force a write check for each of the missing mode
  674. * faults. It's guaranteed because the only threads that
  675. * will trigger uffd faults are the locking threads, and
  676. * their first instruction to touch the missing page will
  677. * always be pthread_mutex_lock().
  678. *
  679. * Note that here we relied on an NPTL glibc impl detail to
  680. * always read the lock type at the entry of the lock op
  681. * (pthread_mutex_t.__data.__type, offset 0x10) before
  682. * doing any locking operations to guarantee that. It's
  683. * actually not good to rely on this impl detail because
  684. * logically a pthread-compatible lib can implement the
  685. * locks without types and we can fail when linking with
  686. * them. However since we used to find bugs with this
  687. * strict check we still keep it around. Hopefully this
  688. * could be a good hint when it fails again. If one day
  689. * it'll break on some other impl of glibc we'll revisit.
  690. */
  691. if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
  692. err("unexpected write fault");
  693. offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
  694. offset &= ~(page_size-1);
  695. if (copy_page(uffd, offset))
  696. stats->missing_faults++;
  697. }
  698. }
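/*
 * Thread type 2 (poll mode): poll() on the userfaultfd and on a per-CPU pipe
 * used to request shutdown, handling page faults as well as FORK, REMOVE and
 * REMAP events.
 */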
  699. static void *uffd_poll_thread(void *arg)
  700. {
  701. struct uffd_stats *stats = (struct uffd_stats *)arg;
  702. unsigned long cpu = stats->cpu;
  703. struct pollfd pollfd[2];
  704. struct uffd_msg msg;
  705. struct uffdio_register uffd_reg;
  706. int ret;
  707. char tmp_chr;
  708. pollfd[0].fd = uffd;
  709. pollfd[0].events = POLLIN;
  710. pollfd[1].fd = pipefd[cpu*2];
  711. pollfd[1].events = POLLIN;
  712. for (;;) {
  713. ret = poll(pollfd, 2, -1);
  714. if (ret <= 0) {
  715. if (errno == EINTR || errno == EAGAIN)
  716. continue;
  717. err("poll error: %d", ret);
  718. }
  719. if (pollfd[1].revents & POLLIN) {
  720. if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
  721. err("read pipefd error");
  722. break;
  723. }
  724. if (!(pollfd[0].revents & POLLIN))
  725. err("pollfd[0].revents %d", pollfd[0].revents);
  726. if (uffd_read_msg(uffd, &msg))
  727. continue;
  728. switch (msg.event) {
  729. default:
  730. err("unexpected msg event %u\n", msg.event);
  731. break;
  732. case UFFD_EVENT_PAGEFAULT:
  733. uffd_handle_page_fault(&msg, stats);
  734. break;
  735. case UFFD_EVENT_FORK:
  736. close(uffd);
  737. uffd = msg.arg.fork.ufd;
  738. pollfd[0].fd = uffd;
  739. break;
  740. case UFFD_EVENT_REMOVE:
  741. uffd_reg.range.start = msg.arg.remove.start;
  742. uffd_reg.range.len = msg.arg.remove.end -
  743. msg.arg.remove.start;
  744. if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
  745. err("remove failure");
  746. break;
  747. case UFFD_EVENT_REMAP:
  748. area_remap = area_dst; /* save for later unmap */
  749. area_dst = (char *)(unsigned long)msg.arg.remap.to;
  750. break;
  751. }
  752. }
  753. return NULL;
  754. }
  755. pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
  756. static void *uffd_read_thread(void *arg)
  757. {
  758. struct uffd_stats *stats = (struct uffd_stats *)arg;
  759. struct uffd_msg msg;
  760. pthread_mutex_unlock(&uffd_read_mutex);
  761. /* from here cancellation is ok */
  762. for (;;) {
  763. if (uffd_read_msg(uffd, &msg))
  764. continue;
  765. uffd_handle_page_fault(&msg, stats);
  766. }
  767. return NULL;
  768. }
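/*
 * Thread type 3: bulk-copy this CPU's slice of pages into area_dst with
 * UFFDIO_COPY and, when uffd-wp is being tested, write-protect the slice
 * halfway through so the remaining copies exercise WP handling.
 */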
  769. static void *background_thread(void *arg)
  770. {
  771. unsigned long cpu = (unsigned long) arg;
  772. unsigned long page_nr, start_nr, mid_nr, end_nr;
  773. start_nr = cpu * nr_pages_per_cpu;
  774. end_nr = (cpu+1) * nr_pages_per_cpu;
  775. mid_nr = (start_nr + end_nr) / 2;
  776. /* Copy the first half of the pages */
  777. for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
  778. copy_page_retry(uffd, page_nr * page_size);
  779. /*
  780. * If we need to test uffd-wp, set it up now. Then we'll have
  781. * at least the first half of the pages mapped already which
  782. * can be write-protected for testing
  783. */
  784. if (test_uffdio_wp)
  785. wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
  786. nr_pages_per_cpu * page_size, true);
  787. /*
  788. * Continue the 2nd half of the page copying, handling write
  789. * protection faults if any
  790. */
  791. for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
  792. copy_page_retry(uffd, page_nr * page_size);
  793. return NULL;
  794. }
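/*
 * Run one bounce: spawn the locking, fault-handling (poll or blocking read)
 * and background-copy threads for every CPU, wait for the background copies
 * to finish, zap area_src, then stop the locking threads and shut down the
 * fault handlers.
 */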
  795. static int stress(struct uffd_stats *uffd_stats)
  796. {
  797. unsigned long cpu;
  798. pthread_t locking_threads[nr_cpus];
  799. pthread_t uffd_threads[nr_cpus];
  800. pthread_t background_threads[nr_cpus];
  801. finished = 0;
  802. for (cpu = 0; cpu < nr_cpus; cpu++) {
  803. if (pthread_create(&locking_threads[cpu], &attr,
  804. locking_thread, (void *)cpu))
  805. return 1;
  806. if (bounces & BOUNCE_POLL) {
  807. if (pthread_create(&uffd_threads[cpu], &attr,
  808. uffd_poll_thread,
  809. (void *)&uffd_stats[cpu]))
  810. return 1;
  811. } else {
  812. if (pthread_create(&uffd_threads[cpu], &attr,
  813. uffd_read_thread,
  814. (void *)&uffd_stats[cpu]))
  815. return 1;
  816. pthread_mutex_lock(&uffd_read_mutex);
  817. }
  818. if (pthread_create(&background_threads[cpu], &attr,
  819. background_thread, (void *)cpu))
  820. return 1;
  821. }
  822. for (cpu = 0; cpu < nr_cpus; cpu++)
  823. if (pthread_join(background_threads[cpu], NULL))
  824. return 1;
  825. /*
  826. * Be strict and immediately zap area_src, the whole area has
  827. * been transferred already by the background threads. The
  828. * area_src could then be faulted in a racy way by still
  829. * running uffd_threads reading zeropages after we zapped
  830. * area_src (but they're guaranteed to get -EEXIST from
  831. * UFFDIO_COPY without writing zero pages into area_dst
  832. * because the background threads already completed).
  833. */
  834. uffd_test_ops->release_pages(area_src);
  835. finished = 1;
  836. for (cpu = 0; cpu < nr_cpus; cpu++)
  837. if (pthread_join(locking_threads[cpu], NULL))
  838. return 1;
  839. for (cpu = 0; cpu < nr_cpus; cpu++) {
  840. char c;
  841. if (bounces & BOUNCE_POLL) {
  842. if (write(pipefd[cpu*2+1], &c, 1) != 1)
  843. err("pipefd write error");
  844. if (pthread_join(uffd_threads[cpu],
  845. (void *)&uffd_stats[cpu]))
  846. return 1;
  847. } else {
  848. if (pthread_cancel(uffd_threads[cpu]))
  849. return 1;
  850. if (pthread_join(uffd_threads[cpu], NULL))
  851. return 1;
  852. }
  853. }
  854. return 0;
  855. }
  856. sigjmp_buf jbuf, *sigbuf;
  857. static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
  858. {
  859. if (sig == SIGBUS) {
  860. if (sigbuf)
  861. siglongjmp(*sigbuf, 1);
  862. abort();
  863. }
  864. }
  865. /*
  866. * For non-cooperative userfaultfd test we fork() a process that will
  867. * generate pagefaults, will mremap the area monitored by the
  868. * userfaultfd and at last this process will release the monitored
  869. * area.
  870. * For the anonymous and shared memory the area is divided into two
  871. * parts, the first part is accessed before mremap, and the second
  872. * part is accessed after mremap. Since hugetlbfs does not support
  873. * mremap, the entire monitored area is accessed in a single pass for
  874. * HUGETLB_TEST.
  875. * The release of the pages currently generates an event for shmem and
  876. * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked for
  877. * hugetlb.
  878. * For the signal test (UFFD_FEATURE_SIGBUS) with signal_test = 1, we
  879. * register the monitored area, generate pagefaults and test that the
  880. * signal is delivered, using UFFDIO_COPY to allocate the missing page
  881. * and retry. With signal_test = 2 we test the robustness use case: we
  882. * release the monitored area, fork a process that will generate
  883. * pagefaults and verify the signal is generated. This also tests the
  884. * UFFD_FEATURE_EVENT_FORK event; the monitor thread verifies that no userfault events are generated.
  885. */
  886. static int faulting_process(int signal_test)
  887. {
  888. unsigned long nr;
  889. unsigned long long count;
  890. unsigned long split_nr_pages;
  891. unsigned long lastnr;
  892. struct sigaction act;
  893. volatile unsigned long signalled = 0;
  894. split_nr_pages = (nr_pages + 1) / 2;
  895. if (signal_test) {
  896. sigbuf = &jbuf;
  897. memset(&act, 0, sizeof(act));
  898. act.sa_sigaction = sighndl;
  899. act.sa_flags = SA_SIGINFO;
  900. if (sigaction(SIGBUS, &act, 0))
  901. err("sigaction");
  902. lastnr = (unsigned long)-1;
  903. }
  904. for (nr = 0; nr < split_nr_pages; nr++) {
  905. volatile int steps = 1;
  906. unsigned long offset = nr * page_size;
  907. if (signal_test) {
  908. if (sigsetjmp(*sigbuf, 1) != 0) {
  909. if (steps == 1 && nr == lastnr)
  910. err("Signal repeated");
  911. lastnr = nr;
  912. if (signal_test == 1) {
  913. if (steps == 1) {
  914. /* This is a MISSING request */
  915. steps++;
  916. if (copy_page(uffd, offset))
  917. signalled++;
  918. } else {
  919. /* This is a WP request */
  920. assert(steps == 2);
  921. wp_range(uffd,
  922. (__u64)area_dst +
  923. offset,
  924. page_size, false);
  925. }
  926. } else {
  927. signalled++;
  928. continue;
  929. }
  930. }
  931. }
  932. count = *area_count(area_dst, nr);
  933. if (count != count_verify[nr])
  934. err("nr %lu memory corruption %llu %llu\n",
  935. nr, count, count_verify[nr]);
  936. /*
  937. * Trigger write-protection faults, if any, by writing
  938. * the same value back.
  939. */
  940. *area_count(area_dst, nr) = count;
  941. }
  942. if (signal_test)
  943. return signalled != split_nr_pages;
  944. area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
  945. MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
  946. if (area_dst == MAP_FAILED)
  947. err("mremap");
  948. /* Reset area_src since we just clobbered it */
  949. area_src = NULL;
  950. for (; nr < nr_pages; nr++) {
  951. count = *area_count(area_dst, nr);
  952. if (count != count_verify[nr]) {
  953. err("nr %lu memory corruption %llu %llu\n",
  954. nr, count, count_verify[nr]);
  955. }
  956. /*
  957. * Trigger write-protection faults, if any, by writing
  958. * the same value back.
  959. */
  960. *area_count(area_dst, nr) = count;
  961. }
  962. uffd_test_ops->release_pages(area_dst);
  963. for (nr = 0; nr < nr_pages; nr++)
  964. if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
  965. err("nr %lu is not zero", nr);
  966. return 0;
  967. }
  968. static void retry_uffdio_zeropage(int ufd,
  969. struct uffdio_zeropage *uffdio_zeropage,
  970. unsigned long offset)
  971. {
  972. uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
  973. uffdio_zeropage->range.len,
  974. offset);
  975. if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
  976. if (uffdio_zeropage->zeropage != -EEXIST)
  977. err("UFFDIO_ZEROPAGE error: %"PRId64,
  978. (int64_t)uffdio_zeropage->zeropage);
  979. } else {
  980. err("UFFDIO_ZEROPAGE error: %"PRId64,
  981. (int64_t)uffdio_zeropage->zeropage);
  982. }
  983. }
  984. static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
  985. {
  986. struct uffdio_zeropage uffdio_zeropage;
  987. int ret;
  988. bool has_zeropage = get_expected_ioctls(0) & (1 << _UFFDIO_ZEROPAGE);
  989. __s64 res;
  990. if (offset >= nr_pages * page_size)
  991. err("unexpected offset %lu", offset);
  992. uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
  993. uffdio_zeropage.range.len = page_size;
  994. uffdio_zeropage.mode = 0;
  995. ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
  996. res = uffdio_zeropage.zeropage;
  997. if (ret) {
  998. /* real retval in uffdio_zeropage.zeropage */
  999. if (has_zeropage)
  1000. err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
  1001. else if (res != -EINVAL)
  1002. err("UFFDIO_ZEROPAGE not -EINVAL");
  1003. } else if (has_zeropage) {
  1004. if (res != page_size) {
  1005. err("UFFDIO_ZEROPAGE unexpected size");
  1006. } else {
  1007. if (test_uffdio_zeropage_eexist && retry) {
  1008. test_uffdio_zeropage_eexist = false;
  1009. retry_uffdio_zeropage(ufd, &uffdio_zeropage,
  1010. offset);
  1011. }
  1012. return 1;
  1013. }
  1014. } else
  1015. err("UFFDIO_ZEROPAGE succeeded");
  1016. return 0;
  1017. }
  1018. static int uffdio_zeropage(int ufd, unsigned long offset)
  1019. {
  1020. return __uffdio_zeropage(ufd, offset, false);
  1021. }
  1022. /* exercise UFFDIO_ZEROPAGE */
  1023. static int userfaultfd_zeropage_test(void)
  1024. {
  1025. struct uffdio_register uffdio_register;
  1026. printf("testing UFFDIO_ZEROPAGE: ");
  1027. fflush(stdout);
  1028. uffd_test_ctx_init(0);
  1029. uffdio_register.range.start = (unsigned long) area_dst;
  1030. uffdio_register.range.len = nr_pages * page_size;
  1031. uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
  1032. if (test_uffdio_wp)
  1033. uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
  1034. if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
  1035. err("register failure");
  1036. assert_expected_ioctls_present(
  1037. uffdio_register.mode, uffdio_register.ioctls);
  1038. if (uffdio_zeropage(uffd, 0))
  1039. if (my_bcmp(area_dst, zeropage, page_size))
  1040. err("zeropage is not zero");
  1041. printf("done.\n");
  1042. return 0;
  1043. }
  1044. static int userfaultfd_events_test(void)
  1045. {
  1046. struct uffdio_register uffdio_register;
  1047. pthread_t uffd_mon;
  1048. int err, features;
  1049. pid_t pid;
  1050. char c;
  1051. struct uffd_stats stats = { 0 };
  1052. printf("testing events (fork, remap, remove): ");
  1053. fflush(stdout);
  1054. features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
  1055. UFFD_FEATURE_EVENT_REMOVE;
  1056. uffd_test_ctx_init(features);
  1057. fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
  1058. uffdio_register.range.start = (unsigned long) area_dst;
  1059. uffdio_register.range.len = nr_pages * page_size;
  1060. uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
  1061. if (test_uffdio_wp)
  1062. uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
  1063. if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
  1064. err("register failure");
  1065. assert_expected_ioctls_present(
  1066. uffdio_register.mode, uffdio_register.ioctls);
  1067. if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
  1068. err("uffd_poll_thread create");
  1069. pid = fork();
  1070. if (pid < 0)
  1071. err("fork");
  1072. if (!pid)
  1073. exit(faulting_process(0));
  1074. waitpid(pid, &err, 0);
  1075. if (err)
  1076. err("faulting process failed");
  1077. if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
  1078. err("pipe write");
  1079. if (pthread_join(uffd_mon, NULL))
  1080. return 1;
  1081. uffd_stats_report(&stats, 1);
  1082. return stats.missing_faults != nr_pages;
  1083. }
  1084. static int userfaultfd_sig_test(void)
  1085. {
  1086. struct uffdio_register uffdio_register;
  1087. unsigned long userfaults;
  1088. pthread_t uffd_mon;
  1089. int err, features;
  1090. pid_t pid;
  1091. char c;
  1092. struct uffd_stats stats = { 0 };
  1093. printf("testing signal delivery: ");
  1094. fflush(stdout);
  1095. features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
  1096. uffd_test_ctx_init(features);
  1097. fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
  1098. uffdio_register.range.start = (unsigned long) area_dst;
  1099. uffdio_register.range.len = nr_pages * page_size;
  1100. uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
  1101. if (test_uffdio_wp)
  1102. uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
  1103. if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
  1104. err("register failure");
  1105. assert_expected_ioctls_present(
  1106. uffdio_register.mode, uffdio_register.ioctls);
  1107. if (faulting_process(1))
  1108. err("faulting process failed");
  1109. uffd_test_ops->release_pages(area_dst);
  1110. if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
  1111. err("uffd_poll_thread create");
  1112. pid = fork();
  1113. if (pid < 0)
  1114. err("fork");
  1115. if (!pid)
  1116. exit(faulting_process(2));
  1117. waitpid(pid, &err, 0);
  1118. if (err)
  1119. err("faulting process failed");
  1120. if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
  1121. err("pipe write");
  1122. if (pthread_join(uffd_mon, (void **)&userfaults))
  1123. return 1;
  1124. printf("done.\n");
  1125. if (userfaults)
  1126. err("Signal test failed, userfaults: %ld", userfaults);
  1127. return userfaults != 0;
  1128. }
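/*
 * Verify the bit-flipped pattern written by the minor fault handler: page i
 * is expected to be filled with ~(i % 255).
 */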
  1129. void check_memory_contents(char *p)
  1130. {
  1131. unsigned long i;
  1132. uint8_t expected_byte;
  1133. void *expected_page;
  1134. if (posix_memalign(&expected_page, page_size, page_size))
  1135. err("out of memory");
  1136. for (i = 0; i < nr_pages; ++i) {
  1137. expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
  1138. memset(expected_page, expected_byte, page_size);
  1139. if (my_bcmp(expected_page, p + (i * page_size), page_size))
  1140. err("unexpected page contents after minor fault");
  1141. }
  1142. free(expected_page);
  1143. }
  1144. static int userfaultfd_minor_test(void)
  1145. {
  1146. unsigned long p;
  1147. struct uffdio_register uffdio_register;
  1148. pthread_t uffd_mon;
  1149. char c;
  1150. struct uffd_stats stats = { 0 };
  1151. if (!test_uffdio_minor)
  1152. return 0;
  1153. printf("testing minor faults: ");
  1154. fflush(stdout);
  1155. uffd_test_ctx_init(uffd_minor_feature());
  1156. uffdio_register.range.start = (unsigned long)area_dst_alias;
  1157. uffdio_register.range.len = nr_pages * page_size;
  1158. uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
  1159. if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
  1160. err("register failure");
  1161. assert_expected_ioctls_present(
  1162. uffdio_register.mode, uffdio_register.ioctls);
  1163. /*
  1164. * After registering with UFFD, populate the non-UFFD-registered side of
  1165. * the shared mapping. This should *not* trigger any UFFD minor faults.
  1166. */
  1167. for (p = 0; p < nr_pages; ++p) {
  1168. memset(area_dst + (p * page_size), p % ((uint8_t)-1),
  1169. page_size);
  1170. }
  1171. if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
  1172. err("uffd_poll_thread create");
  1173. /*
  1174. * Read each of the pages back using the UFFD-registered mapping. We
  1175. * expect that the first time we touch a page, it will result in a minor
  1176. * fault. uffd_poll_thread will resolve the fault by bit-flipping the
  1177. * page's contents, and then issuing a CONTINUE ioctl.
  1178. */
  1179. check_memory_contents(area_dst_alias);
  1180. if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
  1181. err("pipe write");
  1182. if (pthread_join(uffd_mon, NULL))
  1183. return 1;
  1184. uffd_stats_report(&stats, 1);
  1185. if (test_collapse) {
  1186. printf("testing collapse of uffd memory into PMD-mapped THPs:");
  1187. if (madvise(area_dst_alias, nr_pages * page_size,
  1188. MADV_COLLAPSE))
  1189. err("madvise(MADV_COLLAPSE)");
  1190. uffd_test_ops->check_pmd_mapping(area_dst,
  1191. nr_pages * page_size /
  1192. hpage_size);
  1193. /*
  1194. * This won't cause uffd-fault - it purely just makes sure there
  1195. * was no corruption.
  1196. */
  1197. check_memory_contents(area_dst_alias);
  1198. printf(" done.\n");
  1199. }
  1200. return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
  1201. }
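/* Bits of a /proc/<pid>/pagemap entry, used by the uffd-wp pagemap test below. */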
  1202. #define BIT_ULL(nr) (1ULL << (nr))
  1203. #define PM_SOFT_DIRTY BIT_ULL(55)
  1204. #define PM_MMAP_EXCLUSIVE BIT_ULL(56)
  1205. #define PM_UFFD_WP BIT_ULL(57)
  1206. #define PM_FILE BIT_ULL(61)
  1207. #define PM_SWAP BIT_ULL(62)
  1208. #define PM_PRESENT BIT_ULL(63)
  1209. static int pagemap_open(void)
  1210. {
  1211. int fd = open("/proc/self/pagemap", O_RDONLY);
  1212. if (fd < 0)
  1213. err("open pagemap");
  1214. return fd;
  1215. }
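/*
 * Read the 64-bit pagemap entry for vaddr: entries are 8 bytes each, indexed
 * by virtual page number (vaddr >> 12).
 */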
  1216. static uint64_t pagemap_read_vaddr(int fd, void *vaddr)
  1217. {
  1218. uint64_t value;
  1219. int ret;
  1220. ret = pread(fd, &value, sizeof(uint64_t),
  1221. ((uint64_t)vaddr >> 12) * sizeof(uint64_t));
  1222. if (ret != sizeof(uint64_t))
  1223. err("pread() on pagemap failed");
  1224. return value;
  1225. }
  1226. /* This macro lets __LINE__ work in err() */
  1227. #define pagemap_check_wp(value, wp) do { \
  1228. if (!!(value & PM_UFFD_WP) != wp) \
  1229. err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \
  1230. } while (0)
  1231. static int pagemap_test_fork(bool present)
  1232. {
  1233. pid_t child = fork();
  1234. uint64_t value;
  1235. int fd, result;
  1236. if (!child) {
  1237. /* Open the pagemap fd of the child itself */
  1238. fd = pagemap_open();
  1239. value = pagemap_read_vaddr(fd, area_dst);
  1240. /*
  1241. * After fork() uffd-wp bit should be gone as long as we're
  1242. * without UFFD_FEATURE_EVENT_FORK
  1243. */
  1244. pagemap_check_wp(value, false);
  1245. /* Succeed */
  1246. exit(0);
  1247. }
  1248. waitpid(child, &result, 0);
  1249. return result;
  1250. }
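/*
 * Check uffd-wp bookkeeping through /proc/self/pagemap for one page size:
 * write-protect a faulted-in page, verify PM_UFFD_WP is set, verify it is
 * not inherited across fork(), check it survives MADV_PAGEOUT (swap), then
 * unprotect and verify the bit is cleared.
 */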
  1251. static void userfaultfd_pagemap_test(unsigned int test_pgsize)
  1252. {
  1253. struct uffdio_register uffdio_register;
  1254. int pagemap_fd;
  1255. uint64_t value;
  1256. /* Pagemap tests uffd-wp only */
  1257. if (!test_uffdio_wp)
  1258. return;
  1259. /* Not enough memory to test this page size */
  1260. if (test_pgsize > nr_pages * page_size)
  1261. return;
  1262. printf("testing uffd-wp with pagemap (pgsize=%u): ", test_pgsize);
  1263. /* Flush so it doesn't flush twice in parent/child later */
  1264. fflush(stdout);
  1265. uffd_test_ctx_init(0);
  1266. if (test_pgsize > page_size) {
  1267. /* This is a thp test */
  1268. if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE))
  1269. err("madvise(MADV_HUGEPAGE) failed");
  1270. } else if (test_pgsize == page_size) {
  1271. /* This is normal page test; force no thp */
  1272. if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE))
  1273. err("madvise(MADV_NOHUGEPAGE) failed");
  1274. }
  1275. uffdio_register.range.start = (unsigned long) area_dst;
  1276. uffdio_register.range.len = nr_pages * page_size;
  1277. uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
  1278. if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
  1279. err("register failed");
  1280. pagemap_fd = pagemap_open();
  1281. /* Touch the page */
  1282. *area_dst = 1;
  1283. wp_range(uffd, (uint64_t)area_dst, test_pgsize, true);
  1284. value = pagemap_read_vaddr(pagemap_fd, area_dst);
  1285. pagemap_check_wp(value, true);
  1286. /* Make sure the uffd-wp bit is dropped on fork */
  1287. if (pagemap_test_fork(true))
  1288. err("Detected stale uffd-wp bit in child");
  1289. /* Exclusive required or PAGEOUT won't work */
  1290. if (!(value & PM_MMAP_EXCLUSIVE))
  1291. err("multiple mapping detected: 0x%"PRIx64, value);
  1292. if (madvise(area_dst, test_pgsize, MADV_PAGEOUT))
  1293. err("madvise(MADV_PAGEOUT) failed");
  1294. /* Uffd-wp should persist even swapped out */
  1295. value = pagemap_read_vaddr(pagemap_fd, area_dst);
  1296. pagemap_check_wp(value, true);
  1297. /* Make sure the uffd-wp bit is dropped on fork */
  1298. if (pagemap_test_fork(false))
  1299. err("Detected stale uffd-wp bit in child");
  1300. /* Unprotect; this tests swap pte modifications */
  1301. wp_range(uffd, (uint64_t)area_dst, page_size, false);
  1302. value = pagemap_read_vaddr(pagemap_fd, area_dst);
  1303. pagemap_check_wp(value, false);
  1304. /* Fault in the page from disk */
  1305. *area_dst = 2;
  1306. value = pagemap_read_vaddr(pagemap_fd, area_dst);
  1307. pagemap_check_wp(value, false);
  1308. close(pagemap_fd);
  1309. printf("done\n");
  1310. }
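/*
 * Main stress loop: for every remaining bounce, register area_dst (and its
 * alias, if any), release its pages, run stress(), clear any leftover write
 * protection, unregister, optionally verify the counters, and swap the src
 * and dst areas for the next bounce. Afterwards run the pagemap (anon only),
 * zeropage, signal, events and minor-fault tests.
 */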
static int userfaultfd_stress(void)
{
	void *area;
	unsigned long nr;
	struct uffdio_register uffdio_register;
	struct uffd_stats uffd_stats[nr_cpus];

	uffd_test_ctx_init(0);

	if (posix_memalign(&area, page_size, page_size))
		err("out of memory");
	zeropage = area;
	bzero(zeropage, page_size);

	pthread_mutex_lock(&uffd_read_mutex);

	pthread_attr_init(&attr);
	pthread_attr_setstacksize(&attr, 16*1024*1024);

	while (bounces--) {
		printf("bounces: %d, mode:", bounces);
		if (bounces & BOUNCE_RANDOM)
			printf(" rnd");
		if (bounces & BOUNCE_RACINGFAULTS)
			printf(" racing");
		if (bounces & BOUNCE_VERIFY)
			printf(" ver");
		if (bounces & BOUNCE_POLL)
			printf(" poll");
		else
			printf(" read");
		printf(", ");
		fflush(stdout);

		if (bounces & BOUNCE_POLL)
			fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
		else
			fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);

		/* register */
		uffdio_register.range.start = (unsigned long) area_dst;
		uffdio_register.range.len = nr_pages * page_size;
		uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
		if (test_uffdio_wp)
			uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
		if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
			err("register failure");
		assert_expected_ioctls_present(
			uffdio_register.mode, uffdio_register.ioctls);

		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long)
				area_dst_alias;
			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
				err("register failure alias");
		}

		/*
		 * The madvise done previously isn't enough: some
		 * uffd_thread could have read userfaults (one of
		 * those already resolved by the background thread)
		 * and it may be in the process of calling
		 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
		 * area_src and it would map a zero page in it (of
		 * course such a UFFDIO_COPY is perfectly safe as it'd
		 * return -EEXIST). The problem comes at the next
		 * bounce though: that racing UFFDIO_COPY would
		 * generate zeropages in the area_src, thus invalidating
		 * the previous MADV_DONTNEED. Without this additional
		 * MADV_DONTNEED those zeropage leftovers in the
		 * area_src would lead to -EEXIST failures during the
		 * next bounce, effectively leaving a zeropage in the
		 * area_dst.
		 *
		 * Try commenting out this madvise to see the memory
		 * corruption being caught pretty quickly.
		 *
		 * khugepaged is only inhibited from collapsing THP
		 * after the UFFDIO_REGISTER, so the MADV_DONTNEED is
		 * also required to happen here, after registration.
		 */
		uffd_test_ops->release_pages(area_dst);

		uffd_stats_reset(uffd_stats, nr_cpus);

		/* bounce pass */
		if (stress(uffd_stats))
			return 1;

		/* Clear all the write protections if there are any */
		if (test_uffdio_wp)
			wp_range(uffd, (unsigned long)area_dst,
				 nr_pages * page_size, false);

		/* unregister */
		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
			err("unregister failure");
		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long) area_dst;
			if (ioctl(uffd, UFFDIO_UNREGISTER,
				  &uffdio_register.range))
				err("unregister failure alias");
		}

		/* verification */
		if (bounces & BOUNCE_VERIFY)
			for (nr = 0; nr < nr_pages; nr++)
				if (*area_count(area_dst, nr) != count_verify[nr])
					err("error area_count %llu %llu %lu\n",
					    *area_count(area_dst, nr),
					    count_verify[nr], nr);

		/* prepare next bounce */
		swap(area_src, area_dst);
		swap(area_src_alias, area_dst_alias);

		uffd_stats_report(uffd_stats, nr_cpus);
	}

	if (test_type == TEST_ANON) {
		/*
		 * shmem/hugetlb won't be able to run this since they have
		 * different behavior on fork() (file-backed memory normally
		 * drops ptes directly on fork()), while the pagemap test
		 * verifies the pgtable entry of the fork()ed child.
		 */
		userfaultfd_pagemap_test(page_size);
		/*
		 * Hard-code 2M THP (512 base pages) for now, as x86_64 is
		 * currently the only architecture that supports uffd-wp.
		 */
		userfaultfd_pagemap_test(page_size * 512);
	}

	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
		|| userfaultfd_events_test() || userfaultfd_minor_test();
}
/*
 * Copied from mlock2-tests.c: parse the "Hugepagesize:" line (in kB) from
 * /proc/meminfo and return the default huge page size in bytes, or 0 if it
 * cannot be determined.
 */
unsigned long default_huge_page_size(void)
{
	unsigned long hps = 0;
	char *line = NULL;
	size_t linelen = 0;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 0;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
			hps <<= 10;
			break;
		}
	}

	free(line);
	fclose(f);
	return hps;
}
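
/* Select the test mode and the per-memory-type ops for the given type name. */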
static void set_test_type(const char *type)
{
	if (!strcmp(type, "anon")) {
		test_type = TEST_ANON;
		uffd_test_ops = &anon_uffd_test_ops;
	} else if (!strcmp(type, "hugetlb")) {
		test_type = TEST_HUGETLB;
		uffd_test_ops = &hugetlb_uffd_test_ops;
	} else if (!strcmp(type, "hugetlb_shared")) {
		map_shared = true;
		test_type = TEST_HUGETLB;
		uffd_test_ops = &hugetlb_uffd_test_ops;
		/* Minor faults require shared hugetlb; only enable here. */
		test_uffdio_minor = true;
	} else if (!strcmp(type, "shmem")) {
		map_shared = true;
		test_type = TEST_SHMEM;
		uffd_test_ops = &shmem_uffd_test_ops;
		test_uffdio_minor = true;
	}
}
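
/*
 * The test type argument is a ':'-separated list: the first token selects the
 * memory type (anon, hugetlb, hugetlb_shared or shmem) and any further tokens
 * are modifiers: "dev" opens the handle via /dev/userfaultfd, "syscall"
 * forces the userfaultfd() syscall, and "collapse" enables the shmem collapse
 * test (e.g. "shmem:collapse").
 */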
static void parse_test_type_arg(const char *raw_type)
{
	char *buf = strdup(raw_type);
	uint64_t features = UFFD_API_FEATURES;

	while (buf) {
		const char *token = strsep(&buf, ":");

		if (!test_type)
			set_test_type(token);
		else if (!strcmp(token, "dev"))
			test_dev_userfaultfd = true;
		else if (!strcmp(token, "syscall"))
			test_dev_userfaultfd = false;
		else if (!strcmp(token, "collapse"))
			test_collapse = true;
		else
			err("unrecognized test mod '%s'", token);
	}

	if (!test_type)
		err("failed to parse test type argument: '%s'", raw_type);

	if (test_collapse && test_type != TEST_SHMEM)
		err("Unsupported test: %s", raw_type);

	if (test_type == TEST_HUGETLB)
		page_size = hpage_size;
	else
		page_size = sysconf(_SC_PAGE_SIZE);

	if (!page_size)
		err("Unable to determine page size");
	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
	    > page_size)
		err("Impossible to run this test");

	/*
	 * Whether we can test certain features depends not just on test type,
	 * but also on whether or not this particular kernel supports the
	 * feature.
	 */
	userfaultfd_open(&features);

	test_uffdio_wp = test_uffdio_wp &&
		(features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
	test_uffdio_minor = test_uffdio_minor &&
		(features & uffd_minor_feature());

	close(uffd);
	uffd = -1;
}
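
/*
 * SIGALRM handler: periodically set the *_eexist flags so that the copy and
 * zeropage paths occasionally exercise their -EEXIST handling, then re-arm
 * the alarm for another ALARM_INTERVAL_SECS.
 */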
static void sigalrm(int sig)
{
	if (sig != SIGALRM)
		abort();
	test_uffdio_copy_eexist = true;
	test_uffdio_zeropage_eexist = true;
	alarm(ALARM_INTERVAL_SECS);
}
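
/*
 * Arguments: argv[1] is the test type (optionally with ':' modifiers),
 * argv[2] the test size in MiB, argv[3] the number of bounces and, for
 * shared hugetlb runs, argv[4] a hugetlbfs file used to back the test areas.
 */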
int main(int argc, char **argv)
{
	size_t bytes;

	if (argc < 4)
		usage();

	if (signal(SIGALRM, sigalrm) == SIG_ERR)
		err("failed to arm SIGALRM");
	alarm(ALARM_INTERVAL_SECS);

	hpage_size = default_huge_page_size();
	parse_test_type_arg(argv[1]);
	bytes = atol(argv[2]) * 1024 * 1024;

	if (test_collapse && bytes & (hpage_size - 1))
		err("MiB must be multiple of %lu if :collapse mod set",
		    hpage_size >> 20);

	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);

	if (test_collapse) {
		/* nr_cpus must divide (bytes / page_size); otherwise,
		 * area allocations of (nr_pages * page_size) won't be a
		 * multiple of hpage_size, even if bytes is a multiple of
		 * hpage_size.
		 *
		 * This means that nr_cpus must divide (N * (2 << (H-P)))
		 * where:
		 *	bytes = hpage_size * N
		 *	hpage_size = 2 << H
		 *	page_size = 2 << P
		 *
		 * We want to choose the largest nr_cpus satisfying this
		 * constraint, not larger than the number of online CPUs.
		 * Unfortunately, the prime factorizations of N and nr_cpus
		 * may be arbitrary, so we would have to search for it.
		 * Instead, just use the highest power of 2 dividing both
		 * nr_cpus and (bytes / page_size).
		 */
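		/*
		 * Worked example, with assumed numbers: bytes = 64 MiB, a
		 * 2 MiB huge page size and a 4 KiB base page size give
		 * bytes / page_size = 16384.  With 6 online CPUs,
		 * factor_of_2(6) = 2 and factor_of_2(16384) = 16384, so
		 * nr_cpus is limited to 2 and each CPU gets 8192 base pages
		 * (32 MiB), a multiple of hpage_size as required.
		 */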
		int x = factor_of_2(nr_cpus);
		int y = factor_of_2(bytes / page_size);

		nr_cpus = x < y ? x : y;
	}
	nr_pages_per_cpu = bytes / page_size / nr_cpus;
	if (!nr_pages_per_cpu) {
		_err("invalid MiB");
		usage();
	}

	bounces = atoi(argv[3]);
	if (bounces <= 0) {
		_err("invalid bounces");
		usage();
	}
	nr_pages = nr_pages_per_cpu * nr_cpus;

	if (test_type == TEST_HUGETLB && map_shared) {
		if (argc < 5)
			usage();
		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
		if (huge_fd < 0)
			err("Open of %s failed", argv[4]);
		if (ftruncate(huge_fd, 0))
			err("ftruncate %s to size 0 failed", argv[4]);
	} else if (test_type == TEST_SHMEM) {
		shm_fd = memfd_create(argv[0], 0);
		if (shm_fd < 0)
			err("memfd_create");
		if (ftruncate(shm_fd, nr_pages * page_size * 2))
			err("ftruncate");
		if (fallocate(shm_fd,
			      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
			      nr_pages * page_size * 2))
			err("fallocate");
	}
	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
	       nr_pages, nr_pages_per_cpu);
	return userfaultfd_stress();
}

#else /* __NR_userfaultfd */

#warning "missing __NR_userfaultfd definition"

int main(void)
{
	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
	return KSFT_SKIP;
}

#endif /* __NR_userfaultfd */