protection_keys.c 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
  4. *
  5. * There are examples in here of:
  6. * * how to set protection keys on memory
  7. * * how to set/clear bits in pkey registers (the rights register)
  8. * * how to handle SEGV_PKUERR signals and extract pkey-relevant
  9. * information from the siginfo
  10. *
  11. * Things to add:
  12. * make sure KSM and KSM COW breaking works
  13. * prefault pages in at malloc, or not
  14. * protect MPX bounds tables with protection keys?
  15. * make sure VMA splitting/merging is working correctly
  16. * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
  17. * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
  18. * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
  19. *
  20. * Compile like this:
  21. * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  22. * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  23. */
  24. #define _GNU_SOURCE
  25. #define __SANE_USERSPACE_TYPES__
  26. #include <errno.h>
  27. #include <linux/elf.h>
  28. #include <linux/futex.h>
  29. #include <time.h>
  30. #include <sys/time.h>
  31. #include <sys/syscall.h>
  32. #include <string.h>
  33. #include <stdio.h>
  34. #include <stdint.h>
  35. #include <stdbool.h>
  36. #include <signal.h>
  37. #include <assert.h>
  38. #include <stdlib.h>
  39. #include <ucontext.h>
  40. #include <sys/mman.h>
  41. #include <sys/types.h>
  42. #include <sys/wait.h>
  43. #include <sys/stat.h>
  44. #include <fcntl.h>
  45. #include <unistd.h>
  46. #include <sys/ptrace.h>
  47. #include <setjmp.h>
  48. #include "pkey-helpers.h"
  /* Test-run counters; only their initial values are visible in this part of the file. */
  int iteration_nr = 1;
  int test_nr;
  /*
   * Software copy of the pkey rights register.  pkey_disable_set(),
   * pkey_disable_clear() and alloc_pkey() update it with set_pkey_bits()
   * whenever they change the rights of a key.
   */
  u64 shadow_pkey_reg;
  /* Set to 1 while signal_handler()/sig_chld() run, and back to 0 when they finish. */
  int dprint_in_signal;
  /* NOTE(review): presumably the scratch buffer the dprintf*() helpers use while
   * dprint_in_signal is set -- confirm against pkey-helpers.h. */
  char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
  /*
   * Write the string 'str' into the existing file 'file'.
   *
   * The file is opened O_RDWR (it is not created) and strlen(str) bytes are
   * written with a single write().  Every failure is fatal: a diagnostic is
   * printed to stderr and the process exits, using the source line of the
   * failing check as the exit status.
   */
  void cat_into_file(char *str, char *file)
  {
      size_t len = strlen(str);
      ssize_t ret;
      int saved_errno;
      int fd;

      dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
      fd = open(file, O_RDWR);
      /*
       * these need to be raw because they are called under
       * pkey_assert()
       */
      if (fd < 0) {
          /* fprintf() may clobber errno, so keep it for perror() */
          saved_errno = errno;
          /* name the file we failed to open, not the payload */
          fprintf(stderr, "error opening '%s'\n", file);
          errno = saved_errno;
          perror("error: ");
          exit(__LINE__);
      }
      ret = write(fd, str, len);
      /* treat both an error (-1) and a short write as failure */
      if (ret < 0 || (size_t)ret != len) {
          perror("write to file failed");
          fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
          exit(__LINE__);
      }
      close(fd);
  }
  #if CONTROL_TRACING > 0
  /* non-zero once the "not root" warning has been handled */
  static int warned_tracing;
  /*
   * Report whether tracing control may be attempted.
   *
   * Returns 1 when the effective uid is 0.  Otherwise returns 0 and, on the
   * first such call only, prints a warning to stderr.
   */
  int tracing_root_ok(void)
  {
      if (geteuid() == 0)
          return 1;

      if (!warned_tracing)
          fprintf(stderr, "WARNING: not run as root, "
                  "can not do tracing control\n");
      warned_tracing = 1;
      return 0;
  }
  #endif
  /*
   * Start tracing of this process through the tracefs files under
   * TRACEDIR.  Compiles to an empty function unless CONTROL_TRACING > 0,
   * and does nothing when tracing_root_ok() says we are not root.
   */
  void tracing_on(void)
  {
  #if CONTROL_TRACING > 0
  #define TRACEDIR "/sys/kernel/debug/tracing"
      /* hard-wired choice of tracer; 0 would select "nop" instead */
      const int use_function_graph = 1;
      char pid_text[32];

      if (!tracing_root_ok())
          return;

      sprintf(pid_text, "%d", getpid());
      /* stop tracing and write a newline to the trace file before reconfiguring */
      cat_into_file("0", TRACEDIR "/tracing_on");
      cat_into_file("\n", TRACEDIR "/trace");
      if (use_function_graph) {
          cat_into_file("function_graph", TRACEDIR "/current_tracer");
          cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
      } else {
          cat_into_file("nop", TRACEDIR "/current_tracer");
      }
      /* restrict tracing to our own pid, then switch it back on */
      cat_into_file(pid_text, TRACEDIR "/set_ftrace_pid");
      cat_into_file("1", TRACEDIR "/tracing_on");
      dprintf1("enabled tracing\n");
  #endif
  }
  /*
   * Stop tracing by writing "0" to the tracing_on control file.  Empty
   * unless CONTROL_TRACING > 0; skipped when tracing_root_ok() fails.
   */
  void tracing_off(void)
  {
  #if CONTROL_TRACING > 0
      if (tracing_root_ok())
          cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
  #endif
  }
  /*
   * Announce itself on stderr, turn tracing off and, when SLEEP_ON_ABORT
   * is defined, sleep for that many seconds.
   */
  void abort_hooks(void)
  {
      fprintf(stderr, "running %s()...\n", __func__);

      tracing_off();

  #ifdef SLEEP_ON_ABORT
      sleep(SLEEP_ON_ABORT);
  #endif
  }
  /*
   * This attempts to have roughly a page of instructions followed by a few
   * instructions that do a write, and another page of instructions. That
   * way, we are pretty sure that the write is in the second page of
   * instructions and has at least a page of padding behind it.
   *
   * *That* lets us be sure to madvise() away the write instruction, which
   * will then fault, which makes sure that the fault code handles
   * execute-only memory properly.
   *
   * @write_to_me: location that receives the store; the value stored is the
   *               source line number of the store statement itself.
   *
   * The function is aligned to PAGE_SIZE (64K on powerpc64) so that its
   * layout relative to page boundaries is predictable.  The statement order
   * below is the whole point of the function -- do not reorder it.
   */
  #ifdef __powerpc64__
  /* This way, both 4K and 64K alignment are maintained */
  __attribute__((__aligned__(65536)))
  #else
  __attribute__((__aligned__(PAGE_SIZE)))
  #endif
  void lots_o_noops_around_write(int *write_to_me)
  {
      dprintf3("running %s()\n", __func__);
      /* leading padding (see the comment above the function) */
      __page_o_noops();
      /* Assume this happens in the second page of instructions: */
      *write_to_me = __LINE__;
      /* pad out by another page: */
      __page_o_noops();
      dprintf3("%s() done\n", __func__);
  }
  153. void dump_mem(void *dumpme, int len_bytes)
  154. {
  155. char *c = (void *)dumpme;
  156. int i;
  157. for (i = 0; i < len_bytes; i += sizeof(u64)) {
  158. u64 *ptr = (u64 *)(c + i);
  159. dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
  160. }
  161. }
  162. static u32 hw_pkey_get(int pkey, unsigned long flags)
  163. {
  164. u64 pkey_reg = __read_pkey_reg();
  165. dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
  166. __func__, pkey, flags, 0, 0);
  167. dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg);
  168. return (u32) get_pkey_bits(pkey_reg, pkey);
  169. }
  170. static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
  171. {
  172. u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
  173. u64 old_pkey_reg = __read_pkey_reg();
  174. u64 new_pkey_reg;
  175. /* make sure that 'rights' only contains the bits we expect: */
  176. assert(!(rights & ~mask));
  177. /* modify bits accordingly in old pkey_reg and assign it */
  178. new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights);
  179. __write_pkey_reg(new_pkey_reg);
  180. dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
  181. " pkey_reg now: %016llx old_pkey_reg: %016llx\n",
  182. __func__, pkey, rights, flags, 0, __read_pkey_reg(),
  183. old_pkey_reg);
  184. return 0;
  185. }
  186. void pkey_disable_set(int pkey, int flags)
  187. {
  188. unsigned long syscall_flags = 0;
  189. int ret;
  190. int pkey_rights;
  191. u64 orig_pkey_reg = read_pkey_reg();
  192. dprintf1("START->%s(%d, 0x%x)\n", __func__,
  193. pkey, flags);
  194. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  195. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  196. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  197. pkey, pkey, pkey_rights);
  198. pkey_assert(pkey_rights >= 0);
  199. pkey_rights |= flags;
  200. ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
  201. assert(!ret);
  202. /* pkey_reg and flags have the same format */
  203. shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
  204. dprintf1("%s(%d) shadow: 0x%016llx\n",
  205. __func__, pkey, shadow_pkey_reg);
  206. pkey_assert(ret >= 0);
  207. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  208. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  209. pkey, pkey, pkey_rights);
  210. dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
  211. __func__, pkey, read_pkey_reg());
  212. if (flags)
  213. pkey_assert(read_pkey_reg() >= orig_pkey_reg);
  214. dprintf1("END<---%s(%d, 0x%x)\n", __func__,
  215. pkey, flags);
  216. }
  217. void pkey_disable_clear(int pkey, int flags)
  218. {
  219. unsigned long syscall_flags = 0;
  220. int ret;
  221. int pkey_rights = hw_pkey_get(pkey, syscall_flags);
  222. u64 orig_pkey_reg = read_pkey_reg();
  223. pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
  224. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  225. pkey, pkey, pkey_rights);
  226. pkey_assert(pkey_rights >= 0);
  227. pkey_rights &= ~flags;
  228. ret = hw_pkey_set(pkey, pkey_rights, 0);
  229. shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
  230. pkey_assert(ret >= 0);
  231. pkey_rights = hw_pkey_get(pkey, syscall_flags);
  232. dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
  233. pkey, pkey, pkey_rights);
  234. dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
  235. pkey, read_pkey_reg());
  236. if (flags)
  237. assert(read_pkey_reg() <= orig_pkey_reg);
  238. }
  239. void pkey_write_allow(int pkey)
  240. {
  241. pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
  242. }
  243. void pkey_write_deny(int pkey)
  244. {
  245. pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
  246. }
  247. void pkey_access_allow(int pkey)
  248. {
  249. pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
  250. }
  251. void pkey_access_deny(int pkey)
  252. {
  253. pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
  254. }
  /* Fallback values for si_codes that older headers do not provide. */
  /* Failed address bound checks: */
  #ifndef SEGV_BNDERR
  # define SEGV_BNDERR 3
  #endif
  #ifndef SEGV_PKUERR
  # define SEGV_PKUERR 4
  #endif
  /*
   * Map a SIGSEGV si_code to its symbolic name for log output.
   * Returns a pointer to a string literal; "UNKNOWN" for any other value.
   */
  static char *si_code_str(int si_code)
  {
      switch (si_code) {
      case SEGV_MAPERR:
          return "SEGV_MAPERR";
      case SEGV_ACCERR:
          return "SEGV_ACCERR";
      case SEGV_BNDERR:
          return "SEGV_BNDERR";
      case SEGV_PKUERR:
          return "SEGV_PKUERR";
      default:
          return "UNKNOWN";
      }
  }
  /* Incremented once by signal_handler() for every fault it handles to completion. */
  int pkey_faults;
  /* pkey reported in the siginfo of the most recently handled fault; -1 until then. */
  int last_si_pkey = -1;
  /*
   * SA_SIGINFO-style handler; setup_sigsegv_handler() installs it for both
   * SIGSEGV and SIGALRM.
   *
   * @signum:    signal number (not used in the body)
   * @si:        siginfo for the fault; si_code and the pkey field are read
   * @vucontext: ucontext_t of the interrupted context
   *
   * For a fault whose si_code is SEGV_MAPERR, SEGV_ACCERR or SEGV_BNDERR the
   * process exits with status 4.  Otherwise the faulting pkey is recorded in
   * last_si_pkey, pkey_faults is bumped, and access is re-opened so that the
   * faulting instruction can continue: on x86 by zeroing the pkey register
   * image in the saved FP/xstate area, on powerpc64 by pkey_access_allow().
   *
   * NOTE(review): printf() and exit() are not async-signal-safe; acceptable
   * for a selftest, but worth knowing when debugging odd hangs.
   */
  void signal_handler(int signum, siginfo_t *si, void *vucontext)
  {
      ucontext_t *uctxt = vucontext;
      int trapno;
      unsigned long ip;
      char *fpregs;
  #if defined(__i386__) || defined(__x86_64__) /* arch */
      u32 *pkey_reg_ptr;
      int pkey_reg_offset;
  #endif /* arch */
      u64 siginfo_pkey;
      u32 *si_pkey_ptr;
      /* flag for the dprintf*() helpers that we are inside a signal handler */
      dprint_in_signal = 1;
      dprintf1(">>>>===============SIGSEGV============================\n");
      dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
               __func__, __LINE__,
               __read_pkey_reg(), shadow_pkey_reg);
      /*
       * NOTE(review): these three reads sit outside the arch #if blocks
       * although gregs[]/fpregs look x86-specific -- confirm how the
       * non-x86 build provides REG_TRAPNO / REG_IP_IDX.
       */
      trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
      ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
      fpregs = (char *) uctxt->uc_mcontext.fpregs;
      dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
               __func__, trapno, ip, si_code_str(si->si_code),
               si->si_code);
  #if defined(__i386__) || defined(__x86_64__) /* arch */
  #ifdef __i386__
      /*
       * 32-bit has some extra padding so that userspace can tell whether
       * the XSTATE header is present in addition to the "legacy" FPU
       * state. We just assume that it is here.
       */
      fpregs += 0x70;
  #endif /* i386 */
      /* locate the saved pkey register inside the signal frame's xstate */
      pkey_reg_offset = pkey_reg_xstate_offset();
      pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
      /*
       * If we got a PKEY fault, we *HAVE* to have at least one bit set in
       * here.
       */
      dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
      /* note: the "- 128" is in u32 units, i.e. 512 bytes before the register */
      if (DEBUG_LEVEL > 4)
          dump_mem(pkey_reg_ptr - 128, 256);
      pkey_assert(*pkey_reg_ptr);
  #endif /* arch */
      dprintf1("siginfo: %p\n", si);
      dprintf1(" fpregs: %p\n", fpregs);
      /* anything that is not a protection-key fault ends the test run */
      if ((si->si_code == SEGV_MAPERR) ||
          (si->si_code == SEGV_ACCERR) ||
          (si->si_code == SEGV_BNDERR)) {
          printf("non-PK si_code, exiting...\n");
          exit(4);
      }
      si_pkey_ptr = siginfo_get_pkey_ptr(si);
      dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
      /* dump 24 bytes starting 8 bytes before the pkey field */
      dump_mem((u8 *)si_pkey_ptr - 8, 24);
      siginfo_pkey = *si_pkey_ptr;
      pkey_assert(siginfo_pkey < NR_PKEYS);
      last_si_pkey = siginfo_pkey;
      /*
       * need __read_pkey_reg() version so we do not do shadow_pkey_reg
       * checking
       */
      dprintf1("signal pkey_reg from pkey_reg: %016llx\n",
               __read_pkey_reg());
      dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
  #if defined(__i386__) || defined(__x86_64__) /* arch */
      dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
      /* stores 8 bytes of zero through a pointer declared as u32 * */
      *(u64 *)pkey_reg_ptr = 0x00000000;
      dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
  #elif defined(__powerpc64__) /* arch */
      /* restore access and let the faulting instruction continue */
      pkey_access_allow(siginfo_pkey);
  #endif /* arch */
      pkey_faults++;
      dprintf1("<<<<==================================================\n");
      dprint_in_signal = 0;
  }
  /*
   * Block in waitpid(-1, ...) until one child changes state and return
   * waitpid()'s result (-1 on error, e.g. when there are no children).
   * Despite the name, a single call reaps at most one child; the exit
   * status is discarded.
   */
  int wait_all_children(void)
  {
      int child_status;
      pid_t reaped;

      reaped = waitpid(-1, &child_status, 0);
      return reaped;
  }
  357. void sig_chld(int x)
  358. {
  359. dprint_in_signal = 1;
  360. dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
  361. dprint_in_signal = 0;
  362. }
  363. void setup_sigsegv_handler(void)
  364. {
  365. int r, rs;
  366. struct sigaction newact;
  367. struct sigaction oldact;
  368. /* #PF is mapped to sigsegv */
  369. int signum = SIGSEGV;
  370. newact.sa_handler = 0;
  371. newact.sa_sigaction = signal_handler;
  372. /*sigset_t - signals to block while in the handler */
  373. /* get the old signal mask. */
  374. rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
  375. pkey_assert(rs == 0);
  376. /* call sa_sigaction, not sa_handler*/
  377. newact.sa_flags = SA_SIGINFO;
  378. newact.sa_restorer = 0; /* void(*)(), obsolete */
  379. r = sigaction(signum, &newact, &oldact);
  380. r = sigaction(SIGALRM, &newact, &oldact);
  381. pkey_assert(r == 0);
  382. }
  383. void setup_handlers(void)
  384. {
  385. signal(SIGCHLD, &sig_chld);
  386. setup_sigsegv_handler();
  387. }
  /*
   * Fork a child that does nothing but sleep in 30-second naps forever.
   *
   * Returns the child's pid in the parent; the child never returns.
   * A failed fork() trips pkey_assert().
   */
  pid_t fork_lazy_child(void)
  {
      pid_t child = fork();

      pkey_assert(child >= 0);
      dprintf3("[%d] fork() ret: %d\n", getpid(), child);

      if (child == 0) {
          /* in the child */
          for (;;) {
              dprintf1("child sleeping...\n");
              sleep(30);
          }
      }
      return child;
  }
  403. int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  404. unsigned long pkey)
  405. {
  406. int sret;
  407. dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
  408. ptr, size, orig_prot, pkey);
  409. errno = 0;
  410. sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
  411. if (errno) {
  412. dprintf2("SYS_mprotect_key sret: %d\n", sret);
  413. dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
  414. dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
  415. if (DEBUG_LEVEL >= 2)
  416. perror("SYS_mprotect_pkey");
  417. }
  418. return sret;
  419. }
  /*
   * Raw wrapper around the SYS_pkey_alloc system call.  Logs the arguments,
   * the result and errno, and returns the syscall's return value unchanged.
   */
  int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
  {
      int new_key;

      new_key = syscall(SYS_pkey_alloc, flags, init_val);
      dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
               __func__, flags, init_val, new_key, errno);
      return new_key;
  }
  427. int alloc_pkey(void)
  428. {
  429. int ret;
  430. unsigned long init_val = 0x0;
  431. dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
  432. __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
  433. ret = sys_pkey_alloc(0, init_val);
  434. /*
  435. * pkey_alloc() sets PKEY register, so we need to reflect it in
  436. * shadow_pkey_reg:
  437. */
  438. dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  439. " shadow: 0x%016llx\n",
  440. __func__, __LINE__, ret, __read_pkey_reg(),
  441. shadow_pkey_reg);
  442. if (ret > 0) {
  443. /* clear both the bits: */
  444. shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
  445. ~PKEY_MASK);
  446. dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  447. " shadow: 0x%016llx\n",
  448. __func__,
  449. __LINE__, ret, __read_pkey_reg(),
  450. shadow_pkey_reg);
  451. /*
  452. * move the new state in from init_val
  453. * (remember, we cheated and init_val == pkey_reg format)
  454. */
  455. shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
  456. init_val);
  457. }
  458. dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  459. " shadow: 0x%016llx\n",
  460. __func__, __LINE__, ret, __read_pkey_reg(),
  461. shadow_pkey_reg);
  462. dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
  463. /* for shadow checking: */
  464. read_pkey_reg();
  465. dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  466. " shadow: 0x%016llx\n",
  467. __func__, __LINE__, ret, __read_pkey_reg(),
  468. shadow_pkey_reg);
  469. return ret;
  470. }
  /*
   * Raw wrapper around the SYS_pkey_free system call.  Logs the key and the
   * result, and returns the syscall's return value unchanged.
   */
  int sys_pkey_free(unsigned long pkey)
  {
      int rc;

      rc = syscall(SYS_pkey_free, pkey);
      dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, rc);
      return rc;
  }
  477. /*
  478. * I had a bug where pkey bits could be set by mprotect() but
  479. * not cleared. This ensures we get lots of random bit sets
  480. * and clears on the vma and pte pkey bits.
  481. */
  482. int alloc_random_pkey(void)
  483. {
  484. int max_nr_pkey_allocs;
  485. int ret;
  486. int i;
  487. int alloced_pkeys[NR_PKEYS];
  488. int nr_alloced = 0;
  489. int random_index;
  490. memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
  491. /* allocate every possible key and make a note of which ones we got */
  492. max_nr_pkey_allocs = NR_PKEYS;
  493. for (i = 0; i < max_nr_pkey_allocs; i++) {
  494. int new_pkey = alloc_pkey();
  495. if (new_pkey < 0)
  496. break;
  497. alloced_pkeys[nr_alloced++] = new_pkey;
  498. }
  499. pkey_assert(nr_alloced > 0);
  500. /* select a random one out of the allocated ones */
  501. random_index = rand() % nr_alloced;
  502. ret = alloced_pkeys[random_index];
  503. /* now zero it out so we don't free it next */
  504. alloced_pkeys[random_index] = 0;
  505. /* go through the allocated ones that we did not want and free them */
  506. for (i = 0; i < nr_alloced; i++) {
  507. int free_ret;
  508. if (!alloced_pkeys[i])
  509. continue;
  510. free_ret = sys_pkey_free(alloced_pkeys[i]);
  511. pkey_assert(!free_ret);
  512. }
  513. dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  514. " shadow: 0x%016llx\n", __func__,
  515. __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
  516. return ret;
  517. }
  518. int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
  519. unsigned long pkey)
  520. {
  521. int nr_iterations = random() % 100;
  522. int ret;
  523. while (0) {
  524. int rpkey = alloc_random_pkey();
  525. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  526. dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  527. ptr, size, orig_prot, pkey, ret);
  528. if (nr_iterations-- < 0)
  529. break;
  530. dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  531. " shadow: 0x%016llx\n",
  532. __func__, __LINE__, ret, __read_pkey_reg(),
  533. shadow_pkey_reg);
  534. sys_pkey_free(rpkey);
  535. dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  536. " shadow: 0x%016llx\n",
  537. __func__, __LINE__, ret, __read_pkey_reg(),
  538. shadow_pkey_reg);
  539. }
  540. pkey_assert(pkey < NR_PKEYS);
  541. ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
  542. dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
  543. ptr, size, orig_prot, pkey, ret);
  544. pkey_assert(!ret);
  545. dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
  546. " shadow: 0x%016llx\n", __func__,
  547. __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
  548. return ret;
  549. }
  /* One tracked mapping handed out by the malloc_pkey_*() allocators. */
  struct pkey_malloc_record {
      void *ptr;   /* start of the mapping; NULL marks a free slot */
      long size;   /* length of the mapping in bytes */
      int prot;    /* protection bits the mapping was requested with */
  };
  /* Growable array of records; slots are reused after free_pkey_malloc(). */
  struct pkey_malloc_record *pkey_malloc_records;
  /* Record filled in by the most recent record_pkey_malloc() call. */
  struct pkey_malloc_record *pkey_last_malloc_record;
  /*
   * Number of slots in pkey_malloc_records (used or free).
   * NOTE(review): this used to be incremented/decremented per record while
   * also serving as the array bound; confirm nothing outside these two
   * functions relies on it being a count of live records.
   */
  long nr_pkey_malloc_records;

  /*
   * Remember a mapping so that free_pkey_malloc() can later find and unmap
   * it.
   *
   * @ptr:  start of the mapping
   * @size: length of the mapping in bytes
   * @prot: protection bits the mapping was requested with
   *
   * A free slot (ptr == NULL) is reused when there is one; otherwise the
   * array grows to 2n+1 slots.  The old code tested "if (rec)", which is
   * true for any array element, so it always overwrote slot 0 while the
   * counter kept growing past the end of the array.
   */
  void record_pkey_malloc(void *ptr, long size, int prot)
  {
      long i;
      struct pkey_malloc_record *rec = NULL;

      /* find a free record */
      for (i = 0; i < nr_pkey_malloc_records; i++) {
          if (!pkey_malloc_records[i].ptr) {
              rec = &pkey_malloc_records[i];
              break;
          }
      }
      if (!rec) {
          /* every record is full */
          size_t old_nr_records = nr_pkey_malloc_records;
          size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
          size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
          struct pkey_malloc_record *grown;

          dprintf2("new_nr_records: %zd\n", new_nr_records);
          dprintf2("new_size: %zd\n", new_size);
          grown = realloc(pkey_malloc_records, new_size);
          pkey_assert(grown != NULL);
          pkey_malloc_records = grown;
          rec = &pkey_malloc_records[old_nr_records];
          /*
           * realloc() does not initialize memory, so zero it from
           * the first new record all the way to the end.
           */
          memset(rec, 0, (new_nr_records - old_nr_records) * sizeof(*rec));
          nr_pkey_malloc_records = new_nr_records;
      }
      dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
               (int)(rec - pkey_malloc_records), rec, ptr, size);
      rec->ptr = ptr;
      rec->size = size;
      rec->prot = prot;
      pkey_last_malloc_record = rec;
  }

  /*
   * Unmap the recorded mapping that contains 'ptr' and release its slot.
   *
   * 'ptr' may point anywhere inside the mapping, not only at its start.
   * Asserts if no live record contains 'ptr' or if munmap() fails.
   */
  void free_pkey_malloc(void *ptr)
  {
      long i;
      int ret;

      dprintf3("%s(%p)\n", __func__, ptr);
      for (i = 0; i < nr_pkey_malloc_records; i++) {
          struct pkey_malloc_record *rec = &pkey_malloc_records[i];
          char *start = rec->ptr;

          dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
                   ptr, i, rec, rec->ptr, rec->size);
          /* free slots keep a stale size; never match against them */
          if (!start)
              continue;
          if (((char *)ptr < start) ||
              ((char *)ptr >= start + rec->size))
              continue;
          dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
                   ptr, i, rec, rec->ptr, rec->size);
          ret = munmap(rec->ptr, rec->size);
          dprintf3("munmap ret: %d\n", ret);
          pkey_assert(!ret);
          dprintf3("clearing rec->ptr, rec: %p\n", rec);
          rec->ptr = NULL;
          dprintf3("done clearing rec->ptr, rec: %p\n", rec);
          return;
      }
      pkey_assert(false);
  }
  618. void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
  619. {
  620. void *ptr;
  621. int ret;
  622. read_pkey_reg();
  623. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  624. size, prot, pkey);
  625. pkey_assert(pkey < NR_PKEYS);
  626. ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  627. pkey_assert(ptr != (void *)-1);
  628. ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
  629. pkey_assert(!ret);
  630. record_pkey_malloc(ptr, size, prot);
  631. read_pkey_reg();
  632. dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
  633. return ptr;
  634. }
  635. void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
  636. {
  637. int ret;
  638. void *ptr;
  639. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  640. size, prot, pkey);
  641. /*
  642. * Guarantee we can fit at least one huge page in the resulting
  643. * allocation by allocating space for 2:
  644. */
  645. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  646. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  647. pkey_assert(ptr != (void *)-1);
  648. record_pkey_malloc(ptr, size, prot);
  649. mprotect_pkey(ptr, size, prot, pkey);
  650. dprintf1("unaligned ptr: %p\n", ptr);
  651. ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
  652. dprintf1(" aligned ptr: %p\n", ptr);
  653. ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
  654. dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
  655. ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
  656. dprintf1("MADV_WILLNEED ret: %d\n", ret);
  657. memset(ptr, 0, HPAGE_SIZE);
  658. dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
  659. return ptr;
  660. }
  661. int hugetlb_setup_ok;
  662. #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
  663. #define GET_NR_HUGE_PAGES 10
  664. void setup_hugetlbfs(void)
  665. {
  666. int err;
  667. int fd;
  668. char buf[256];
  669. long hpagesz_kb;
  670. long hpagesz_mb;
  671. if (geteuid() != 0) {
  672. fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
  673. return;
  674. }
  675. cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
  676. /*
  677. * Now go make sure that we got the pages and that they
  678. * are PMD-level pages. Someone might have made PUD-level
  679. * pages the default.
  680. */
  681. hpagesz_kb = HPAGE_SIZE / 1024;
  682. hpagesz_mb = hpagesz_kb / 1024;
  683. sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
  684. fd = open(buf, O_RDONLY);
  685. if (fd < 0) {
  686. fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
  687. hpagesz_mb, strerror(errno));
  688. return;
  689. }
  690. /* -1 to guarantee leaving the trailing \0 */
  691. err = read(fd, buf, sizeof(buf)-1);
  692. close(fd);
  693. if (err <= 0) {
  694. fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n",
  695. hpagesz_mb, strerror(errno));
  696. return;
  697. }
  698. if (atoi(buf) != GET_NR_HUGE_PAGES) {
  699. fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n",
  700. hpagesz_mb, buf, GET_NR_HUGE_PAGES);
  701. return;
  702. }
  703. hugetlb_setup_ok = 1;
  704. }
  705. void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
  706. {
  707. void *ptr;
  708. int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
  709. if (!hugetlb_setup_ok)
  710. return PTR_ERR_ENOTSUP;
  711. dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
  712. size = ALIGN_UP(size, HPAGE_SIZE * 2);
  713. pkey_assert(pkey < NR_PKEYS);
  714. ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
  715. pkey_assert(ptr != (void *)-1);
  716. mprotect_pkey(ptr, size, prot, pkey);
  717. record_pkey_malloc(ptr, size, prot);
  718. dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
  719. return ptr;
  720. }
  721. void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
  722. {
  723. void *ptr;
  724. int fd;
  725. dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
  726. size, prot, pkey);
  727. pkey_assert(pkey < NR_PKEYS);
  728. fd = open("/dax/foo", O_RDWR);
  729. pkey_assert(fd >= 0);
  730. ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
  731. pkey_assert(ptr != (void *)-1);
  732. mprotect_pkey(ptr, size, prot, pkey);
  733. record_pkey_malloc(ptr, size, prot);
  734. dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
  735. close(fd);
  736. return ptr;
  737. }
  738. void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
  739. malloc_pkey_with_mprotect,
  740. malloc_pkey_with_mprotect_subpage,
  741. malloc_pkey_anon_huge,
  742. malloc_pkey_hugetlb
  743. /* can not do direct with the pkey_mprotect() API:
  744. malloc_pkey_mmap_direct,
  745. malloc_pkey_mmap_dax,
  746. */
  747. };
  748. void *malloc_pkey(long size, int prot, u16 pkey)
  749. {
  750. void *ret;
  751. static int malloc_type;
  752. int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
  753. pkey_assert(pkey < NR_PKEYS);
  754. while (1) {
  755. pkey_assert(malloc_type < nr_malloc_types);
  756. ret = pkey_malloc[malloc_type](size, prot, pkey);
  757. pkey_assert(ret != (void *)-1);
  758. malloc_type++;
  759. if (malloc_type >= nr_malloc_types)
  760. malloc_type = (random()%nr_malloc_types);
  761. /* try again if the malloc_type we tried is unsupported */
  762. if (ret == PTR_ERR_ENOTSUP)
  763. continue;
  764. break;
  765. }
  766. dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
  767. size, prot, pkey, ret);
  768. return ret;
  769. }
  770. int last_pkey_faults;
  771. #define UNKNOWN_PKEY -2
  772. void expected_pkey_fault(int pkey)
  773. {
  774. dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
  775. __func__, last_pkey_faults, pkey_faults);
  776. dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
  777. pkey_assert(last_pkey_faults + 1 == pkey_faults);
  778. /*
  779. * For exec-only memory, we do not know the pkey in
  780. * advance, so skip this check.
  781. */
  782. if (pkey != UNKNOWN_PKEY)
  783. pkey_assert(last_si_pkey == pkey);
  784. #if defined(__i386__) || defined(__x86_64__) /* arch */
  785. /*
  786. * The signal handler shold have cleared out PKEY register to let the
  787. * test program continue. We now have to restore it.
  788. */
  789. if (__read_pkey_reg() != 0)
  790. #else /* arch */
  791. if (__read_pkey_reg() != shadow_pkey_reg)
  792. #endif /* arch */
  793. pkey_assert(0);
  794. __write_pkey_reg(shadow_pkey_reg);
  795. dprintf1("%s() set pkey_reg=%016llx to restore state after signal "
  796. "nuked it\n", __func__, shadow_pkey_reg);
  797. last_pkey_faults = pkey_faults;
  798. last_si_pkey = -1;
  799. }
  /*
   * Fail the test, after logging 'msg', if the pkey fault count has changed
   * since it was last recorded in last_pkey_faults.
   */
  #define do_not_expect_pkey_fault(msg) do {                      \
      if (last_pkey_faults != pkey_faults) {                      \
          dprintf0("unexpected PKey fault: %s\n", msg);           \
          pkey_assert(last_pkey_faults == pkey_faults);           \
      }                                                           \
  } while (0)
  /*
   * File descriptors opened by the current test. Note that the initializer
   * sets only element 0 to -1; the remaining elements start out as 0.
   */
  int test_fds[10] = { -1 };
  /* Number of leading entries of test_fds[] that are in use. */
  int nr_test_fds;
  /* Remember 'fd' (which must be valid) in the next free test_fds[] slot. */
  void __save_test_fd(int fd)
  {
      pkey_assert(fd >= 0);
      pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
      test_fds[nr_test_fds++] = fd;
  }
  /*
   * Open "/etc/passwd" read-only, register the descriptor with
   * __save_test_fd() and return it.
   */
  int get_test_read_fd(void)
  {
      int fd;

      fd = open("/etc/passwd", O_RDONLY);
      __save_test_fd(fd);

      return fd;
  }
  820. void close_test_fds(void)
  821. {
  822. int i;
  823. for (i = 0; i < nr_test_fds; i++) {
  824. if (test_fds[i] < 0)
  825. continue;
  826. close(test_fds[i]);
  827. test_fds[i] = -1;
  828. }
  829. nr_test_fds = 0;
  830. }
  /* Compiler-only memory barrier: an empty asm with a "memory" clobber. */
  #define barrier() __asm__ __volatile__("": : :"memory")
  /*
   * Load and return the int that 'ptr' points to. Kept out of line, with a
   * barrier in front of the load, so GCC does not optimize the access away.
   */
  __attribute__((noinline)) int read_ptr(int *ptr)
  {
      int value;

      barrier();
      value = *ptr;

      return value;
  }
  840. void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
  841. {
  842. int i, err;
  843. int max_nr_pkey_allocs;
  844. int alloced_pkeys[NR_PKEYS];
  845. int nr_alloced = 0;
  846. long size;
  847. pkey_assert(pkey_last_malloc_record);
  848. size = pkey_last_malloc_record->size;
  849. /*
  850. * This is a bit of a hack. But mprotect() requires
  851. * huge-page-aligned sizes when operating on hugetlbfs.
  852. * So, make sure that we use something that's a multiple
  853. * of a huge page when we can.
  854. */
  855. if (size >= HPAGE_SIZE)
  856. size = HPAGE_SIZE;
  857. /* allocate every possible key and make sure key-0 never got allocated */
  858. max_nr_pkey_allocs = NR_PKEYS;
  859. for (i = 0; i < max_nr_pkey_allocs; i++) {
  860. int new_pkey = alloc_pkey();
  861. pkey_assert(new_pkey != 0);
  862. if (new_pkey < 0)
  863. break;
  864. alloced_pkeys[nr_alloced++] = new_pkey;
  865. }
  866. /* free all the allocated keys */
  867. for (i = 0; i < nr_alloced; i++) {
  868. int free_ret;
  869. if (!alloced_pkeys[i])
  870. continue;
  871. free_ret = sys_pkey_free(alloced_pkeys[i]);
  872. pkey_assert(!free_ret);
  873. }
  874. /* attach key-0 in various modes */
  875. err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
  876. pkey_assert(!err);
  877. err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
  878. pkey_assert(!err);
  879. err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
  880. pkey_assert(!err);
  881. err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
  882. pkey_assert(!err);
  883. err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
  884. pkey_assert(!err);
  885. }
  886. void test_read_of_write_disabled_region(int *ptr, u16 pkey)
  887. {
  888. int ptr_contents;
  889. dprintf1("disabling write access to PKEY[1], doing read\n");
  890. pkey_write_deny(pkey);
  891. ptr_contents = read_ptr(ptr);
  892. dprintf1("*ptr: %d\n", ptr_contents);
  893. dprintf1("\n");
  894. }
  895. void test_read_of_access_disabled_region(int *ptr, u16 pkey)
  896. {
  897. int ptr_contents;
  898. dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
  899. read_pkey_reg();
  900. pkey_access_deny(pkey);
  901. ptr_contents = read_ptr(ptr);
  902. dprintf1("*ptr: %d\n", ptr_contents);
  903. expected_pkey_fault(pkey);
  904. }
  905. void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
  906. u16 pkey)
  907. {
  908. int ptr_contents;
  909. dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
  910. pkey, ptr);
  911. ptr_contents = read_ptr(ptr);
  912. dprintf1("reading ptr before disabling the read : %d\n",
  913. ptr_contents);
  914. read_pkey_reg();
  915. pkey_access_deny(pkey);
  916. ptr_contents = read_ptr(ptr);
  917. dprintf1("*ptr: %d\n", ptr_contents);
  918. expected_pkey_fault(pkey);
  919. }
  920. void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
  921. u16 pkey)
  922. {
  923. *ptr = __LINE__;
  924. dprintf1("disabling write access; after accessing the page, "
  925. "to PKEY[%02d], doing write\n", pkey);
  926. pkey_write_deny(pkey);
  927. *ptr = __LINE__;
  928. expected_pkey_fault(pkey);
  929. }
  930. void test_write_of_write_disabled_region(int *ptr, u16 pkey)
  931. {
  932. dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
  933. pkey_write_deny(pkey);
  934. *ptr = __LINE__;
  935. expected_pkey_fault(pkey);
  936. }
  937. void test_write_of_access_disabled_region(int *ptr, u16 pkey)
  938. {
  939. dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
  940. pkey_access_deny(pkey);
  941. *ptr = __LINE__;
  942. expected_pkey_fault(pkey);
  943. }
  944. void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
  945. u16 pkey)
  946. {
  947. *ptr = __LINE__;
  948. dprintf1("disabling access; after accessing the page, "
  949. " to PKEY[%02d], doing write\n", pkey);
  950. pkey_access_deny(pkey);
  951. *ptr = __LINE__;
  952. expected_pkey_fault(pkey);
  953. }
  954. void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
  955. {
  956. int ret;
  957. int test_fd = get_test_read_fd();
  958. dprintf1("disabling access to PKEY[%02d], "
  959. "having kernel read() to buffer\n", pkey);
  960. pkey_access_deny(pkey);
  961. ret = read(test_fd, ptr, 1);
  962. dprintf1("read ret: %d\n", ret);
  963. pkey_assert(ret);
  964. }
  965. void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
  966. {
  967. int ret;
  968. int test_fd = get_test_read_fd();
  969. pkey_write_deny(pkey);
  970. ret = read(test_fd, ptr, 100);
  971. dprintf1("read ret: %d\n", ret);
  972. if (ret < 0 && (DEBUG_LEVEL > 0))
  973. perror("verbose read result (OK for this to be bad)");
  974. pkey_assert(ret);
  975. }
  976. void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
  977. {
  978. int pipe_ret, vmsplice_ret;
  979. struct iovec iov;
  980. int pipe_fds[2];
  981. pipe_ret = pipe(pipe_fds);
  982. pkey_assert(pipe_ret == 0);
  983. dprintf1("disabling access to PKEY[%02d], "
  984. "having kernel vmsplice from buffer\n", pkey);
  985. pkey_access_deny(pkey);
  986. iov.iov_base = ptr;
  987. iov.iov_len = PAGE_SIZE;
  988. vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
  989. dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
  990. pkey_assert(vmsplice_ret == -1);
  991. close(pipe_fds[0]);
  992. close(pipe_fds[1]);
  993. }
  994. void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
  995. {
  996. int ignored = 0xdada;
  997. int futex_ret;
  998. int some_int = __LINE__;
  999. dprintf1("disabling write to PKEY[%02d], "
  1000. "doing futex gunk in buffer\n", pkey);
  1001. *ptr = some_int;
  1002. pkey_write_deny(pkey);
  1003. futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
  1004. &ignored, ignored);
  1005. if (DEBUG_LEVEL > 0)
  1006. perror("futex");
  1007. dprintf1("futex() ret: %d\n", futex_ret);
  1008. }
  1009. /* Assumes that all pkeys other than 'pkey' are unallocated */
  1010. void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
  1011. {
  1012. int err;
  1013. int i;
  1014. /* Note: 0 is the default pkey, so don't mess with it */
  1015. for (i = 1; i < NR_PKEYS; i++) {
  1016. if (pkey == i)
  1017. continue;
  1018. dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
  1019. err = sys_pkey_free(i);
  1020. pkey_assert(err);
  1021. err = sys_pkey_free(i);
  1022. pkey_assert(err);
  1023. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
  1024. pkey_assert(err);
  1025. }
  1026. }
  1027. /* Assumes that all pkeys other than 'pkey' are unallocated */
  1028. void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
  1029. {
  1030. int err;
  1031. int bad_pkey = NR_PKEYS+99;
  1032. /* pass a known-invalid pkey in: */
  1033. err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
  1034. pkey_assert(err);
  1035. }
  /*
   * fork() and let only the child return to the caller; the parent exits
   * with status 0.
   */
  void become_child(void)
  {
      pid_t forkret = fork();

      pkey_assert(forkret >= 0);
      dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);

      /* non-zero means we are the parent: our work here is done */
      if (forkret)
          exit(0);

      /* in the child: carry on in the caller */
  }
  1048. /* Assumes that all pkeys other than 'pkey' are unallocated */
  1049. void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
  1050. {
  1051. int err;
  1052. int allocated_pkeys[NR_PKEYS] = {0};
  1053. int nr_allocated_pkeys = 0;
  1054. int i;
  1055. for (i = 0; i < NR_PKEYS*3; i++) {
  1056. int new_pkey;
  1057. dprintf1("%s() alloc loop: %d\n", __func__, i);
  1058. new_pkey = alloc_pkey();
  1059. dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
  1060. " shadow: 0x%016llx\n",
  1061. __func__, __LINE__, err, __read_pkey_reg(),
  1062. shadow_pkey_reg);
  1063. read_pkey_reg(); /* for shadow checking */
  1064. dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
  1065. if ((new_pkey == -1) && (errno == ENOSPC)) {
  1066. dprintf2("%s() failed to allocate pkey after %d tries\n",
  1067. __func__, nr_allocated_pkeys);
  1068. } else {
  1069. /*
  1070. * Ensure the number of successes never
  1071. * exceeds the number of keys supported
  1072. * in the hardware.
  1073. */
  1074. pkey_assert(nr_allocated_pkeys < NR_PKEYS);
  1075. allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
  1076. }
  1077. /*
  1078. * Make sure that allocation state is properly
  1079. * preserved across fork().
  1080. */
  1081. if (i == NR_PKEYS*2)
  1082. become_child();
  1083. }
  1084. dprintf3("%s()::%d\n", __func__, __LINE__);
  1085. /*
  1086. * On x86:
  1087. * There are 16 pkeys supported in hardware. Three are
  1088. * allocated by the time we get here:
  1089. * 1. The default key (0)
  1090. * 2. One possibly consumed by an execute-only mapping.
  1091. * 3. One allocated by the test code and passed in via
  1092. * 'pkey' to this function.
  1093. * Ensure that we can allocate at least another 13 (16-3).
  1094. *
  1095. * On powerpc:
  1096. * There are either 5, 28, 29 or 32 pkeys supported in
  1097. * hardware depending on the page size (4K or 64K) and
  1098. * platform (powernv or powervm). Four are allocated by
  1099. * the time we get here. These include pkey-0, pkey-1,
  1100. * exec-only pkey and the one allocated by the test code.
  1101. * Ensure that we can allocate the remaining.
  1102. */
  1103. pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));
  1104. for (i = 0; i < nr_allocated_pkeys; i++) {
  1105. err = sys_pkey_free(allocated_pkeys[i]);
  1106. pkey_assert(!err);
  1107. read_pkey_reg(); /* for shadow checking */
  1108. }
  1109. }
  1110. void arch_force_pkey_reg_init(void)
  1111. {
  1112. #if defined(__i386__) || defined(__x86_64__) /* arch */
  1113. u64 *buf;
  1114. /*
  1115. * All keys should be allocated and set to allow reads and
  1116. * writes, so the register should be all 0. If not, just
  1117. * skip the test.
  1118. */
  1119. if (read_pkey_reg())
  1120. return;
  1121. /*
  1122. * Just allocate an absurd about of memory rather than
  1123. * doing the XSAVE size enumeration dance.
  1124. */
  1125. buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  1126. /* These __builtins require compiling with -mxsave */
  1127. /* XSAVE to build a valid buffer: */
  1128. __builtin_ia32_xsave(buf, XSTATE_PKEY);
  1129. /* Clear XSTATE_BV[PKRU]: */
  1130. buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
  1131. /* XRSTOR will likely get PKRU back to the init state: */
  1132. __builtin_ia32_xrstor(buf, XSTATE_PKEY);
  1133. munmap(buf, 1*MB);
  1134. #endif
  1135. }
  1136. /*
  1137. * This is mostly useless on ppc for now. But it will not
  1138. * hurt anything and should give some better coverage as
  1139. * a long-running test that continually checks the pkey
  1140. * register.
  1141. */
  1142. void test_pkey_init_state(int *ptr, u16 pkey)
  1143. {
  1144. int err;
  1145. int allocated_pkeys[NR_PKEYS] = {0};
  1146. int nr_allocated_pkeys = 0;
  1147. int i;
  1148. for (i = 0; i < NR_PKEYS; i++) {
  1149. int new_pkey = alloc_pkey();
  1150. if (new_pkey < 0)
  1151. continue;
  1152. allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
  1153. }
  1154. dprintf3("%s()::%d\n", __func__, __LINE__);
  1155. arch_force_pkey_reg_init();
  1156. /*
  1157. * Loop for a bit, hoping to get exercise the kernel
  1158. * context switch code.
  1159. */
  1160. for (i = 0; i < 1000000; i++)
  1161. read_pkey_reg();
  1162. for (i = 0; i < nr_allocated_pkeys; i++) {
  1163. err = sys_pkey_free(allocated_pkeys[i]);
  1164. pkey_assert(!err);
  1165. read_pkey_reg(); /* for shadow checking */
  1166. }
  1167. }
  1168. /*
  1169. * pkey 0 is special. It is allocated by default, so you do not
  1170. * have to call pkey_alloc() to use it first. Make sure that it
  1171. * is usable.
  1172. */
  1173. void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
  1174. {
  1175. long size;
  1176. int prot;
  1177. assert(pkey_last_malloc_record);
  1178. size = pkey_last_malloc_record->size;
  1179. /*
  1180. * This is a bit of a hack. But mprotect() requires
  1181. * huge-page-aligned sizes when operating on hugetlbfs.
  1182. * So, make sure that we use something that's a multiple
  1183. * of a huge page when we can.
  1184. */
  1185. if (size >= HPAGE_SIZE)
  1186. size = HPAGE_SIZE;
  1187. prot = pkey_last_malloc_record->prot;
  1188. /* Use pkey 0 */
  1189. mprotect_pkey(ptr, size, prot, 0);
  1190. /* Make sure that we can set it back to the original pkey. */
  1191. mprotect_pkey(ptr, size, prot, pkey);
  1192. }
  1193. void test_ptrace_of_child(int *ptr, u16 pkey)
  1194. {
  1195. __attribute__((__unused__)) int peek_result;
  1196. pid_t child_pid;
  1197. void *ignored = 0;
  1198. long ret;
  1199. int status;
  1200. /*
  1201. * This is the "control" for our little expermient. Make sure
  1202. * we can always access it when ptracing.
  1203. */
  1204. int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
  1205. int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
  1206. /*
  1207. * Fork a child which is an exact copy of this process, of course.
  1208. * That means we can do all of our tests via ptrace() and then plain
  1209. * memory access and ensure they work differently.
  1210. */
  1211. child_pid = fork_lazy_child();
  1212. dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
  1213. ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
  1214. if (ret)
  1215. perror("attach");
  1216. dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
  1217. pkey_assert(ret != -1);
  1218. ret = waitpid(child_pid, &status, WUNTRACED);
  1219. if ((ret != child_pid) || !(WIFSTOPPED(status))) {
  1220. fprintf(stderr, "weird waitpid result %ld stat %x\n",
  1221. ret, status);
  1222. pkey_assert(0);
  1223. }
  1224. dprintf2("waitpid ret: %ld\n", ret);
  1225. dprintf2("waitpid status: %d\n", status);
  1226. pkey_access_deny(pkey);
  1227. pkey_write_deny(pkey);
  1228. /* Write access, untested for now:
  1229. ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
  1230. pkey_assert(ret != -1);
  1231. dprintf1("poke at %p: %ld\n", peek_at, ret);
  1232. */
  1233. /*
  1234. * Try to access the pkey-protected "ptr" via ptrace:
  1235. */
  1236. ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
  1237. /* expect it to work, without an error: */
  1238. pkey_assert(ret != -1);
  1239. /* Now access from the current task, and expect an exception: */
  1240. peek_result = read_ptr(ptr);
  1241. expected_pkey_fault(pkey);
  1242. /*
  1243. * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
  1244. */
  1245. ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
  1246. /* expect it to work, without an error: */
  1247. pkey_assert(ret != -1);
  1248. /* Now access from the current task, and expect NO exception: */
  1249. peek_result = read_ptr(plain_ptr);
  1250. do_not_expect_pkey_fault("read plain pointer after ptrace");
  1251. ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
  1252. pkey_assert(ret != -1);
  1253. ret = kill(child_pid, SIGKILL);
  1254. pkey_assert(ret != -1);
  1255. wait(&status);
  1256. free(plain_ptr_unaligned);
  1257. }
  1258. void *get_pointer_to_instructions(void)
  1259. {
  1260. void *p1;
  1261. p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
  1262. dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
  1263. /* lots_o_noops_around_write should be page-aligned already */
  1264. assert(p1 == &lots_o_noops_around_write);
  1265. /* Point 'p1' at the *second* page of the function: */
  1266. p1 += PAGE_SIZE;
  1267. /*
  1268. * Try to ensure we fault this in on next touch to ensure
  1269. * we get an instruction fault as opposed to a data one
  1270. */
  1271. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1272. return p1;
  1273. }
  1274. void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
  1275. {
  1276. void *p1;
  1277. int scratch;
  1278. int ptr_contents;
  1279. int ret;
  1280. p1 = get_pointer_to_instructions();
  1281. lots_o_noops_around_write(&scratch);
  1282. ptr_contents = read_ptr(p1);
  1283. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1284. ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
  1285. pkey_assert(!ret);
  1286. pkey_access_deny(pkey);
  1287. dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
  1288. /*
  1289. * Make sure this is an *instruction* fault
  1290. */
  1291. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1292. lots_o_noops_around_write(&scratch);
  1293. do_not_expect_pkey_fault("executing on PROT_EXEC memory");
  1294. expect_fault_on_read_execonly_key(p1, pkey);
  1295. }
  1296. void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
  1297. {
  1298. void *p1;
  1299. int scratch;
  1300. int ptr_contents;
  1301. int ret;
  1302. dprintf1("%s() start\n", __func__);
  1303. p1 = get_pointer_to_instructions();
  1304. lots_o_noops_around_write(&scratch);
  1305. ptr_contents = read_ptr(p1);
  1306. dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
  1307. /* Use a *normal* mprotect(), not mprotect_pkey(): */
  1308. ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
  1309. pkey_assert(!ret);
  1310. /*
  1311. * Reset the shadow, assuming that the above mprotect()
  1312. * correctly changed PKRU, but to an unknown value since
  1313. * the actual allocated pkey is unknown.
  1314. */
  1315. shadow_pkey_reg = __read_pkey_reg();
  1316. dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
  1317. /* Make sure this is an *instruction* fault */
  1318. madvise(p1, PAGE_SIZE, MADV_DONTNEED);
  1319. lots_o_noops_around_write(&scratch);
  1320. do_not_expect_pkey_fault("executing on PROT_EXEC memory");
  1321. expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
  1322. /*
  1323. * Put the memory back to non-PROT_EXEC. Should clear the
  1324. * exec-only pkey off the VMA and allow it to be readable
  1325. * again. Go to PROT_NONE first to check for a kernel bug
  1326. * that did not clear the pkey when doing PROT_NONE.
  1327. */
  1328. ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
  1329. pkey_assert(!ret);
  1330. ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
  1331. pkey_assert(!ret);
  1332. ptr_contents = read_ptr(p1);
  1333. do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
  1334. }
  1335. #if defined(__i386__) || defined(__x86_64__)
  1336. void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
  1337. {
  1338. u32 new_pkru;
  1339. pid_t child;
  1340. int status, ret;
  1341. int pkey_offset = pkey_reg_xstate_offset();
  1342. size_t xsave_size = cpu_max_xsave_size();
  1343. void *xsave;
  1344. u32 *pkey_register;
  1345. u64 *xstate_bv;
  1346. struct iovec iov;
  1347. new_pkru = ~read_pkey_reg();
  1348. /* Don't make PROT_EXEC mappings inaccessible */
  1349. new_pkru &= ~3;
  1350. child = fork();
  1351. pkey_assert(child >= 0);
  1352. dprintf3("[%d] fork() ret: %d\n", getpid(), child);
  1353. if (!child) {
  1354. ptrace(PTRACE_TRACEME, 0, 0, 0);
  1355. /* Stop and allow the tracer to modify PKRU directly */
  1356. raise(SIGSTOP);
  1357. /*
  1358. * need __read_pkey_reg() version so we do not do shadow_pkey_reg
  1359. * checking
  1360. */
  1361. if (__read_pkey_reg() != new_pkru)
  1362. exit(1);
  1363. /* Stop and allow the tracer to clear XSTATE_BV for PKRU */
  1364. raise(SIGSTOP);
  1365. if (__read_pkey_reg() != 0)
  1366. exit(1);
  1367. /* Stop and allow the tracer to examine PKRU */
  1368. raise(SIGSTOP);
  1369. exit(0);
  1370. }
  1371. pkey_assert(child == waitpid(child, &status, 0));
  1372. dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
  1373. pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
  1374. xsave = (void *)malloc(xsave_size);
  1375. pkey_assert(xsave > 0);
  1376. /* Modify the PKRU register directly */
  1377. iov.iov_base = xsave;
  1378. iov.iov_len = xsave_size;
  1379. ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1380. pkey_assert(ret == 0);
  1381. pkey_register = (u32 *)(xsave + pkey_offset);
  1382. pkey_assert(*pkey_register == read_pkey_reg());
  1383. *pkey_register = new_pkru;
  1384. ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1385. pkey_assert(ret == 0);
  1386. /* Test that the modification is visible in ptrace before any execution */
  1387. memset(xsave, 0xCC, xsave_size);
  1388. ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1389. pkey_assert(ret == 0);
  1390. pkey_assert(*pkey_register == new_pkru);
  1391. /* Execute the tracee */
  1392. ret = ptrace(PTRACE_CONT, child, 0, 0);
  1393. pkey_assert(ret == 0);
  1394. /* Test that the tracee saw the PKRU value change */
  1395. pkey_assert(child == waitpid(child, &status, 0));
  1396. dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
  1397. pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
  1398. /* Test that the modification is visible in ptrace after execution */
  1399. memset(xsave, 0xCC, xsave_size);
  1400. ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1401. pkey_assert(ret == 0);
  1402. pkey_assert(*pkey_register == new_pkru);
  1403. /* Clear the PKRU bit from XSTATE_BV */
  1404. xstate_bv = (u64 *)(xsave + 512);
  1405. *xstate_bv &= ~(1 << 9);
  1406. ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1407. pkey_assert(ret == 0);
  1408. /* Test that the modification is visible in ptrace before any execution */
  1409. memset(xsave, 0xCC, xsave_size);
  1410. ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1411. pkey_assert(ret == 0);
  1412. pkey_assert(*pkey_register == 0);
  1413. ret = ptrace(PTRACE_CONT, child, 0, 0);
  1414. pkey_assert(ret == 0);
  1415. /* Test that the tracee saw the PKRU value go to 0 */
  1416. pkey_assert(child == waitpid(child, &status, 0));
  1417. dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
  1418. pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
  1419. /* Test that the modification is visible in ptrace after execution */
  1420. memset(xsave, 0xCC, xsave_size);
  1421. ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
  1422. pkey_assert(ret == 0);
  1423. pkey_assert(*pkey_register == 0);
  1424. ret = ptrace(PTRACE_CONT, child, 0, 0);
  1425. pkey_assert(ret == 0);
  1426. pkey_assert(child == waitpid(child, &status, 0));
  1427. dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
  1428. pkey_assert(WIFEXITED(status));
  1429. pkey_assert(WEXITSTATUS(status) == 0);
  1430. free(xsave);
  1431. }
  1432. #endif
  1433. void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
  1434. {
  1435. int size = PAGE_SIZE;
  1436. int sret;
  1437. if (cpu_has_pkeys()) {
  1438. dprintf1("SKIP: %s: no CPU support\n", __func__);
  1439. return;
  1440. }
  1441. sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
  1442. pkey_assert(sret < 0);
  1443. }
  1444. void (*pkey_tests[])(int *ptr, u16 pkey) = {
  1445. test_read_of_write_disabled_region,
  1446. test_read_of_access_disabled_region,
  1447. test_read_of_access_disabled_region_with_page_already_mapped,
  1448. test_write_of_write_disabled_region,
  1449. test_write_of_write_disabled_region_with_page_already_mapped,
  1450. test_write_of_access_disabled_region,
  1451. test_write_of_access_disabled_region_with_page_already_mapped,
  1452. test_kernel_write_of_access_disabled_region,
  1453. test_kernel_write_of_write_disabled_region,
  1454. test_kernel_gup_of_access_disabled_region,
  1455. test_kernel_gup_write_to_write_disabled_region,
  1456. test_executing_on_unreadable_memory,
  1457. test_implicit_mprotect_exec_only_memory,
  1458. test_mprotect_with_pkey_0,
  1459. test_ptrace_of_child,
  1460. test_pkey_init_state,
  1461. test_pkey_syscalls_on_non_allocated_pkey,
  1462. test_pkey_syscalls_bad_args,
  1463. test_pkey_alloc_exhaust,
  1464. test_pkey_alloc_free_attach_pkey0,
  1465. #if defined(__i386__) || defined(__x86_64__)
  1466. test_ptrace_modifies_pkru,
  1467. #endif
  1468. };
  1469. void run_tests_once(void)
  1470. {
  1471. int *ptr;
  1472. int prot = PROT_READ|PROT_WRITE;
  1473. for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
  1474. int pkey;
  1475. int orig_pkey_faults = pkey_faults;
  1476. dprintf1("======================\n");
  1477. dprintf1("test %d preparing...\n", test_nr);
  1478. tracing_on();
  1479. pkey = alloc_random_pkey();
  1480. dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
  1481. ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
  1482. dprintf1("test %d starting...\n", test_nr);
  1483. pkey_tests[test_nr](ptr, pkey);
  1484. dprintf1("freeing test memory: %p\n", ptr);
  1485. free_pkey_malloc(ptr);
  1486. sys_pkey_free(pkey);
  1487. dprintf1("pkey_faults: %d\n", pkey_faults);
  1488. dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
  1489. tracing_off();
  1490. close_test_fds();
  1491. printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
  1492. dprintf1("======================\n\n");
  1493. }
  1494. iteration_nr++;
  1495. }
  1496. void pkey_setup_shadow(void)
  1497. {
  1498. shadow_pkey_reg = __read_pkey_reg();
  1499. }
  1500. int main(void)
  1501. {
  1502. int nr_iterations = 22;
  1503. int pkeys_supported = is_pkeys_supported();
  1504. srand((unsigned int)time(NULL));
  1505. setup_handlers();
  1506. printf("has pkeys: %d\n", pkeys_supported);
  1507. if (!pkeys_supported) {
  1508. int size = PAGE_SIZE;
  1509. int *ptr;
  1510. printf("running PKEY tests for unsupported CPU/OS\n");
  1511. ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
  1512. assert(ptr != (void *)-1);
  1513. test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
  1514. exit(0);
  1515. }
  1516. pkey_setup_shadow();
  1517. printf("startup pkey_reg: %016llx\n", read_pkey_reg());
  1518. setup_hugetlbfs();
  1519. while (nr_iterations-- > 0)
  1520. run_tests_once();
  1521. printf("done (all tests OK)\n");
  1522. return 0;
  1523. }