encl.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
                                   struct sgx_backing *backing);

#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
/*
 * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
 * determine the page index associated with the first PCMD entry
 * within a PCMD page.
 */
#define PCMD_FIRST_MASK GENMASK(4, 0)

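/*
 * For illustration: PCMD_FIRST_MASK is GENMASK(4, 0) == 0x1f, so
 * "page_index & ~PCMD_FIRST_MASK" rounds a page index down to a multiple of
 * 32. For example, index 0x47 maps to 0x40, i.e. the enclave page that owns
 * the first PCMD entry within that PCMD page.
 */
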
/**
 * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
 *                               a PCMD page is in the process of being reclaimed.
 * @encl:       Enclave to which PCMD page belongs
 * @start_addr: Address of enclave page using first entry within the PCMD page
 *
 * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
 * stored. The PCMD data of a reclaimed enclave page contains enough
 * information for the processor to verify the page at the time
 * it is loaded back into the Enclave Page Cache (EPC).
 *
 * The backing storage to which enclave pages are reclaimed is laid out as
 * follows:
 * Encrypted enclave pages:SECS page:PCMD pages
 *
 * Each PCMD page contains the PCMD metadata of
 * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
 *
 * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
 * process of getting data (and thus soon being non-empty). (b) is tested with
 * a check if an enclave page sharing the PCMD page is in the process of being
 * reclaimed.
 *
 * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
 * intends to reclaim that enclave page - it means that the PCMD page
 * associated with that enclave page is about to get some data and thus
 * even if the PCMD page is empty, it should not be truncated.
 *
 * Context: Enclave mutex (&sgx_encl->lock) must be held.
 * Return: 1 if the reclaimer is about to write to the PCMD page
 *         0 if the reclaimer has no intention to write to the PCMD page
 */
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
                                     unsigned long start_addr)
{
        int reclaimed = 0;
        int i;

        /*
         * PCMD_FIRST_MASK is based on number of PCMD entries within
         * PCMD page being 32.
         */
        BUILD_BUG_ON(PCMDS_PER_PAGE != 32);

        for (i = 0; i < PCMDS_PER_PAGE; i++) {
                struct sgx_encl_page *entry;
                unsigned long addr;

                addr = start_addr + i * PAGE_SIZE;

                /*
                 * Stop when reaching the SECS page - it does not
                 * have a page_array entry and its reclaim is
                 * started and completed with enclave mutex held so
                 * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
                 * flag.
                 */
                if (addr == encl->base + encl->size)
                        break;

                entry = xa_load(&encl->page_array, PFN_DOWN(addr));
                if (!entry)
                        continue;

                /*
                 * VA page slot ID uses same bit as the flag so it is important
                 * to ensure that the page is not already in backing store.
                 */
                if (entry->epc_page &&
                    (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
                        reclaimed = 1;
                        break;
                }
        }

        return reclaimed;
}

/*
 * Calculate byte offset of a PCMD struct associated with an enclave page. PCMDs
 * follow right after the EPC data in the backing storage. In addition to the
 * visible enclave pages, there's one extra page slot for SECS, before PCMD
 * structs.
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
                                                            unsigned long page_index)
{
        pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);

        return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}

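/*
 * Worked example of the layout above, assuming the architectural sizes of a
 * 4096-byte SECS and 128-byte PCMD entries: an enclave with encl->size ==
 * 0x8000 keeps encrypted page contents at backing offsets [0, 0x8000), the
 * SECS slot at [0x8000, 0x9000), and the PCMD array from 0x9000 onwards, so
 * page_index 3 resolves to the PCMD at 0x9000 + 3 * 128 == 0x9180.
 */
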
/*
 * Free a page from the backing storage at the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
{
        struct inode *inode = file_inode(encl->backing);

        shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
}

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
                           struct sgx_epc_page *epc_page,
                           struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        pgoff_t page_index, page_pcmd_off;
        unsigned long pcmd_first_page;
        struct sgx_pageinfo pginfo;
        struct sgx_backing b;
        bool pcmd_page_empty;
        u8 *pcmd_page;
        int ret;

        if (secs_page)
                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
        else
                page_index = PFN_DOWN(encl->size);

        /*
         * Address of enclave page using the first entry within the PCMD page.
         */
        pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;

        page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);

        ret = sgx_encl_lookup_backing(encl, page_index, &b);
        if (ret)
                return ret;

        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.contents = (unsigned long)kmap_atomic(b.contents);
        pcmd_page = kmap_atomic(b.pcmd);
        pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

        if (secs_page)
                pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
        else
                pginfo.secs = 0;

        ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
                     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
        if (ret) {
                if (encls_failed(ret))
                        ENCLS_WARN(ret, "ELDU");

                ret = -EFAULT;
        }

        memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
        set_page_dirty(b.pcmd);

        /*
         * The area for the PCMD in the page was zeroed above. Check if the
         * whole page is now empty meaning that all PCMDs have been zeroed:
         */
        pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

        kunmap_atomic(pcmd_page);
        kunmap_atomic((void *)(unsigned long)pginfo.contents);

        get_page(b.pcmd);
        sgx_encl_put_backing(&b);

        sgx_encl_truncate_backing_page(encl, page_index);

        if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
                sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
                pcmd_page = kmap_atomic(b.pcmd);
                if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
                        pr_warn("PCMD page not empty after truncate.\n");
                kunmap_atomic(pcmd_page);
        }

        put_page(b.pcmd);

        return ret;
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
                                          struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page))
                return epc_page;

        ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
        if (ret) {
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(ret);
        }

        sgx_free_va_slot(encl_page->va_page, va_offset);
        list_move(&encl_page->va_page->list, &encl->va_pages);
        encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
        encl_page->epc_page = epc_page;

        return epc_page;
}

/*
 * Ensure the SECS page is not swapped out. Must be called with encl->lock held
 * to protect the enclave states including SECS and to ensure the SECS page is
 * not swapped out again while being used.
 */
static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
{
        struct sgx_epc_page *epc_page = encl->secs.epc_page;

        if (!epc_page)
                epc_page = sgx_encl_eldu(&encl->secs, NULL);

        return epc_page;
}

static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
                                                  struct sgx_encl_page *entry)
{
        struct sgx_epc_page *epc_page;

        /* Entry successfully located. */
        if (entry->epc_page) {
                if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
                        return ERR_PTR(-EBUSY);

                return entry;
        }

        epc_page = sgx_encl_load_secs(encl);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        encl->secs_child_cnt++;
        sgx_mark_page_reclaimable(entry->epc_page);

        return entry;
}

static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
                                                       unsigned long addr,
                                                       unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        /*
         * Verify that the page has equal or higher build time
         * permissions than the VMA permissions (i.e. the subset of {VM_READ,
         * VM_WRITE, VM_EXEC} in vma->vm_flags).
         */
        if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}

struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
                                         unsigned long addr)
{
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}

/**
 * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
 * @vma:	VMA obtained from fault info from where page is accessed
 * @encl:	enclave accessing the page
 * @addr:	address that triggered the page fault
 *
 * When an initialized enclave accesses a page with no backing EPC page
 * on an SGX2 system then an EPC page can be added dynamically via the SGX2
 * ENCLS[EAUG] instruction.
 *
 * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
 * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
 */
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
                                     struct sgx_encl *encl, unsigned long addr)
{
        vm_fault_t vmret = VM_FAULT_SIGBUS;
        struct sgx_pageinfo pginfo = {0};
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
        struct sgx_va_page *va_page;
        unsigned long phys_addr;
        u64 secinfo_flags;
        int ret;

        if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
                return VM_FAULT_SIGBUS;

        /*
         * Ignore internal permission checking for dynamically added pages.
         * They matter only for data added during the pre-initialization
         * phase. The enclave decides the permissions by means of
         * EACCEPT, EACCEPTCOPY and EMODPE.
         */
        secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
        encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
        if (IS_ERR(encl_page))
                return VM_FAULT_OOM;

        mutex_lock(&encl->lock);

        epc_page = sgx_encl_load_secs(encl);
        if (IS_ERR(epc_page)) {
                if (PTR_ERR(epc_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_unlock;
        }

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page)) {
                if (PTR_ERR(epc_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_unlock;
        }

        va_page = sgx_encl_grow(encl, false);
        if (IS_ERR(va_page)) {
                if (PTR_ERR(va_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_epc;
        }

        if (va_page)
                list_add(&va_page->list, &encl->va_pages);

        ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
                        encl_page, GFP_KERNEL);
        /*
         * If ret == -EBUSY then page was created in another flow while
         * running without encl->lock
         */
        if (ret)
                goto err_out_shrink;

        pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.metadata = 0;

        ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
        if (ret)
                goto err_out;

        encl_page->encl = encl;
        encl_page->epc_page = epc_page;
        encl_page->type = SGX_PAGE_TYPE_REG;
        encl->secs_child_cnt++;

        sgx_mark_page_reclaimable(encl_page->epc_page);

        phys_addr = sgx_get_epc_phys_addr(epc_page);
        /*
         * Do not undo everything when creating PTE entry fails - next #PF
         * would find page ready for a PTE.
         */
        vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (vmret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);
                return VM_FAULT_SIGBUS;
        }
        mutex_unlock(&encl->lock);
        return VM_FAULT_NOPAGE;

err_out:
        xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));

err_out_shrink:
        sgx_encl_shrink(encl, va_page);
err_out_epc:
        sgx_encl_free_epc_page(epc_page);
err_out_unlock:
        mutex_unlock(&encl->lock);
        kfree(encl_page);

        return vmret;
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
        unsigned long addr = (unsigned long)vmf->address;
        struct vm_area_struct *vma = vmf->vma;
        struct sgx_encl_page *entry;
        unsigned long phys_addr;
        struct sgx_encl *encl;
        vm_fault_t ret;

        encl = vma->vm_private_data;

        /*
         * It's very unlikely but possible that allocating memory for the
         * mm_list entry of a forked process failed in sgx_vma_open(). When
         * this happens, vm_private_data is set to NULL.
         */
        if (unlikely(!encl))
                return VM_FAULT_SIGBUS;

        /*
         * The page_array keeps track of all enclave pages, whether they
         * are swapped out or not. If there is no entry for this page and
         * the system supports SGX2 then it is possible to dynamically add
         * a new enclave page. This is only possible for an initialized
         * enclave, which is checked right away.
         */
        if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
            (!xa_load(&encl->page_array, PFN_DOWN(addr))))
                return sgx_encl_eaug_page(vma, encl, addr);

        mutex_lock(&encl->lock);

        entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
        if (IS_ERR(entry)) {
                mutex_unlock(&encl->lock);

                if (PTR_ERR(entry) == -EBUSY)
                        return VM_FAULT_NOPAGE;

                return VM_FAULT_SIGBUS;
        }

        phys_addr = sgx_get_epc_phys_addr(entry->epc_page);

        ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (ret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);

                return VM_FAULT_SIGBUS;
        }

        sgx_encl_test_and_clear_young(vma->vm_mm, entry);
        mutex_unlock(&encl->lock);

        return VM_FAULT_NOPAGE;
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
        struct sgx_encl *encl = vma->vm_private_data;

        /*
         * It's possible but unlikely that vm_private_data is NULL. This can
         * happen in a grandchild of a process, when sgx_encl_mm_add() had
         * failed to allocate memory in this callback.
         */
        if (unlikely(!encl))
                return;

        if (sgx_encl_mm_add(encl, vma->vm_mm))
                vma->vm_private_data = NULL;
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:	an enclave pointer
 * @start:	lower bound of the address range, inclusive
 * @end:	upper bound of the address range, exclusive
 * @vm_flags:	VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC}
 * do not contain any permissions that are not contained in the build time
 * permissions of any of the enclave pages within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This ensures that mappings have permissions
 * identical to or weaker than the earlier declared permissions.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
                     unsigned long end, unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *page;
        unsigned long count = 0;
        int ret = 0;

        XA_STATE(xas, &encl->page_array, PFN_DOWN(start));

        /* Disallow mapping outside enclave's address range. */
        if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
            (start < encl->base || end > encl->base + encl->size))
                return -EACCES;

        /*
         * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
         * conflict with the enclave page permissions.
         */
        if (current->personality & READ_IMPLIES_EXEC)
                return -EACCES;

        mutex_lock(&encl->lock);
        xas_lock(&xas);
        xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
                if (~page->vm_max_prot_bits & vm_prot_bits) {
                        ret = -EACCES;
                        break;
                }

                /* Reschedule on every XA_CHECK_SCHED iteration. */
                if (!(++count % XA_CHECK_SCHED)) {
                        xas_pause(&xas);
                        xas_unlock(&xas);
                        mutex_unlock(&encl->lock);

                        cond_resched();

                        mutex_lock(&encl->lock);
                        xas_lock(&xas);
                }
        }
        xas_unlock(&xas);
        mutex_unlock(&encl->lock);

        return ret;
}

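/*
 * For example, a page added at build time with SECINFO permissions R|W
 * satisfies an mmap()/mprotect() request for PROT_READ or
 * PROT_READ|PROT_WRITE, while a request including PROT_EXEC fails the
 * vm_max_prot_bits check above with -EACCES.
 */
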
static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end, unsigned long newflags)
{
        return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
                               unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
                                unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

/*
 * Load an enclave page to EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
                                                   unsigned long addr,
                                                   unsigned long vm_flags)
{
        struct sgx_encl_page *entry;

        for ( ; ; ) {
                mutex_lock(&encl->lock);

                entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
                if (PTR_ERR(entry) != -EBUSY)
                        break;

                mutex_unlock(&encl->lock);
        }

        if (IS_ERR(entry))
                mutex_unlock(&encl->lock);

        return entry;
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
                          void *buf, int len, int write)
{
        struct sgx_encl *encl = vma->vm_private_data;
        struct sgx_encl_page *entry = NULL;
        char data[sizeof(unsigned long)];
        unsigned long align;
        int offset;
        int cnt;
        int ret = 0;
        int i;

        /*
         * If process was forked, VMA is still there but vm_private_data is set
         * to NULL.
         */
        if (!encl)
                return -EFAULT;

        if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
                return -EFAULT;

        for (i = 0; i < len; i += cnt) {
                entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
                                              vma->vm_flags);
                if (IS_ERR(entry)) {
                        ret = PTR_ERR(entry);
                        break;
                }

                align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
                offset = (addr + i) & (sizeof(unsigned long) - 1);
                cnt = sizeof(unsigned long) - offset;
                cnt = min(cnt, len - i);

                ret = sgx_encl_debug_read(encl, entry, align, data);
                if (ret)
                        goto out;

                if (write) {
                        memcpy(data + offset, buf + i, cnt);
                        ret = sgx_encl_debug_write(encl, entry, align, data);
                        if (ret)
                                goto out;
                } else {
                        memcpy(buf + i, data + offset, cnt);
                }

out:
                mutex_unlock(&encl->lock);

                if (ret)
                        break;
        }

        return ret < 0 ? ret : i;
}

const struct vm_operations_struct sgx_vm_ops = {
        .fault = sgx_vma_fault,
        .mprotect = sgx_vma_mprotect,
        .open = sgx_vma_open,
        .access = sgx_vma_access,
};

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
        struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
        unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
        struct sgx_va_page *va_page;
        struct sgx_encl_page *entry;
        unsigned long count = 0;

        XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));

        xas_lock(&xas);
        xas_for_each(&xas, entry, max_page_index) {
                if (entry->epc_page) {
                        /*
                         * The page and its radix tree entry cannot be freed
                         * if the page is being held by the reclaimer.
                         */
                        if (sgx_unmark_page_reclaimable(entry->epc_page))
                                continue;

                        sgx_encl_free_epc_page(entry->epc_page);
                        encl->secs_child_cnt--;
                        entry->epc_page = NULL;
                }

                kfree(entry);
                /*
                 * Invoke scheduler on every XA_CHECK_SCHED iteration
                 * to prevent soft lockups.
                 */
                if (!(++count % XA_CHECK_SCHED)) {
                        xas_pause(&xas);
                        xas_unlock(&xas);

                        cond_resched();

                        xas_lock(&xas);
                }
        }
        xas_unlock(&xas);

        xa_destroy(&encl->page_array);

        if (!encl->secs_child_cnt && encl->secs.epc_page) {
                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;
        }

        while (!list_empty(&encl->va_pages)) {
                va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
                                           list);
                list_del(&va_page->list);
                sgx_encl_free_epc_page(va_page->epc_page);
                kfree(va_page);
        }

        if (encl->backing)
                fput(encl->backing);

        cleanup_srcu_struct(&encl->srcu);

        WARN_ON_ONCE(!list_empty(&encl->mm_list));

        /* Detect EPC page leaks. */
        WARN_ON_ONCE(encl->secs_child_cnt);
        WARN_ON_ONCE(encl->secs.epc_page);

        kfree(encl);
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
                                     struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
        struct sgx_encl_mm *tmp = NULL;

        /*
         * The enclave itself can remove encl_mm. Note, objects can't be moved
         * off an RCU protected list, but deletion is ok.
         */
        spin_lock(&encl_mm->encl->mm_lock);
        list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
                if (tmp == encl_mm) {
                        list_del_rcu(&encl_mm->list);
                        break;
                }
        }
        spin_unlock(&encl_mm->encl->mm_lock);

        if (tmp == encl_mm) {
                synchronize_srcu(&encl_mm->encl->srcu);
                mmu_notifier_put(mn);
        }
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);

        /* 'encl_mm' is going away, put encl_mm->encl reference: */
        kref_put(&encl_mm->encl->refcount, sgx_encl_release);

        kfree(encl_mm);
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
        .release		= sgx_mmu_notifier_release,
        .free_notifier	= sgx_mmu_notifier_free,
};

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
                                            struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = NULL;
        struct sgx_encl_mm *tmp;
        int idx;

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
                if (tmp->mm == mm) {
                        encl_mm = tmp;
                        break;
                }
        }

        srcu_read_unlock(&encl->srcu, idx);

        return encl_mm;
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm;
        int ret;

        /*
         * Even though a single enclave may be mapped into an mm more than once,
         * each 'mm' only appears once on encl->mm_list. This is guaranteed by
         * holding the mm's mmap lock for write before an mm can be added to or
         * removed from an encl->mm_list.
         */
        mmap_assert_write_locked(mm);

        /*
         * It's possible that an entry already exists in the mm_list, because it
         * is removed only on VFS release or process exit.
         */
        if (sgx_encl_find_mm(encl, mm))
                return 0;

        encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
        if (!encl_mm)
                return -ENOMEM;

        /* Grab a refcount for the encl_mm->encl reference: */
        kref_get(&encl->refcount);
        encl_mm->encl = encl;
        encl_mm->mm = mm;
        encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;

        ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
        if (ret) {
                kfree(encl_mm);
                return ret;
        }

        spin_lock(&encl->mm_lock);
        list_add_rcu(&encl_mm->list, &encl->mm_list);
        /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
        smp_wmb();
        encl->mm_list_version++;
        spin_unlock(&encl->mm_lock);

        return 0;
}

/**
 * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
 * @encl: the enclave
 *
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. For example, ENCLS[EWB]
 * copies a page from the enclave page cache to regular main memory but
 * it fails if it cannot ensure that there are no cached
 * linear-to-physical address mappings referring to the page.
 *
 * SGX hardware flushes all cached linear-to-physical mappings on a CPU
 * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
 * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
 * address mappings are cleared but coordination with the tracking done within
 * the SGX hardware is needed to support the SGX functions that depend on this
 * cache clearing.
 *
 * When the ENCLS[ETRACK] function is issued on an enclave the hardware
 * tracks threads operating inside the enclave at that time. The SGX
 * hardware tracking requires that all the identified threads have
 * exited the enclave in order to flush the mappings before a function such
 * as ENCLS[EWB] will be permitted.
 *
 * The following flow is used to support SGX functions that require that
 * no cached linear-to-physical address mappings are present:
 * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
 * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
 *    accessing the enclave.
 * 3) Send IPI to identified CPUs, kicking them out of the enclave and
 *    thus flushing all locally cached linear-to-physical address mappings.
 * 4) Execute SGX function.
 *
 * Context: It is required to call this function after ENCLS[ETRACK].
 *          This will ensure that if any new mm appears (racing with
 *          sgx_encl_mm_add()) then the new mm will enter into the
 *          enclave with fresh linear-to-physical address mappings.
 *
 *          It is required that all IPIs are completed before a new
 *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
 *          of the above flow with the enclave's mutex.
 *
 * Return: cpumask of CPUs that might be accessing @encl
 */
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
        cpumask_t *cpumask = &encl->cpumask;
        struct sgx_encl_mm *encl_mm;
        int idx;

        cpumask_clear(cpumask);

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

                mmput_async(encl_mm->mm);
        }

        srcu_read_unlock(&encl->srcu, idx);

        return cpumask;
}

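/*
 * A minimal caller sketch of the flow documented above, assuming the reclaimer
 * helpers that live alongside this file in sgx/main.c (__etrack(),
 * sgx_ipi_cb() and __sgx_encl_ewb()):
 *
 *	__etrack(sgx_get_epc_virt_addr(encl->secs.epc_page));
 *	on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
 *	ret = __sgx_encl_ewb(epc_page, va_slot, backing);
 *
 * with encl->lock held across all three steps so that no new ETRACK is issued
 * before the IPIs have completed.
 */
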
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
                                              pgoff_t index)
{
        struct address_space *mapping = encl->backing->f_mapping;
        gfp_t gfpmask = mapping_gfp_mask(mapping);

        return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
}

/**
 * __sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
                                  struct sgx_backing *backing)
{
        pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
        struct page *contents;
        struct page *pcmd;

        contents = sgx_encl_get_backing_page(encl, page_index);
        if (IS_ERR(contents))
                return PTR_ERR(contents);

        pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
        if (IS_ERR(pcmd)) {
                put_page(contents);
                return PTR_ERR(pcmd);
        }

        backing->contents = contents;
        backing->pcmd = pcmd;
        backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);

        return 0;
}

/*
 * When called from ksgxd, returns the mem_cgroup of a struct mm stored
 * in the enclave's mm_list. When not called from ksgxd, just returns
 * the mem_cgroup of the current task.
 */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
        struct mem_cgroup *memcg = NULL;
        struct sgx_encl_mm *encl_mm;
        int idx;

        /*
         * If called from normal task context, return the mem_cgroup
         * of the current task's mm. The remainder of the handling is for
         * ksgxd.
         */
        if (!current_is_ksgxd())
                return get_mem_cgroup_from_mm(current->mm);

        /*
         * Search the enclave's mm_list to find an mm associated with
         * this enclave to charge the allocation to.
         */
        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                memcg = get_mem_cgroup_from_mm(encl_mm->mm);

                mmput_async(encl_mm->mm);

                break;
        }

        srcu_read_unlock(&encl->srcu, idx);

        /*
         * In the rare case that there isn't an mm associated with
         * the enclave, set memcg to the current active mem_cgroup.
         * This will be the root mem_cgroup if there is no active
         * mem_cgroup.
         */
        if (!memcg)
                return get_mem_cgroup_from_mm(NULL);

        return memcg;
}

/**
 * sgx_encl_alloc_backing() - create a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * When called from ksgxd, sets the active memcg from one of the
 * mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
 * enclave. Create a backing page for loading data back into an EPC page with
 * ELDU. This function takes a reference on a new backing page which
 * must be dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
                           struct sgx_backing *backing)
{
        struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
        struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
        int ret;

        ret = __sgx_encl_get_backing(encl, page_index, backing);

        set_active_memcg(memcg);
        mem_cgroup_put(encl_memcg);

        return ret;
}

/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Retrieve a backing page for loading data back into an EPC page with ELDU.
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
 * This function takes a reference on an existing backing page which must be
 * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
                                   struct sgx_backing *backing)
{
        return __sgx_encl_get_backing(encl, page_index, backing);
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 */
void sgx_encl_put_backing(struct sgx_backing *backing)
{
        put_page(backing->pcmd);
        put_page(backing->contents);
}

static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
                                            void *data)
{
        pte_t pte;
        int ret;

        ret = pte_young(*ptep);
        if (ret) {
                pte = pte_mkold(*ptep);
                set_pte_at((struct mm_struct *)data, addr, ptep, pte);
        }

        return ret;
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Access (A) bit from the PTE corresponding to the enclave page and
 * clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
                                  struct sgx_encl_page *page)
{
        unsigned long addr = page->desc & PAGE_MASK;
        struct sgx_encl *encl = page->encl;
        struct vm_area_struct *vma;
        int ret;

        ret = sgx_encl_find(mm, addr, &vma);
        if (ret)
                return 0;

        if (encl != vma->vm_private_data)
                return 0;

        ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
                                  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
        if (ret < 0)
                return 0;

        return ret;
}

struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
                                          unsigned long offset,
                                          u64 secinfo_flags)
{
        struct sgx_encl_page *encl_page;
        unsigned long prot;

        encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
        if (!encl_page)
                return ERR_PTR(-ENOMEM);

        encl_page->desc = encl->base + offset;
        encl_page->encl = encl;

        prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);

        /*
         * TCS pages must always have RW set for CPU access while the SECINFO
         * permissions are *always* zero - the CPU ignores the user provided
         * values and silently overwrites them with zero permissions.
         */
        if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
                prot |= PROT_READ | PROT_WRITE;

        /* Calculate maximum of the VM flags for the page. */
        encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);

        return encl_page;
}

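/*
 * For illustration: SECINFO flags of SGX_SECINFO_R | SGX_SECINFO_X translate
 * to prot == PROT_READ | PROT_EXEC, so calc_vm_prot_bits() yields
 * vm_max_prot_bits == VM_READ | VM_EXEC and a later PROT_WRITE mapping of the
 * page is rejected by sgx_encl_may_map().
 */
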
/**
 * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
 * @encl: the enclave
 * @addr: page aligned pointer to single page for which PTEs will be removed
 *
 * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
 * @addr from each VMA. Ensure that page fault handler is ready to handle
 * new mappings of @addr before calling this function.
 */
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
        unsigned long mm_list_version;
        struct sgx_encl_mm *encl_mm;
        struct vm_area_struct *vma;
        int idx, ret;

        do {
                mm_list_version = encl->mm_list_version;

                /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
                smp_rmb();

                idx = srcu_read_lock(&encl->srcu);

                list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                        if (!mmget_not_zero(encl_mm->mm))
                                continue;

                        mmap_read_lock(encl_mm->mm);

                        ret = sgx_encl_find(encl_mm->mm, addr, &vma);
                        if (!ret && encl == vma->vm_private_data)
                                zap_vma_ptes(vma, addr, PAGE_SIZE);

                        mmap_read_unlock(encl_mm->mm);

                        mmput_async(encl_mm->mm);
                }

                srcu_read_unlock(&encl->srcu, idx);
        } while (unlikely(encl->mm_list_version != mm_list_version));
}

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 * @reclaim: Reclaim EPC pages directly if none available. Enclave
 *           mutex should not be held if this is set.
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(NULL, reclaim);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        ret = __epa(sgx_get_epc_virt_addr(epc_page));
        if (ret) {
                WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(-EFAULT);
        }

        return epc_page;
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        if (slot < SGX_VA_SLOT_COUNT)
                set_bit(slot, va_page->slots);

        return slot << 3;
}

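/*
 * Each VA slot holds an 8-byte page version, hence the "slot << 3" byte
 * offset above: for example, bit 5 in va_page->slots corresponds to byte
 * offset 40 within the VA page, and sgx_free_va_slot() reverses the mapping
 * with "offset >> 3".
 */
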
/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
        clear_bit(offset >> 3, va_page->slots);
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        return slot == SGX_VA_SLOT_COUNT;
}

/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
 * only upon success, it puts the page back to the free page list. Otherwise, it
 * gives a WARNING to indicate the page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
        int ret;

        WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

        ret = __eremove(sgx_get_epc_virt_addr(page));
        if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
                return;

        sgx_free_epc_page(page);
}