encl.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */

#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"

static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
                                   struct sgx_backing *backing);

#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd))
/*
 * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to
 * determine the page index associated with the first PCMD entry
 * within a PCMD page.
 */
#define PCMD_FIRST_MASK GENMASK(4, 0)

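/*
 * For illustration: PCMD_FIRST_MASK is GENMASK(4, 0) == 0x1f, so
 * "page_index & ~PCMD_FIRST_MASK" rounds a page index down to a multiple of
 * 32. For example, index 0x47 maps to 0x40, i.e. the enclave page that owns
 * the first PCMD entry within that PCMD page.
 */
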
/**
 * reclaimer_writing_to_pcmd() - Query if any enclave page associated with
 *                               a PCMD page is in the process of being reclaimed.
 * @encl:       Enclave to which PCMD page belongs
 * @start_addr: Address of enclave page using first entry within the PCMD page
 *
 * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is
 * stored. The PCMD data of a reclaimed enclave page contains enough
 * information for the processor to verify the page at the time
 * it is loaded back into the Enclave Page Cache (EPC).
 *
 * The backing storage to which enclave pages are reclaimed is laid out as
 * follows:
 * Encrypted enclave pages:SECS page:PCMD pages
 *
 * Each PCMD page contains the PCMD metadata of
 * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages.
 *
 * A PCMD page can only be truncated if it is (a) empty, and (b) not in the
 * process of getting data (and thus soon being non-empty). (b) is tested with
 * a check if an enclave page sharing the PCMD page is in the process of being
 * reclaimed.
 *
 * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
 * intends to reclaim that enclave page - it means that the PCMD page
 * associated with that enclave page is about to get some data and thus
 * even if the PCMD page is empty, it should not be truncated.
 *
 * Context: Enclave mutex (&sgx_encl->lock) must be held.
 * Return: 1 if the reclaimer is about to write to the PCMD page
 *         0 if the reclaimer has no intention to write to the PCMD page
 */
static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
                                     unsigned long start_addr)
{
        int reclaimed = 0;
        int i;

        /*
         * PCMD_FIRST_MASK is based on number of PCMD entries within
         * PCMD page being 32.
         */
        BUILD_BUG_ON(PCMDS_PER_PAGE != 32);

        for (i = 0; i < PCMDS_PER_PAGE; i++) {
                struct sgx_encl_page *entry;
                unsigned long addr;

                addr = start_addr + i * PAGE_SIZE;

                /*
                 * Stop when reaching the SECS page - it does not
                 * have a page_array entry and its reclaim is
                 * started and completed with enclave mutex held so
                 * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
                 * flag.
                 */
                if (addr == encl->base + encl->size)
                        break;

                entry = xa_load(&encl->page_array, PFN_DOWN(addr));
                if (!entry)
                        continue;

                /*
                 * VA page slot ID uses same bit as the flag so it is important
                 * to ensure that the page is not already in backing store.
                 */
                if (entry->epc_page &&
                    (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
                        reclaimed = 1;
                        break;
                }
        }

        return reclaimed;
}

/*
 * Calculate byte offset of a PCMD struct associated with an enclave page. PCMDs
 * follow right after the EPC data in the backing storage. In addition to the
 * visible enclave pages, there's one extra page slot for SECS, before PCMD
 * structs.
 */
static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
                                                            unsigned long page_index)
{
        pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);

        return epc_end_off + page_index * sizeof(struct sgx_pcmd);
}

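/*
 * Worked example of the layout above, assuming the architectural sizes of a
 * 4096-byte SECS and 128-byte PCMD entries: an enclave with encl->size ==
 * 0x8000 keeps encrypted page contents at backing offsets [0, 0x8000), the
 * SECS slot at [0x8000, 0x9000), and the PCMD array from 0x9000 onwards, so
 * page_index 3 resolves to the PCMD at 0x9000 + 3 * 128 == 0x9180.
 */
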
/*
 * Free a page from the backing storage at the given page index.
 */
static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
{
        struct inode *inode = file_inode(encl->backing);

        shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
}

/*
 * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
 * Pages" in the SDM.
 */
static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
                           struct sgx_epc_page *epc_page,
                           struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        pgoff_t page_index, page_pcmd_off;
        unsigned long pcmd_first_page;
        struct sgx_pageinfo pginfo;
        struct sgx_backing b;
        bool pcmd_page_empty;
        u8 *pcmd_page;
        int ret;

        if (secs_page)
                page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
        else
                page_index = PFN_DOWN(encl->size);

        /*
         * Address of enclave page using the first entry within the PCMD page.
         */
        pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base;

        page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);

        ret = sgx_encl_lookup_backing(encl, page_index, &b);
        if (ret)
                return ret;

        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.contents = (unsigned long)kmap_atomic(b.contents);
        pcmd_page = kmap_atomic(b.pcmd);
        pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;

        if (secs_page)
                pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
        else
                pginfo.secs = 0;

        ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page),
                     sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset);
        if (ret) {
                if (encls_failed(ret))
                        ENCLS_WARN(ret, "ELDU");

                ret = -EFAULT;
        }

        memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
        set_page_dirty(b.pcmd);

        /*
         * The area for the PCMD in the page was zeroed above. Check if the
         * whole page is now empty meaning that all PCMDs have been zeroed:
         */
        pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);

        kunmap_atomic(pcmd_page);
        kunmap_atomic((void *)(unsigned long)pginfo.contents);

        get_page(b.pcmd);
        sgx_encl_put_backing(&b);

        sgx_encl_truncate_backing_page(encl, page_index);

        if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
                sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
                pcmd_page = kmap_atomic(b.pcmd);
                if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
                        pr_warn("PCMD page not empty after truncate.\n");
                kunmap_atomic(pcmd_page);
        }

        put_page(b.pcmd);

        return ret;
}

static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
                                          struct sgx_epc_page *secs_page)
{
        unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
        struct sgx_encl *encl = encl_page->encl;
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page))
                return epc_page;

        ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
        if (ret) {
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(ret);
        }

        sgx_free_va_slot(encl_page->va_page, va_offset);
        list_move(&encl_page->va_page->list, &encl->va_pages);
        encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK;
        encl_page->epc_page = epc_page;

        return epc_page;
}

/*
 * Ensure the SECS page is not swapped out. Must be called with encl->lock held
 * to protect the enclave states including SECS and to ensure the SECS page is
 * not swapped out again while being used.
 */
static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
{
        struct sgx_epc_page *epc_page = encl->secs.epc_page;

        if (!epc_page)
                epc_page = sgx_encl_eldu(&encl->secs, NULL);

        return epc_page;
}

static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
                                                  struct sgx_encl_page *entry)
{
        struct sgx_epc_page *epc_page;

        /* Entry successfully located. */
        if (entry->epc_page) {
                if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
                        return ERR_PTR(-EBUSY);

                return entry;
        }

        epc_page = sgx_encl_load_secs(encl);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        encl->secs_child_cnt++;
        sgx_mark_page_reclaimable(entry->epc_page);

        return entry;
}

static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
                                                       unsigned long addr,
                                                       unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        /*
         * Verify that the page has equal or higher build time
         * permissions than the VMA permissions (i.e. the subset of {VM_READ,
         * VM_WRITE, VM_EXEC} in vma->vm_flags).
         */
        if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}

struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
                                         unsigned long addr)
{
        struct sgx_encl_page *entry;

        entry = xa_load(&encl->page_array, PFN_DOWN(addr));
        if (!entry)
                return ERR_PTR(-EFAULT);

        return __sgx_encl_load_page(encl, entry);
}

/**
 * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
 * @vma:	VMA obtained from fault info from where page is accessed
 * @encl:	enclave accessing the page
 * @addr:	address that triggered the page fault
 *
 * When an initialized enclave accesses a page with no backing EPC page
 * on an SGX2 system then an EPC page can be added dynamically via the SGX2
 * ENCLS[EAUG] instruction.
 *
 * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
 * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
 */
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
                                     struct sgx_encl *encl, unsigned long addr)
{
        vm_fault_t vmret = VM_FAULT_SIGBUS;
        struct sgx_pageinfo pginfo = {0};
        struct sgx_encl_page *encl_page;
        struct sgx_epc_page *epc_page;
        struct sgx_va_page *va_page;
        unsigned long phys_addr;
        u64 secinfo_flags;
        int ret;

        if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
                return VM_FAULT_SIGBUS;

        /*
         * Ignore internal permission checking for dynamically added pages.
         * They matter only for data added during the pre-initialization
         * phase. The enclave decides the permissions by means of
         * EACCEPT, EACCEPTCOPY and EMODPE.
         */
        secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
        encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
        if (IS_ERR(encl_page))
                return VM_FAULT_OOM;

        mutex_lock(&encl->lock);

        epc_page = sgx_encl_load_secs(encl);
        if (IS_ERR(epc_page)) {
                if (PTR_ERR(epc_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_unlock;
        }

        epc_page = sgx_alloc_epc_page(encl_page, false);
        if (IS_ERR(epc_page)) {
                if (PTR_ERR(epc_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_unlock;
        }

        va_page = sgx_encl_grow(encl, false);
        if (IS_ERR(va_page)) {
                if (PTR_ERR(va_page) == -EBUSY)
                        vmret = VM_FAULT_NOPAGE;
                goto err_out_epc;
        }

        if (va_page)
                list_add(&va_page->list, &encl->va_pages);

        ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
                        encl_page, GFP_KERNEL);
        /*
         * If ret == -EBUSY then page was created in another flow while
         * running without encl->lock
         */
        if (ret)
                goto err_out_shrink;

        pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
        pginfo.addr = encl_page->desc & PAGE_MASK;
        pginfo.metadata = 0;

        ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
        if (ret)
                goto err_out;

        encl_page->encl = encl;
        encl_page->epc_page = epc_page;
        encl_page->type = SGX_PAGE_TYPE_REG;
        encl->secs_child_cnt++;

        sgx_mark_page_reclaimable(encl_page->epc_page);

        phys_addr = sgx_get_epc_phys_addr(epc_page);
        /*
         * Do not undo everything when creating PTE entry fails - next #PF
         * would find page ready for a PTE.
         */
        vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (vmret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);
                return VM_FAULT_SIGBUS;
        }
        mutex_unlock(&encl->lock);
        return VM_FAULT_NOPAGE;

err_out:
        xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));

err_out_shrink:
        sgx_encl_shrink(encl, va_page);
err_out_epc:
        sgx_encl_free_epc_page(epc_page);
err_out_unlock:
        mutex_unlock(&encl->lock);
        kfree(encl_page);

        return vmret;
}

static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
        unsigned long addr = (unsigned long)vmf->address;
        struct vm_area_struct *vma = vmf->vma;
        struct sgx_encl_page *entry;
        unsigned long phys_addr;
        struct sgx_encl *encl;
        vm_fault_t ret;

        encl = vma->vm_private_data;

        /*
         * It's very unlikely but possible that allocating memory for the
         * mm_list entry of a forked process failed in sgx_vma_open(). When
         * this happens, vm_private_data is set to NULL.
         */
        if (unlikely(!encl))
                return VM_FAULT_SIGBUS;

        /*
         * The page_array keeps track of all enclave pages, whether they
         * are swapped out or not. If there is no entry for this page and
         * the system supports SGX2 then it is possible to dynamically add
         * a new enclave page. This is only possible for an initialized
         * enclave, which is checked right away.
         */
        if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
            (!xa_load(&encl->page_array, PFN_DOWN(addr))))
                return sgx_encl_eaug_page(vma, encl, addr);

        mutex_lock(&encl->lock);

        entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
        if (IS_ERR(entry)) {
                mutex_unlock(&encl->lock);

                if (PTR_ERR(entry) == -EBUSY)
                        return VM_FAULT_NOPAGE;

                return VM_FAULT_SIGBUS;
        }

        phys_addr = sgx_get_epc_phys_addr(entry->epc_page);

        ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
        if (ret != VM_FAULT_NOPAGE) {
                mutex_unlock(&encl->lock);

                return VM_FAULT_SIGBUS;
        }

        sgx_encl_test_and_clear_young(vma->vm_mm, entry);
        mutex_unlock(&encl->lock);

        return VM_FAULT_NOPAGE;
}

static void sgx_vma_open(struct vm_area_struct *vma)
{
        struct sgx_encl *encl = vma->vm_private_data;

        /*
         * It's possible but unlikely that vm_private_data is NULL. This can
         * happen in a grandchild of a process, when sgx_encl_mm_add() had
         * failed to allocate memory in this callback.
         */
        if (unlikely(!encl))
                return;

        if (sgx_encl_mm_add(encl, vma->vm_mm))
                vma->vm_private_data = NULL;
}

/**
 * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
 * @encl:	an enclave pointer
 * @start:	lower bound of the address range, inclusive
 * @end:	upper bound of the address range, exclusive
 * @vm_flags:	VMA flags
 *
 * Iterate through the enclave pages contained within [@start, @end) to verify
 * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC}
 * do not contain any permissions that are not contained in the build time
 * permissions of any of the enclave pages within the given address range.
 *
 * An enclave creator must declare the strongest permissions that will be
 * needed for each enclave page. This ensures that mappings have permissions
 * identical to or weaker than the earlier declared permissions.
 *
 * Return: 0 on success, -EACCES otherwise
 */
int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
                     unsigned long end, unsigned long vm_flags)
{
        unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
        struct sgx_encl_page *page;
        unsigned long count = 0;
        int ret = 0;

        XA_STATE(xas, &encl->page_array, PFN_DOWN(start));

        /* Disallow mapping outside enclave's address range. */
        if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
            (start < encl->base || end > encl->base + encl->size))
                return -EACCES;

        /*
         * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
         * conflict with the enclave page permissions.
         */
        if (current->personality & READ_IMPLIES_EXEC)
                return -EACCES;

        mutex_lock(&encl->lock);
        xas_lock(&xas);
        xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
                if (~page->vm_max_prot_bits & vm_prot_bits) {
                        ret = -EACCES;
                        break;
                }

                /* Reschedule on every XA_CHECK_SCHED iteration. */
                if (!(++count % XA_CHECK_SCHED)) {
                        xas_pause(&xas);
                        xas_unlock(&xas);
                        mutex_unlock(&encl->lock);

                        cond_resched();

                        mutex_lock(&encl->lock);
                        xas_lock(&xas);
                }
        }
        xas_unlock(&xas);
        mutex_unlock(&encl->lock);

        return ret;
}

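/*
 * For example, a page added at build time with SECINFO permissions R|W
 * satisfies an mmap()/mprotect() request for PROT_READ or
 * PROT_READ|PROT_WRITE, while a request including PROT_EXEC fails the
 * vm_max_prot_bits check above with -EACCES.
 */
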
static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end, unsigned long newflags)
{
        return sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
}

static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page,
                               unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page,
                                unsigned long addr, void *data)
{
        unsigned long offset = addr & ~PAGE_MASK;
        int ret;

        ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data);
        if (ret)
                return -EIO;

        return 0;
}

/*
 * Load an enclave page to EPC if required, and take encl->lock.
 */
static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
                                                   unsigned long addr,
                                                   unsigned long vm_flags)
{
        struct sgx_encl_page *entry;

        for ( ; ; ) {
                mutex_lock(&encl->lock);

                entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
                if (PTR_ERR(entry) != -EBUSY)
                        break;

                mutex_unlock(&encl->lock);
        }

        if (IS_ERR(entry))
                mutex_unlock(&encl->lock);

        return entry;
}

static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
                          void *buf, int len, int write)
{
        struct sgx_encl *encl = vma->vm_private_data;
        struct sgx_encl_page *entry = NULL;
        char data[sizeof(unsigned long)];
        unsigned long align;
        int offset;
        int cnt;
        int ret = 0;
        int i;

        /*
         * If process was forked, VMA is still there but vm_private_data is set
         * to NULL.
         */
        if (!encl)
                return -EFAULT;

        if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
                return -EFAULT;

        for (i = 0; i < len; i += cnt) {
                entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK,
                                              vma->vm_flags);
                if (IS_ERR(entry)) {
                        ret = PTR_ERR(entry);
                        break;
                }

                align = ALIGN_DOWN(addr + i, sizeof(unsigned long));
                offset = (addr + i) & (sizeof(unsigned long) - 1);
                cnt = sizeof(unsigned long) - offset;
                cnt = min(cnt, len - i);

                ret = sgx_encl_debug_read(encl, entry, align, data);
                if (ret)
                        goto out;

                if (write) {
                        memcpy(data + offset, buf + i, cnt);
                        ret = sgx_encl_debug_write(encl, entry, align, data);
                        if (ret)
                                goto out;
                } else {
                        memcpy(buf + i, data + offset, cnt);
                }

out:
                mutex_unlock(&encl->lock);

                if (ret)
                        break;
        }

        return ret < 0 ? ret : i;
}

const struct vm_operations_struct sgx_vm_ops = {
        .fault = sgx_vma_fault,
        .mprotect = sgx_vma_mprotect,
        .open = sgx_vma_open,
        .access = sgx_vma_access,
};

/**
 * sgx_encl_release - Destroy an enclave instance
 * @ref:	address of a kref inside &sgx_encl
 *
 * Used together with kref_put(). Frees all the resources associated with the
 * enclave and the instance itself.
 */
void sgx_encl_release(struct kref *ref)
{
        struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
        unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
        struct sgx_va_page *va_page;
        struct sgx_encl_page *entry;
        unsigned long count = 0;

        XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));

        xas_lock(&xas);
        xas_for_each(&xas, entry, max_page_index) {
                if (entry->epc_page) {
                        /*
                         * The page and its radix tree entry cannot be freed
                         * if the page is being held by the reclaimer.
                         */
                        if (sgx_unmark_page_reclaimable(entry->epc_page))
                                continue;

                        sgx_encl_free_epc_page(entry->epc_page);
                        encl->secs_child_cnt--;
                        entry->epc_page = NULL;
                }

                kfree(entry);
                /*
                 * Invoke scheduler on every XA_CHECK_SCHED iteration
                 * to prevent soft lockups.
                 */
                if (!(++count % XA_CHECK_SCHED)) {
                        xas_pause(&xas);
                        xas_unlock(&xas);

                        cond_resched();

                        xas_lock(&xas);
                }
        }
        xas_unlock(&xas);

        xa_destroy(&encl->page_array);

        if (!encl->secs_child_cnt && encl->secs.epc_page) {
                sgx_encl_free_epc_page(encl->secs.epc_page);
                encl->secs.epc_page = NULL;
        }

        while (!list_empty(&encl->va_pages)) {
                va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
                                           list);
                list_del(&va_page->list);
                sgx_encl_free_epc_page(va_page->epc_page);
                kfree(va_page);
        }

        if (encl->backing)
                fput(encl->backing);

        cleanup_srcu_struct(&encl->srcu);

        WARN_ON_ONCE(!list_empty(&encl->mm_list));

        /* Detect EPC page leaks. */
        WARN_ON_ONCE(encl->secs_child_cnt);
        WARN_ON_ONCE(encl->secs.epc_page);

        kfree(encl);
}

/*
 * 'mm' is exiting and no longer needs mmu notifications.
 */
static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
                                     struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
        struct sgx_encl_mm *tmp = NULL;

        /*
         * The enclave itself can remove encl_mm. Note, objects can't be moved
         * off an RCU protected list, but deletion is ok.
         */
        spin_lock(&encl_mm->encl->mm_lock);
        list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
                if (tmp == encl_mm) {
                        list_del_rcu(&encl_mm->list);
                        break;
                }
        }
        spin_unlock(&encl_mm->encl->mm_lock);

        if (tmp == encl_mm) {
                synchronize_srcu(&encl_mm->encl->srcu);
                mmu_notifier_put(mn);
        }
}

static void sgx_mmu_notifier_free(struct mmu_notifier *mn)
{
        struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);

        /* 'encl_mm' is going away, put encl_mm->encl reference: */
        kref_put(&encl_mm->encl->refcount, sgx_encl_release);

        kfree(encl_mm);
}

static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
        .release		= sgx_mmu_notifier_release,
        .free_notifier	= sgx_mmu_notifier_free,
};

static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
                                            struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm = NULL;
        struct sgx_encl_mm *tmp;
        int idx;

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(tmp, &encl->mm_list, list) {
                if (tmp->mm == mm) {
                        encl_mm = tmp;
                        break;
                }
        }

        srcu_read_unlock(&encl->srcu, idx);

        return encl_mm;
}

int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
{
        struct sgx_encl_mm *encl_mm;
        int ret;

        /*
         * Even though a single enclave may be mapped into an mm more than once,
         * each 'mm' only appears once on encl->mm_list. This is guaranteed by
         * holding the mm's mmap lock for write before an mm can be added to or
         * removed from an encl->mm_list.
         */
        mmap_assert_write_locked(mm);

        /*
         * It's possible that an entry already exists in the mm_list, because it
         * is removed only on VFS release or process exit.
         */
        if (sgx_encl_find_mm(encl, mm))
                return 0;

        encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
        if (!encl_mm)
                return -ENOMEM;

        /* Grab a refcount for the encl_mm->encl reference: */
        kref_get(&encl->refcount);
        encl_mm->encl = encl;
        encl_mm->mm = mm;
        encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;

        ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
        if (ret) {
                kfree(encl_mm);
                return ret;
        }

        spin_lock(&encl->mm_lock);
        list_add_rcu(&encl_mm->list, &encl->mm_list);
        /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
        smp_wmb();
        encl->mm_list_version++;
        spin_unlock(&encl->mm_lock);

        return 0;
}

/**
 * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
 * @encl: the enclave
 *
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. For example, ENCLS[EWB]
 * copies a page from the enclave page cache to regular main memory but
 * it fails if it cannot ensure that there are no cached
 * linear-to-physical address mappings referring to the page.
 *
 * SGX hardware flushes all cached linear-to-physical mappings on a CPU
 * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
 * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
 * address mappings are cleared but coordination with the tracking done within
 * the SGX hardware is needed to support the SGX functions that depend on this
 * cache clearing.
 *
 * When the ENCLS[ETRACK] function is issued on an enclave the hardware
 * tracks threads operating inside the enclave at that time. The SGX
 * hardware tracking requires that all the identified threads have
 * exited the enclave in order to flush the mappings before a function such
 * as ENCLS[EWB] will be permitted.
 *
 * The following flow is used to support SGX functions that require that
 * no cached linear-to-physical address mappings are present:
 * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
 * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
 *    accessing the enclave.
 * 3) Send IPI to identified CPUs, kicking them out of the enclave and
 *    thus flushing all locally cached linear-to-physical address mappings.
 * 4) Execute SGX function.
 *
 * Context: It is required to call this function after ENCLS[ETRACK].
 *          This will ensure that if any new mm appears (racing with
 *          sgx_encl_mm_add()) then the new mm will enter into the
 *          enclave with fresh linear-to-physical address mappings.
 *
 *          It is required that all IPIs are completed before a new
 *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
 *          of the above flow with the enclave's mutex.
 *
 * Return: cpumask of CPUs that might be accessing @encl
 */
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
        cpumask_t *cpumask = &encl->cpumask;
        struct sgx_encl_mm *encl_mm;
        int idx;

        cpumask_clear(cpumask);

        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));

                mmput_async(encl_mm->mm);
        }

        srcu_read_unlock(&encl->srcu, idx);

        return cpumask;
}

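/*
 * A minimal caller sketch of the flow documented above, assuming the reclaimer
 * helpers that live alongside this file in sgx/main.c (__etrack(),
 * sgx_ipi_cb() and __sgx_encl_ewb()):
 *
 *	__etrack(sgx_get_epc_virt_addr(encl->secs.epc_page));
 *	on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
 *	ret = __sgx_encl_ewb(epc_page, va_slot, backing);
 *
 * with encl->lock held across all three steps so that no new ETRACK is issued
 * before the IPIs have completed.
 */
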
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
                                              pgoff_t index)
{
        struct address_space *mapping = encl->backing->f_mapping;
        gfp_t gfpmask = mapping_gfp_mask(mapping);

        return shmem_read_mapping_page_gfp(mapping, index, gfpmask);
}

/**
 * __sgx_encl_get_backing() - Pin the backing storage
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Pin the backing storage pages for storing the encrypted contents and Paging
 * Crypto MetaData (PCMD) of an enclave page.
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int __sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
                                  struct sgx_backing *backing)
{
        pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
        struct page *contents;
        struct page *pcmd;

        contents = sgx_encl_get_backing_page(encl, page_index);
        if (IS_ERR(contents))
                return PTR_ERR(contents);

        pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
        if (IS_ERR(pcmd)) {
                put_page(contents);
                return PTR_ERR(pcmd);
        }

        backing->contents = contents;
        backing->pcmd = pcmd;
        backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);

        return 0;
}

/*
 * When called from ksgxd, returns the mem_cgroup of a struct mm stored
 * in the enclave's mm_list. When not called from ksgxd, just returns
 * the mem_cgroup of the current task.
 */
static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
{
        struct mem_cgroup *memcg = NULL;
        struct sgx_encl_mm *encl_mm;
        int idx;

        /*
         * If called from normal task context, return the mem_cgroup
         * of the current task's mm. The remainder of the handling is for
         * ksgxd.
         */
        if (!current_is_ksgxd())
                return get_mem_cgroup_from_mm(current->mm);

        /*
         * Search the enclave's mm_list to find an mm associated with
         * this enclave to charge the allocation to.
         */
        idx = srcu_read_lock(&encl->srcu);

        list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                if (!mmget_not_zero(encl_mm->mm))
                        continue;

                memcg = get_mem_cgroup_from_mm(encl_mm->mm);

                mmput_async(encl_mm->mm);

                break;
        }

        srcu_read_unlock(&encl->srcu, idx);

        /*
         * In the rare case that there isn't an mm associated with
         * the enclave, set memcg to the current active mem_cgroup.
         * This will be the root mem_cgroup if there is no active
         * mem_cgroup.
         */
        if (!memcg)
                return get_mem_cgroup_from_mm(NULL);

        return memcg;
}

/**
 * sgx_encl_alloc_backing() - create a new backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * When called from ksgxd, sets the active memcg from one of the
 * mms in the enclave's mm_list prior to any backing page allocation,
 * in order to ensure that shmem page allocations are charged to the
 * enclave. Create a backing page for loading data back into an EPC page with
 * ELDU. This function takes a reference on a new backing page which
 * must be dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
                           struct sgx_backing *backing)
{
        struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
        struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
        int ret;

        ret = __sgx_encl_get_backing(encl, page_index, backing);

        set_active_memcg(memcg);
        mem_cgroup_put(encl_memcg);

        return ret;
}

/**
 * sgx_encl_lookup_backing() - retrieve an existing backing storage page
 * @encl:	an enclave pointer
 * @page_index:	enclave page index
 * @backing:	data for accessing backing storage for the page
 *
 * Retrieve a backing page for loading data back into an EPC page with ELDU.
 * It is the caller's responsibility to ensure that it is appropriate to use
 * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
 * not used correctly, this will cause an allocation which is not accounted for.
 * This function takes a reference on an existing backing page which must be
 * dropped with a corresponding call to sgx_encl_put_backing().
 *
 * Return:
 *   0 on success,
 *   -errno otherwise.
 */
static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
                                   struct sgx_backing *backing)
{
        return __sgx_encl_get_backing(encl, page_index, backing);
}

/**
 * sgx_encl_put_backing() - Unpin the backing storage
 * @backing:	data for accessing backing storage for the page
 */
void sgx_encl_put_backing(struct sgx_backing *backing)
{
        put_page(backing->pcmd);
        put_page(backing->contents);
}

static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr,
                                            void *data)
{
        pte_t pte;
        int ret;

        ret = pte_young(*ptep);
        if (ret) {
                pte = pte_mkold(*ptep);
                set_pte_at((struct mm_struct *)data, addr, ptep, pte);
        }

        return ret;
}

/**
 * sgx_encl_test_and_clear_young() - Test and reset the accessed bit
 * @mm:		mm_struct that is checked
 * @page:	enclave page to be tested for recent access
 *
 * Checks the Access (A) bit from the PTE corresponding to the enclave page and
 * clears it.
 *
 * Return: 1 if the page has been recently accessed and 0 if not.
 */
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
                                  struct sgx_encl_page *page)
{
        unsigned long addr = page->desc & PAGE_MASK;
        struct sgx_encl *encl = page->encl;
        struct vm_area_struct *vma;
        int ret;

        ret = sgx_encl_find(mm, addr, &vma);
        if (ret)
                return 0;

        if (encl != vma->vm_private_data)
                return 0;

        ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE,
                                  sgx_encl_test_and_clear_young_cb, vma->vm_mm);
        if (ret < 0)
                return 0;

        return ret;
}

struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
                                          unsigned long offset,
                                          u64 secinfo_flags)
{
        struct sgx_encl_page *encl_page;
        unsigned long prot;

        encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
        if (!encl_page)
                return ERR_PTR(-ENOMEM);

        encl_page->desc = encl->base + offset;
        encl_page->encl = encl;

        prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
               _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);

        /*
         * TCS pages must always have RW set for CPU access while the SECINFO
         * permissions are *always* zero - the CPU ignores the user provided
         * values and silently overwrites them with zero permissions.
         */
        if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
                prot |= PROT_READ | PROT_WRITE;

        /* Calculate maximum of the VM flags for the page. */
        encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);

        return encl_page;
}

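/*
 * For illustration: SECINFO flags of SGX_SECINFO_R | SGX_SECINFO_X translate
 * to prot == PROT_READ | PROT_EXEC, so calc_vm_prot_bits() yields
 * vm_max_prot_bits == VM_READ | VM_EXEC and a later PROT_WRITE mapping of the
 * page is rejected by sgx_encl_may_map().
 */
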
/**
 * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
 * @encl: the enclave
 * @addr: page aligned pointer to single page for which PTEs will be removed
 *
 * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
 * @addr from each VMA. Ensure that page fault handler is ready to handle
 * new mappings of @addr before calling this function.
 */
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
        unsigned long mm_list_version;
        struct sgx_encl_mm *encl_mm;
        struct vm_area_struct *vma;
        int idx, ret;

        do {
                mm_list_version = encl->mm_list_version;

                /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
                smp_rmb();

                idx = srcu_read_lock(&encl->srcu);

                list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
                        if (!mmget_not_zero(encl_mm->mm))
                                continue;

                        mmap_read_lock(encl_mm->mm);

                        ret = sgx_encl_find(encl_mm->mm, addr, &vma);
                        if (!ret && encl == vma->vm_private_data)
                                zap_vma_ptes(vma, addr, PAGE_SIZE);

                        mmap_read_unlock(encl_mm->mm);

                        mmput_async(encl_mm->mm);
                }

                srcu_read_unlock(&encl->srcu, idx);
        } while (unlikely(encl->mm_list_version != mm_list_version));
}

/**
 * sgx_alloc_va_page() - Allocate a Version Array (VA) page
 * @reclaim: Reclaim EPC pages directly if none available. Enclave
 *           mutex should not be held if this is set.
 *
 * Allocate a free EPC page and convert it to a Version Array (VA) page.
 *
 * Return:
 *   a VA page,
 *   -errno otherwise
 */
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
        struct sgx_epc_page *epc_page;
        int ret;

        epc_page = sgx_alloc_epc_page(NULL, reclaim);
        if (IS_ERR(epc_page))
                return ERR_CAST(epc_page);

        ret = __epa(sgx_get_epc_virt_addr(epc_page));
        if (ret) {
                WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
                sgx_encl_free_epc_page(epc_page);
                return ERR_PTR(-EFAULT);
        }

        return epc_page;
}

/**
 * sgx_alloc_va_slot - allocate a VA slot
 * @va_page:	a &struct sgx_va_page instance
 *
 * Allocates a slot from a &struct sgx_va_page instance.
 *
 * Return: offset of the slot inside the VA page
 */
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        if (slot < SGX_VA_SLOT_COUNT)
                set_bit(slot, va_page->slots);

        return slot << 3;
}

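/*
 * Each VA slot holds an 8-byte page version, hence the "slot << 3" byte
 * offset above: for example, bit 5 in va_page->slots corresponds to byte
 * offset 40 within the VA page, and sgx_free_va_slot() reverses the mapping
 * with "offset >> 3".
 */
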
/**
 * sgx_free_va_slot - free a VA slot
 * @va_page:	a &struct sgx_va_page instance
 * @offset:	offset of the slot inside the VA page
 *
 * Frees a slot from a &struct sgx_va_page instance.
 */
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset)
{
        clear_bit(offset >> 3, va_page->slots);
}

/**
 * sgx_va_page_full - is the VA page full?
 * @va_page:	a &struct sgx_va_page instance
 *
 * Return: true if all slots have been taken
 */
bool sgx_va_page_full(struct sgx_va_page *va_page)
{
        int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT);

        return slot == SGX_VA_SLOT_COUNT;
}

/**
 * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
 * @page:	EPC page to be freed
 *
 * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
 * only upon success, it puts the page back to the free page list. Otherwise, it
 * gives a WARNING to indicate the page is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
        int ret;

        WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

        ret = __eremove(sgx_get_epc_virt_addr(page));
        if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
                return;

        sgx_free_epc_page(page);
}