123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Device driver to expose SGX enclave memory to KVM guests.
- *
- * Copyright(c) 2021 Intel Corporation.
- */
- #include <linux/miscdevice.h>
- #include <linux/mm.h>
- #include <linux/mman.h>
- #include <linux/sched/mm.h>
- #include <linux/sched/signal.h>
- #include <linux/slab.h>
- #include <linux/xarray.h>
- #include <asm/sgx.h>
- #include <uapi/asm/sgx.h>
- #include "encls.h"
- #include "sgx.h"
- struct sgx_vepc {
- struct xarray page_array;
- struct mutex lock;
- };
- /*
- * Temporary SECS pages that cannot be EREMOVE'd due to having child in other
- * virtual EPC instances, and the lock to protect it.
- */
- static struct mutex zombie_secs_pages_lock;
- static struct list_head zombie_secs_pages;
- static int __sgx_vepc_fault(struct sgx_vepc *vepc,
- struct vm_area_struct *vma, unsigned long addr)
- {
- struct sgx_epc_page *epc_page;
- unsigned long index, pfn;
- int ret;
- WARN_ON(!mutex_is_locked(&vepc->lock));
- /* Calculate index of EPC page in virtual EPC's page_array */
- index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
- epc_page = xa_load(&vepc->page_array, index);
- if (epc_page)
- return 0;
- epc_page = sgx_alloc_epc_page(vepc, false);
- if (IS_ERR(epc_page))
- return PTR_ERR(epc_page);
- ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
- if (ret)
- goto err_free;
- pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
- ret = vmf_insert_pfn(vma, addr, pfn);
- if (ret != VM_FAULT_NOPAGE) {
- ret = -EFAULT;
- goto err_delete;
- }
- return 0;
- err_delete:
- xa_erase(&vepc->page_array, index);
- err_free:
- sgx_free_epc_page(epc_page);
- return ret;
- }
- static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
- {
- struct vm_area_struct *vma = vmf->vma;
- struct sgx_vepc *vepc = vma->vm_private_data;
- int ret;
- mutex_lock(&vepc->lock);
- ret = __sgx_vepc_fault(vepc, vma, vmf->address);
- mutex_unlock(&vepc->lock);
- if (!ret)
- return VM_FAULT_NOPAGE;
- if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
- mmap_read_unlock(vma->vm_mm);
- return VM_FAULT_RETRY;
- }
- return VM_FAULT_SIGBUS;
- }
- static const struct vm_operations_struct sgx_vepc_vm_ops = {
- .fault = sgx_vepc_fault,
- };
- static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
- {
- struct sgx_vepc *vepc = file->private_data;
- if (!(vma->vm_flags & VM_SHARED))
- return -EINVAL;
- vma->vm_ops = &sgx_vepc_vm_ops;
- /* Don't copy VMA in fork() */
- vm_flags_set(vma, VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY);
- vma->vm_private_data = vepc;
- return 0;
- }
/*
 * Run EREMOVE on a previously guest-owned EPC page.
 *
 * Guests cannot be trusted to have left the page in a good state, so EREMOVE
 * is executed unconditionally.  If the guest already EREMOVE'd the page
 * properly, the extra EREMOVE is harmless.
 *
 * This only executes EREMOVE; handing the page back to the general EPC page
 * pool is left to the caller.
 *
 * Return: whatever __eremove() returns.
 */
static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
{
	return __eremove(sgx_get_epc_virt_addr(epc_page));
}
- static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
- {
- int ret = sgx_vepc_remove_page(epc_page);
- if (ret) {
- /*
- * Only SGX_CHILD_PRESENT is expected, which is because of
- * EREMOVE'ing an SECS still with child, in which case it can
- * be handled by EREMOVE'ing the SECS again after all pages in
- * virtual EPC have been EREMOVE'd. See comments in below in
- * sgx_vepc_release().
- *
- * The user of virtual EPC (KVM) needs to guarantee there's no
- * logical processor is still running in the enclave in guest,
- * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
- * handled here.
- */
- WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
- ret, ret);
- return ret;
- }
- sgx_free_epc_page(epc_page);
- return 0;
- }
- static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
- {
- struct sgx_epc_page *entry;
- unsigned long index;
- long failures = 0;
- xa_for_each(&vepc->page_array, index, entry) {
- int ret = sgx_vepc_remove_page(entry);
- if (ret) {
- if (ret == SGX_CHILD_PRESENT) {
- /* The page is a SECS, userspace will retry. */
- failures++;
- } else {
- /*
- * Report errors due to #GP or SGX_ENCLAVE_ACT; do not
- * WARN, as userspace can induce said failures by
- * calling the ioctl concurrently on multiple vEPCs or
- * while one or more CPUs is running the enclave. Only
- * a #PF on EREMOVE indicates a kernel/hardware issue.
- */
- WARN_ON_ONCE(encls_faulted(ret) &&
- ENCLS_TRAPNR(ret) != X86_TRAP_GP);
- return -EBUSY;
- }
- }
- cond_resched();
- }
- /*
- * Return the number of SECS pages that failed to be removed, so
- * userspace knows that it has to retry.
- */
- return failures;
- }
- static int sgx_vepc_release(struct inode *inode, struct file *file)
- {
- struct sgx_vepc *vepc = file->private_data;
- struct sgx_epc_page *epc_page, *tmp, *entry;
- unsigned long index;
- LIST_HEAD(secs_pages);
- xa_for_each(&vepc->page_array, index, entry) {
- /*
- * Remove all normal, child pages. sgx_vepc_free_page()
- * will fail if EREMOVE fails, but this is OK and expected on
- * SECS pages. Those can only be EREMOVE'd *after* all their
- * child pages. Retries below will clean them up.
- */
- if (sgx_vepc_free_page(entry))
- continue;
- xa_erase(&vepc->page_array, index);
- cond_resched();
- }
- /*
- * Retry EREMOVE'ing pages. This will clean up any SECS pages that
- * only had children in this 'epc' area.
- */
- xa_for_each(&vepc->page_array, index, entry) {
- epc_page = entry;
- /*
- * An EREMOVE failure here means that the SECS page still
- * has children. But, since all children in this 'sgx_vepc'
- * have been removed, the SECS page must have a child on
- * another instance.
- */
- if (sgx_vepc_free_page(epc_page))
- list_add_tail(&epc_page->list, &secs_pages);
- xa_erase(&vepc->page_array, index);
- cond_resched();
- }
- /*
- * SECS pages are "pinned" by child pages, and "unpinned" once all
- * children have been EREMOVE'd. A child page in this instance
- * may have pinned an SECS page encountered in an earlier release(),
- * creating a zombie. Since some children were EREMOVE'd above,
- * try to EREMOVE all zombies in the hopes that one was unpinned.
- */
- mutex_lock(&zombie_secs_pages_lock);
- list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
- /*
- * Speculatively remove the page from the list of zombies,
- * if the page is successfully EREMOVE'd it will be added to
- * the list of free pages. If EREMOVE fails, throw the page
- * on the local list, which will be spliced on at the end.
- */
- list_del(&epc_page->list);
- if (sgx_vepc_free_page(epc_page))
- list_add_tail(&epc_page->list, &secs_pages);
- cond_resched();
- }
- if (!list_empty(&secs_pages))
- list_splice_tail(&secs_pages, &zombie_secs_pages);
- mutex_unlock(&zombie_secs_pages_lock);
- xa_destroy(&vepc->page_array);
- kfree(vepc);
- return 0;
- }
- static int sgx_vepc_open(struct inode *inode, struct file *file)
- {
- struct sgx_vepc *vepc;
- vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
- if (!vepc)
- return -ENOMEM;
- mutex_init(&vepc->lock);
- xa_init(&vepc->page_array);
- file->private_data = vepc;
- return 0;
- }
- static long sgx_vepc_ioctl(struct file *file,
- unsigned int cmd, unsigned long arg)
- {
- struct sgx_vepc *vepc = file->private_data;
- switch (cmd) {
- case SGX_IOC_VEPC_REMOVE_ALL:
- if (arg)
- return -EINVAL;
- return sgx_vepc_remove_all(vepc);
- default:
- return -ENOTTY;
- }
- }
- static const struct file_operations sgx_vepc_fops = {
- .owner = THIS_MODULE,
- .open = sgx_vepc_open,
- .unlocked_ioctl = sgx_vepc_ioctl,
- .compat_ioctl = sgx_vepc_ioctl,
- .release = sgx_vepc_release,
- .mmap = sgx_vepc_mmap,
- };
- static struct miscdevice sgx_vepc_dev = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "sgx_vepc",
- .nodename = "sgx_vepc",
- .fops = &sgx_vepc_fops,
- };
- int __init sgx_vepc_init(void)
- {
- /* SGX virtualization requires KVM to work */
- if (!cpu_feature_enabled(X86_FEATURE_VMX))
- return -ENODEV;
- INIT_LIST_HEAD(&zombie_secs_pages);
- mutex_init(&zombie_secs_pages_lock);
- return misc_register(&sgx_vepc_dev);
- }
- /**
- * sgx_virt_ecreate() - Run ECREATE on behalf of guest
- * @pageinfo: Pointer to PAGEINFO structure
- * @secs: Userspace pointer to SECS page
- * @trapnr: trap number injected to guest in case of ECREATE error
- *
- * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
- * of enforcing policies of guest's enclaves, and return the trap number
- * which should be injected to guest in case of any ECREATE error.
- *
- * Return:
- * - 0: ECREATE was successful.
- * - <0: on error.
- */
- int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
- int *trapnr)
- {
- int ret;
- /*
- * @secs is an untrusted, userspace-provided address. It comes from
- * KVM and is assumed to be a valid pointer which points somewhere in
- * userspace. This can fault and call SGX or other fault handlers when
- * userspace mapping @secs doesn't exist.
- *
- * Add a WARN() to make sure @secs is already valid userspace pointer
- * from caller (KVM), who should already have handled invalid pointer
- * case (for instance, made by malicious guest). All other checks,
- * such as alignment of @secs, are deferred to ENCLS itself.
- */
- if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
- return -EINVAL;
- __uaccess_begin();
- ret = __ecreate(pageinfo, (void *)secs);
- __uaccess_end();
- if (encls_faulted(ret)) {
- *trapnr = ENCLS_TRAPNR(ret);
- return -EFAULT;
- }
- /* ECREATE doesn't return an error code, it faults or succeeds. */
- WARN_ON_ONCE(ret);
- return 0;
- }
- EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
- static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
- void __user *secs)
- {
- int ret;
- /*
- * Make sure all userspace pointers from caller (KVM) are valid.
- * All other checks deferred to ENCLS itself. Also see comment
- * for @secs in sgx_virt_ecreate().
- */
- #define SGX_EINITTOKEN_SIZE 304
- if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
- !access_ok(token, SGX_EINITTOKEN_SIZE) ||
- !access_ok(secs, PAGE_SIZE)))
- return -EINVAL;
- __uaccess_begin();
- ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
- __uaccess_end();
- return ret;
- }
- /**
- * sgx_virt_einit() - Run EINIT on behalf of guest
- * @sigstruct: Userspace pointer to SIGSTRUCT structure
- * @token: Userspace pointer to EINITTOKEN structure
- * @secs: Userspace pointer to SECS page
- * @lepubkeyhash: Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
- * @trapnr: trap number injected to guest in case of EINIT error
- *
- * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
- * in host, SGX driver may rewrite the hardware values at wish, therefore KVM
- * needs to update hardware values to guest's virtual MSR values in order to
- * ensure EINIT is executed with expected hardware values.
- *
- * Return:
- * - 0: EINIT was successful.
- * - <0: on error.
- */
- int sgx_virt_einit(void __user *sigstruct, void __user *token,
- void __user *secs, u64 *lepubkeyhash, int *trapnr)
- {
- int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
- ret = __sgx_virt_einit(sigstruct, token, secs);
- } else {
- preempt_disable();
- sgx_update_lepubkeyhash(lepubkeyhash);
- ret = __sgx_virt_einit(sigstruct, token, secs);
- preempt_enable();
- }
- /* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
- if (ret == -EINVAL)
- return ret;
- if (encls_faulted(ret)) {
- *trapnr = ENCLS_TRAPNR(ret);
- return -EFAULT;
- }
- return ret;
- }
- EXPORT_SYMBOL_GPL(sgx_virt_einit);
|