pkvm.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <linux/stat.h>

#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>

#include <uapi/linux/mount.h>
#include <linux/init_syscalls.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);

static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
        const struct memblock_region *r1 = p1;
        const struct memblock_region *r2 = p2;

        return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
        sort(hyp_memory,
             *hyp_memblock_nr_ptr,
             sizeof(struct memblock_region),
             cmp_hyp_memblock,
             NULL);
}

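/*
 * Snapshot the kernel's memblock layout into the hyp_memory array shared
 * with the hypervisor, keeping the copy sorted by base address.
 */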
static int __init register_memblock_regions(void)
{
        struct memblock_region *reg;

        for_each_mem_region(reg) {
                if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
                        return -ENOMEM;
                hyp_memory[*hyp_memblock_nr_ptr] = *reg;
                (*hyp_memblock_nr_ptr)++;
        }
        sort_memblock_regions();

        return 0;
}

static int cmp_moveable_reg(const void *p1, const void *p2)
{
        const struct pkvm_moveable_reg *r1 = p1;
        const struct pkvm_moveable_reg *r2 = p2;

        /*
         * Moveable regions may overlap, so put the largest one first when start
         * addresses are equal to allow a simpler walk from e.g.
         * host_stage2_unmap_unmoveable_regs().
         */
        if (r1->start < r2->start)
                return -1;
        else if (r1->start > r2->start)
                return 1;
        else if (r1->size > r2->size)
                return -1;
        else if (r1->size < r2->size)
                return 1;
        return 0;
}

static void __init sort_moveable_regs(void)
{
        sort(moveable_regs,
             kvm_nvhe_sym(pkvm_moveable_regs_nr),
             sizeof(struct pkvm_moveable_reg),
             cmp_moveable_reg,
             NULL);
}

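/*
 * Build the list of moveable regions handed to the hypervisor: every
 * memblock memory region (PKVM_MREG_MEMORY) plus any device-tree node
 * compatible with "pkvm,protected-region" (PKVM_MREG_PROTECTED_RANGE).
 * Protected regions must have a page-aligned base and size. As a purely
 * illustrative sketch (address, size and cell layout are hypothetical and
 * depend on the parent node), such a node might look like:
 *
 *      example-region@90000000 {
 *              compatible = "pkvm,protected-region";
 *              reg = <0x0 0x90000000 0x0 0x100000>;
 *      };
 */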
static int __init register_moveable_regions(void)
{
        struct memblock_region *reg;
        struct device_node *np;
        int i = 0;

        for_each_mem_region(reg) {
                if (i >= PKVM_NR_MOVEABLE_REGS)
                        return -ENOMEM;
                moveable_regs[i].start = reg->base;
                moveable_regs[i].size = reg->size;
                moveable_regs[i].type = PKVM_MREG_MEMORY;
                i++;
        }

        for_each_compatible_node(np, NULL, "pkvm,protected-region") {
                struct resource res;
                u64 start, size;
                int ret;

                if (i >= PKVM_NR_MOVEABLE_REGS)
                        return -ENOMEM;

                ret = of_address_to_resource(np, 0, &res);
                if (ret)
                        return ret;

                start = res.start;
                size = resource_size(&res);
                if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
                        return -EINVAL;

                moveable_regs[i].start = start;
                moveable_regs[i].size = size;
                moveable_regs[i].type = PKVM_MREG_PROTECTED_RANGE;
                i++;
        }

        kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
        sort_moveable_regs();

        return 0;
}

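/*
 * Called during early boot, while memblock is still the allocator, to carve
 * out the physically contiguous region that will back the hypervisor's own
 * data: its stage-1 page-tables, the host stage-2 page-tables, the hyp VM
 * table, the hyp vmemmap and the FF-A proxy pages.
 */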
void __init kvm_hyp_reserve(void)
{
        u64 hyp_mem_pages = 0;
        int ret;

        if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
                return;

        if (kvm_get_mode() != KVM_MODE_PROTECTED)
                return;

        ret = register_memblock_regions();
        if (ret) {
                *hyp_memblock_nr_ptr = 0;
                kvm_err("Failed to register hyp memblocks: %d\n", ret);
                return;
        }

        ret = register_moveable_regions();
        if (ret) {
                *hyp_memblock_nr_ptr = 0;
                kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
                return;
        }

        hyp_mem_pages += hyp_s1_pgtable_pages();
        hyp_mem_pages += host_s2_pgtable_pages();
        hyp_mem_pages += hyp_vm_table_pages();
        hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
        hyp_mem_pages += hyp_ffa_proxy_pages();

        /*
         * Try to allocate a PMD-aligned region to reduce TLB pressure once
         * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
         */
        hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
        hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
                                           PMD_SIZE);
        if (!hyp_mem_base)
                hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
        else
                hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

        if (!hyp_mem_base) {
                kvm_err("Failed to reserve hyp memory\n");
                return;
        }

        kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
                 hyp_mem_base);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
        size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz, last_ran_sz, total_sz;
        struct kvm_vcpu *host_vcpu;
        pkvm_handle_t handle;
        void *pgd, *hyp_vm, *last_ran;
        unsigned long idx;
        int ret;

        if (host_kvm->created_vcpus < 1)
                return -EINVAL;

        pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

        /*
         * The PGD pages will be reclaimed using a hyp_memcache which implies
         * page granularity. So, use alloc_pages_exact() to get individual
         * refcounts.
         */
        pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
        if (!pgd)
                return -ENOMEM;

        /* Allocate memory to donate to hyp for vm and vcpu pointers. */
        hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
                                        size_mul(sizeof(void *),
                                                 host_kvm->created_vcpus)));
        hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
        if (!hyp_vm) {
                ret = -ENOMEM;
                goto free_pgd;
        }

        /* Allocate memory to donate to hyp for tracking mmu->last_vcpu_ran. */
        last_ran_sz = PAGE_ALIGN(array_size(num_possible_cpus(), sizeof(int)));
        last_ran = alloc_pages_exact(last_ran_sz, GFP_KERNEL_ACCOUNT);
        if (!last_ran) {
                ret = -ENOMEM;
                goto free_vm;
        }

        /* Donate the VM memory to hyp and let hyp initialize it. */
        ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd, last_ran);
        if (ret < 0)
                goto free_last_ran;

        handle = ret;

        host_kvm->arch.pkvm.handle = handle;

        total_sz = hyp_vm_sz + last_ran_sz + pgd_sz;

        /* Donate memory for the vcpus at hyp and initialize it. */
        hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
        kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
                void *hyp_vcpu;

                /* vcpu indexing must be sequential, starting at 0. */
                if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
                        ret = -EINVAL;
                        goto destroy_vm;
                }

                hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
                if (!hyp_vcpu) {
                        ret = -ENOMEM;
                        goto destroy_vm;
                }

                total_sz += hyp_vcpu_sz;
                ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
                                        hyp_vcpu);
                if (ret) {
                        free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
                        goto destroy_vm;
                }
        }

        atomic64_set(&host_kvm->stat.protected_hyp_mem, total_sz);
        kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);

        return 0;

destroy_vm:
        pkvm_destroy_hyp_vm(host_kvm);
        return ret;
free_last_ran:
        free_pages_exact(last_ran, last_ran_sz);
free_vm:
        free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
        free_pages_exact(pgd, pgd_sz);
        return ret;
}

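/* Create the EL2 view of the VM on first use, serialized by the kvm lock. */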
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
        int ret = 0;

        mutex_lock(&host_kvm->lock);
        if (!host_kvm->arch.pkvm.handle)
                ret = __pkvm_create_hyp_vm(host_kvm);
        mutex_unlock(&host_kvm->lock);

        return ret;
}

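/*
 * Tear the EL2 view of the VM down: tell the hypervisor to start the
 * teardown, reclaim every pinned guest page back to the host (unpinning it
 * and dropping the locked-vm accounting), then finalize the teardown and
 * release the memory that was donated to EL2.
 */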
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
        struct kvm_pinned_page *ppage;
        struct mm_struct *mm = current->mm;
        struct rb_node *node;

        if (!host_kvm->arch.pkvm.handle)
                goto out_free;

        WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));

        node = rb_first(&host_kvm->arch.pkvm.pinned_pages);
        while (node) {
                ppage = rb_entry(node, struct kvm_pinned_page, node);
                WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
                                          host_kvm->arch.pkvm.handle,
                                          page_to_pfn(ppage->page),
                                          ppage->ipa));
                cond_resched();

                account_locked_vm(mm, 1, false);
                unpin_user_pages_dirty_lock(&ppage->page, 1, true);
                node = rb_next(node);
                rb_erase(&ppage->node, &host_kvm->arch.pkvm.pinned_pages);
                kfree(ppage);
        }

        WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));

out_free:
        host_kvm->arch.pkvm.handle = 0;
        free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, host_kvm);
        free_hyp_stage2_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
                                 host_kvm);
}

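/*
 * Per-VM pKVM setup called at VM creation time. Protected VMs (created with
 * the KVM_VM_TYPE_ARM_PROTECTED type flag) are only accepted when protected
 * KVM is enabled, and start with no firmware load address configured.
 */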
int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
{
        mutex_init(&host_kvm->lock);

        if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
                return 0;

        if (!is_protected_kvm_enabled())
                return -EINVAL;

        host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
        host_kvm->arch.pkvm.enabled = true;
        return 0;
}

static int rb_ppage_cmp(const void *key, const struct rb_node *node)
{
        struct kvm_pinned_page *p = container_of(node, struct kvm_pinned_page, node);
        phys_addr_t ipa = (phys_addr_t)key;

        return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
}

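/*
 * Forget about the guest page at @ipa: remove it from the VM's pinned-pages
 * tree, drop the locked-vm accounting and unpin the underlying page.
 */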
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
{
        struct kvm_pinned_page *ppage;
        struct mm_struct *mm = current->mm;
        struct rb_node *node;

        write_lock(&host_kvm->mmu_lock);
        node = rb_find((void *)ipa, &host_kvm->arch.pkvm.pinned_pages,
                       rb_ppage_cmp);
        if (node)
                rb_erase(node, &host_kvm->arch.pkvm.pinned_pages);
        write_unlock(&host_kvm->mmu_lock);

        WARN_ON(!node);
        if (!node)
                return;

        ppage = container_of(node, struct kvm_pinned_page, node);
        account_locked_vm(mm, 1, false);
        unpin_user_pages_dirty_lock(&ppage->page, 1, true);
        kfree(ppage);
}

static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
                                         const char *reason)
{
        phys_addr_t end = rmem->base + rmem->size;

        kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
                &rmem->base, &end, reason);
        return -EINVAL;
}

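/*
 * Early parser for the "linux,pkvm-guest-firmware-memory" reserved-memory
 * node describing the protected-guest firmware (pvmfw) carveout. The
 * reservation must be unique, marked "no-map", not "reusable", and have a
 * page-aligned base and size. As a purely illustrative sketch (addresses
 * and cell sizes are hypothetical), the node might look like:
 *
 *      reserved-memory {
 *              #address-cells = <2>;
 *              #size-cells = <2>;
 *              ranges;
 *
 *              pvmfw@80000000 {
 *                      compatible = "linux,pkvm-guest-firmware-memory";
 *                      reg = <0x0 0x80000000 0x0 0x200000>;
 *                      no-map;
 *              };
 *      };
 */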
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
        unsigned long node = rmem->fdt_node;

        if (pkvm_firmware_mem)
                return pkvm_firmware_rmem_err(rmem, "duplicate reservation");

        if (!of_get_flat_dt_prop(node, "no-map", NULL))
                return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");

        if (of_get_flat_dt_prop(node, "reusable", NULL))
                return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");

        if (!PAGE_ALIGNED(rmem->base))
                return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");

        if (!PAGE_ALIGNED(rmem->size))
                return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");

        *pvmfw_size = rmem->size;
        *pvmfw_base = rmem->base;
        pkvm_firmware_mem = rmem;
        return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
                       pkvm_firmware_rmem_init);

static int __init pkvm_firmware_rmem_clear(void)
{
        void *addr;
        phys_addr_t size;

        if (likely(!pkvm_firmware_mem))
                return 0;

        kvm_info("Clearing unused pKVM firmware memory\n");
        size = pkvm_firmware_mem->size;
        addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
        if (!addr)
                return -EINVAL;

        memset(addr, 0, size);
        /* Clear so user space doesn't get stale info via IOCTL. */
        pkvm_firmware_mem = NULL;

        dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
        memunmap(addr);
        return 0;
}

static void _kvm_host_prot_finalize(void *arg)
{
        int *err = arg;

        if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
                WRITE_ONCE(*err, -EINVAL);
}

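/*
 * De-privilege the host: mark protected mode as initialized, downgrade the
 * recorded boot mode to EL1 so CPUs resuming from suspend don't bounce
 * through EL2, and then have every CPU call __pkvm_prot_finalize to install
 * the host stage-2.
 */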
static int pkvm_drop_host_privileges(void)
{
        int ret = 0;

        /*
         * Flip the static key upfront as that may no longer be possible
         * once the host stage 2 is installed.
         */
        static_branch_enable(&kvm_protected_mode_initialized);

        /*
         * Fixup the boot mode so that we don't take spurious round
         * trips via EL2 on cpu_resume. Flush to the PoC for good
         * measure, so that it can be observed by a CPU coming out of
         * suspend with the MMU off.
         */
        __boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
        dcache_clean_poc((unsigned long)__boot_cpu_mode,
                         (unsigned long)(__boot_cpu_mode + 2));

        on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
        return ret;
}

static int __init finalize_pkvm(void)
{
        int ret;

        if (!is_protected_kvm_enabled()) {
                pkvm_firmware_rmem_clear();
                return 0;
        }

        /*
         * Modules can play an essential part in the pKVM protection. All of
         * them must load properly for protected VMs to be enabled.
         */
        if (pkvm_load_early_modules())
                pkvm_firmware_rmem_clear();

        /*
         * Exclude HYP sections from kmemleak so that they don't get peeked
         * at, which would end badly once they become inaccessible.
         */
        kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
        kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
        kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

        flush_deferred_probe_now();

        /* If no DMA protection. */
        if (!pkvm_iommu_finalized())
                pkvm_firmware_rmem_clear();

        ret = pkvm_drop_host_privileges();
        if (ret) {
                pr_err("Failed to de-privilege the host kernel: %d\n", ret);
                pkvm_firmware_rmem_clear();
        }

#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC
        if (!ret)
                ret = pkvm_register_early_nc_mappings();
#endif

        return ret;
}
device_initcall_sync(finalize_pkvm);

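/*
 * Record the IPA at which the protected-guest firmware should be loaded.
 * Only allowed while a firmware carveout exists and before the hyp VM has
 * been created.
 */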
static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
        int ret = 0;

        if (!pkvm_firmware_mem)
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->arch.pkvm.handle) {
                ret = -EBUSY;
                goto out_unlock;
        }

        kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int pkvm_vm_ioctl_info(struct kvm *kvm,
                              struct kvm_protected_vm_info __user *info)
{
        struct kvm_protected_vm_info kinfo = {
                .firmware_size = pkvm_firmware_mem ?
                                 pkvm_firmware_mem->size :
                                 0,
        };

        return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}

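/*
 * VM-scoped KVM_ENABLE_CAP backend for protected VMs. cap->flags selects the
 * operation (set the pvmfw load IPA, or query info), cap->args[0] carries
 * the IPA or the user pointer, and args[1..3] must be zero. As a hedged,
 * illustrative sketch of a userspace caller (assuming the capability number
 * is KVM_CAP_ARM_PROTECTED_VM, which is not defined in this file):
 *
 *      struct kvm_enable_cap cap = {
 *              .cap = KVM_CAP_ARM_PROTECTED_VM,
 *              .flags = KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
 *              .args = { fw_ipa, 0, 0, 0 },
 *      };
 *      ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */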
int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        if (!kvm_vm_is_protected(kvm))
                return -EINVAL;

        if (cap->args[1] || cap->args[2] || cap->args[3])
                return -EINVAL;

        switch (cap->flags) {
        case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
                return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
        case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
                return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
        default:
                return -EINVAL;
        }

        return 0;
}

#ifdef CONFIG_MODULES
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;

static int __init early_pkvm_modules_cfg(char *arg)
{
        /*
         * Loading pKVM modules with kvm-arm.protected_modules is deprecated.
         * Use kvm-arm.protected_modules=<module1>,<module2>.
         */
        if (!arg)
                return -EINVAL;

        strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);

        return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);

static void free_modprobe_argv(struct subprocess_info *info)
{
        kfree(info->argv);
}

/*
 * Heavily inspired by request_module(). The latter couldn't be reused though,
 * as the feature can be disabled depending on the umh configuration. Here,
 * some security is enforced by making sure this can be called only while pKVM
 * is enabled and not yet completely initialized.
 */
static int __init __pkvm_request_early_module(char *module_name,
                                              char *module_path)
{
        char *modprobe_path = CONFIG_MODPROBE_PATH;
        struct subprocess_info *info;
        static char *envp[] = {
                "HOME=/",
                "TERM=linux",
                "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                NULL
        };
        char **argv;
        int idx = 0;

        if (!is_protected_kvm_enabled())
                return -EACCES;

        if (static_branch_likely(&kvm_protected_mode_initialized))
                return -EACCES;

        argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
        if (!argv)
                return -ENOMEM;

        argv[idx++] = modprobe_path;
        argv[idx++] = "-q";
        if (*module_path != '\0') {
                argv[idx++] = "-d";
                argv[idx++] = module_path;
        }
        argv[idx++] = "--";
        argv[idx++] = module_name;
        argv[idx++] = NULL;

        info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
                                         NULL, free_modprobe_argv, NULL);
        if (!info)
                goto err;

        /* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
        info->path = modprobe_path;

        return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
err:
        kfree(argv);
        return -ENOMEM;
}

static int __init pkvm_request_early_module(char *module_name, char *module_path)
{
        int err = __pkvm_request_early_module(module_name, module_path);

        if (!err)
                return 0;

        /* Already tried the default path */
        if (*module_path == '\0')
                return err;

        pr_info("loading %s from %s failed, falling back to the default path\n",
                module_name, module_path);
        return __pkvm_request_early_module(module_name, "");
}

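/*
 * Load every module listed in kvm-arm.protected_modules (comma-separated)
 * through the modprobe user-mode helper, looking in CONFIG_PKVM_MODULE_PATH
 * first and falling back to the default module path. /proc is mounted
 * beforehand, presumably for modprobe's benefit.
 */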
int __init pkvm_load_early_modules(void)
{
        char *token, *buf = early_pkvm_modules;
        char *module_path = CONFIG_PKVM_MODULE_PATH;
        int err = init_mount("proc", "/proc", "proc",
                             MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL);

        if (err)
                return err;

        while (true) {
                token = strsep(&buf, ",");

                if (!token)
                        break;

                if (*token) {
                        err = pkvm_request_early_module(token, module_path);
                        if (err) {
                                pr_err("Failed to load pkvm module %s: %d\n",
                                       token, err);
                                return err;
                        }
                }

                if (buf)
                        *(buf - 1) = ',';
        }

        return 0;
}

struct pkvm_mod_sec_mapping {
        struct pkvm_module_section *sec;
        enum kvm_pgtable_prot prot;
};

static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
{
        size_t offset;
        u64 pfn;

        for (offset = 0; offset < size; offset += PAGE_SIZE) {
                pfn = vmalloc_to_pfn(kern_va + offset);
                kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
                                  hyp_va + offset);
        }
}

static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
        size_t offset, size;
        void *start;
        int i;

        for (i = 0; i < nr_secs; i++) {
                start = secs_map[i].sec->start;
                size = secs_map[i].sec->end - start;
                offset = start - secs_map[0].sec->start;
                pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
        }
}

static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
{
        size_t offset, size = sec_map->sec->end - sec_map->sec->start;
        int ret;
        u64 pfn;

        for (offset = 0; offset < size; offset += PAGE_SIZE) {
                pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
                ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
                                        hyp_va + offset, sec_map->prot);
                if (ret) {
                        pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
                        return ret;
                }
        }

        return 0;
}

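/*
 * Map each section at hyp_va_base plus its offset from the first (lowest)
 * section, so the EL2 layout mirrors the kernel-side layout of the module.
 * On failure, every section mapped so far is unmapped again.
 */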
static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
        size_t offset;
        int i, ret;

        for (i = 0; i < nr_secs; i++) {
                offset = secs_map[i].sec->start - secs_map[0].sec->start;
                ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
                if (ret) {
                        pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
                        return ret;
                }
        }

        return 0;
}

static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
{
        struct pkvm_mod_sec_mapping const *s1 = p1;
        struct pkvm_mod_sec_mapping const *s2 = p2;

        return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
}

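/*
 * Load the EL2 part of a pKVM module into the hypervisor: check section
 * alignment, sort the sections by kernel VA, have the hypervisor reserve a
 * hyp VA range large enough for all of them, apply the hyp relocations,
 * map the sections with the requested permissions and finally run the
 * module's EL2 init function. On success, *token identifies the module in
 * later calls (it is currently just the hyp VA base).
 */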
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
{
        struct pkvm_el2_module *mod = &this->arch.hyp;
        struct pkvm_mod_sec_mapping secs_map[] = {
                { &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
                { &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
                { &mod->rodata, KVM_PGTABLE_PROT_R },
                { &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
        };
        void *start, *end, *hyp_va;
        struct arm_smccc_res res;
        kvm_nvhe_reloc_t *endrel;
        int ret, i, secs_first;
        size_t offset, size;

        /* The pKVM hyp only allows loading before it is fully initialized */
        if (!is_protected_kvm_enabled() || is_pkvm_initialized())
                return -EOPNOTSUPP;

        for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
                if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
                        kvm_err("EL2 sections are not page-aligned\n");
                        return -EINVAL;
                }
        }

        if (!try_module_get(this)) {
                kvm_err("Kernel module has been unloaded\n");
                return -ENODEV;
        }

        /* Missing or empty module sections are placed first */
        sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
        for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
                start = secs_map[secs_first].sec->start;
                if (start)
                        break;
        }
        end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
        size = end - start;

        arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
                          size >> PAGE_SHIFT, &res);
        if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
                kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
                module_put(this);
                return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
        }
        hyp_va = (void *)res.a1;

        /*
         * The token can be used for other calls related to this module.
         * Conveniently, the only information needed is this address, so let's
         * use it as an identifier.
         */
        if (token)
                *token = (unsigned long)hyp_va;

        endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
        kvm_apply_hyp_module_relocations(start, hyp_va, mod->relocs, endrel);

        /*
         * Exclude EL2 module sections from kmemleak before making them
         * inaccessible.
         */
        kmemleak_free_part(start, size);

        ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
                                       ARRAY_SIZE(secs_map) - secs_first);
        if (ret) {
                kvm_err("Failed to map EL2 module page: %d\n", ret);
                module_put(this);
                return ret;
        }

        offset = (size_t)((void *)mod->init - start);
        ret = kvm_call_hyp_nvhe(__pkvm_init_module, hyp_va + offset);
        if (ret) {
                kvm_err("Failed to init EL2 module: %d\n", ret);
                pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
                module_put(this);
                return ret;
        }

        return 0;
}
EXPORT_SYMBOL(__pkvm_load_el2_module);

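/*
 * Register an EL2 function, given by its hypervisor VA, as a dynamic
 * hypercall handler with the pKVM hypervisor.
 */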
int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
        return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
}
EXPORT_SYMBOL(__pkvm_register_el2_call);
#endif /* CONFIG_MODULES */