kvm_mmu.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <[email protected]>
 */

#ifndef __ARM64_KVM_MMU_H__
#define __ARM64_KVM_MMU_H__

#include <asm/page.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/cpufeature.h>

/*
 * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
 * "negative" addresses. This makes it impossible to directly share
 * mappings with the kernel.
 *
 * Instead, give the HYP mode its own VA region at a fixed offset from
 * the kernel by just masking the top bits (which are all ones for a
 * kernel address). We need to find out how many bits to mask.
 *
 * We want to build a set of page tables that cover both parts of the
 * idmap (the trampoline page used to initialize EL2), and our normal
 * runtime VA space, at the same time.
 *
 * Given that the kernel uses VA_BITS for its entire address space,
 * and that half of that space (VA_BITS - 1) is used for the linear
 * mapping, we can also limit the EL2 space to (VA_BITS - 1).
 *
 * The main question is "Within the VA_BITS space, does EL2 use the
 * top or the bottom half of that space to shadow the kernel's linear
 * mapping?". As we need to idmap the trampoline page, this is
 * determined by the range in which this page lives.
 *
 * If the page is in the bottom half, we have to use the top half. If
 * the page is in the top half, we have to use the bottom half:
 *
 * T = __pa_symbol(__hyp_idmap_text_start)
 * if (T & BIT(VA_BITS - 1))
 *	HYP_VA_MIN = 0  // idmap in upper half
 * else
 *	HYP_VA_MIN = 1 << (VA_BITS - 1)
 * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
 *
 * When using VHE, there are no separate hyp mappings and all KVM
 * functionality is already mapped as part of the main kernel
 * mappings, and none of this applies in that case.
 */
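
/*
 * Worked example (illustrative only, not taken from this file): with
 * VA_BITS = 48 and an idmap page that happens to sit in the bottom half
 * of that range, T & BIT(47) == 0, so EL2 shadows the kernel in the top
 * half:
 *
 *	HYP_VA_MIN = 1 << 47                    = 0x0000800000000000
 *	HYP_VA_MAX = HYP_VA_MIN + (1 << 47) - 1 = 0x0000ffffffffffff
 */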
#ifdef __ASSEMBLY__

#include <asm/alternative.h>

/*
 * Convert a kernel VA into a HYP VA.
 * reg: VA to be converted.
 *
 * The actual code generation takes place in kvm_update_va_mask, and
 * the instructions below are only there to reserve the space and
 * perform the register allocation (kvm_update_va_mask uses the
 * specific registers encoded in the instructions).
 */
.macro kern_hyp_va	reg
alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
	and	\reg, \reg, #1		/* mask with va_mask */
	ror	\reg, \reg, #1		/* rotate to the first tag bit */
	add	\reg, \reg, #0		/* insert the low 12 bits of the tag */
	add	\reg, \reg, #0, lsl 12	/* insert the top 12 bits of the tag */
	ror	\reg, \reg, #63		/* rotate back */
alternative_cb_end
.endm

/*
 * Convert a hypervisor VA to a PA
 * reg: hypervisor address to be converted in place
 * tmp: temporary register
 */
.macro hyp_pa reg, tmp
	ldr_l	\tmp, hyp_physvirt_offset
	add	\reg, \reg, \tmp
.endm

/*
 * Convert a hypervisor VA to a kernel image address
 * reg: hypervisor address to be converted in place
 * tmp: temporary register
 *
 * The actual code generation takes place in kvm_get_kimage_voffset, and
 * the instructions below are only there to reserve the space and
 * perform the register allocation (kvm_get_kimage_voffset uses the
 * specific registers encoded in the instructions).
 */
.macro hyp_kimg_va reg, tmp
	/* Convert hyp VA -> PA. */
	hyp_pa	\reg, \tmp

	/* Load kimage_voffset. */
alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset
	movz	\tmp, #0
	movk	\tmp, #0, lsl #16
	movk	\tmp, #0, lsl #32
	movk	\tmp, #0, lsl #48
alternative_cb_end

	/* Convert PA -> kimg VA. */
	add	\reg, \reg, \tmp
.endm
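
/*
 * Net effect of hyp_kimg_va (illustrative sketch): two affine steps,
 *
 *	pa      = hyp_va + hyp_physvirt_offset;	(hyp_pa)
 *	kimg_va = pa + kimage_voffset;
 *
 * where the kimage_voffset constant is patched into the movz/movk
 * sequence above by the kvm_get_kimage_voffset callback.
 */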
#else

#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/kvm_host.h>
#include <asm/kvm_pkvm_module.h>

void kvm_update_va_mask(struct alt_instr *alt,
			__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
void kvm_apply_hyp_relocations(void);

#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)

static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
	asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
				    "ror %0, %0, #1\n"
				    "add %0, %0, #0\n"
				    "add %0, %0, #0, lsl 12\n"
				    "ror %0, %0, #63\n",
				    ARM64_ALWAYS_SYSTEM,
				    kvm_update_va_mask)
		     : "+r" (v));
	return v;
}

#define kern_hyp_va(v)	((typeof(v))(__kern_hyp_va((unsigned long)(v))))
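
/*
 * Illustrative sketch (compiled out): a rough C model of what the patched
 * kern_hyp_va() sequence computes. "va_mask" and "tag" stand in for the
 * values derived by kvm_compute_layout(); this is not the kernel's actual
 * patching code.
 */
#if 0
static inline unsigned long __kern_hyp_va_model(unsigned long va,
						unsigned long va_mask,
						unsigned long tag)
{
	/* Clear the kernel's high bits, then insert the (pre-shifted) tag. */
	return (va & va_mask) | tag;
}
#endif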
/*
 * We currently support using a VM-specified IPA size. For backward
 * compatibility, the default IPA size is fixed to 40bits.
 */
#define KVM_PHYS_SHIFT	(40)

#define kvm_phys_shift(kvm)		VTCR_EL2_IPA(kvm->arch.vtcr)
#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
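
/*
 * Example (illustrative): for a VM using the default 40-bit IPA space,
 *	kvm_phys_size(kvm) == 1ULL << 40 == 1 TiB
 *	kvm_phys_mask(kvm) == 0xffffffffff
 */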
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>

int kvm_share_hyp(void *from, void *to);
void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int __create_hyp_mappings(unsigned long start, unsigned long size,
			  unsigned long phys, enum kvm_pgtable_prot prot);
int hyp_alloc_private_va_range(size_t size, unsigned long *haddr);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr);
void free_hyp_pgds(void);

void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable);

int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);

phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(u32 *hyp_va_bits);
static inline void *__kvm_vector_slot2addr(void *base,
					   enum arm64_hyp_spectre_vector slot)
{
	/*
	 * Each vector slot is 2K. HYP_VECTOR_DIRECT keeps index 0; every
	 * other slot is shifted down by one, so the slot immediately after
	 * HYP_VECTOR_DIRECT shares its 2K of vectors.
	 */
	int idx = slot - (slot != HYP_VECTOR_DIRECT);

	return base + (idx * SZ_2K);
}
struct kvm;

#define kvm_flush_dcache_to_poc(a, l)	do {				\
	unsigned long __a = (unsigned long)(a);				\
	unsigned long __l = (unsigned long)(l);				\
									\
	if (__l)							\
		dcache_clean_inval_poc(__a, __a + __l);			\
} while (0)
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
	/* 0b101 covers SCTLR_EL1.M (bit 0) and SCTLR_EL1.C (bit 2). */
	return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
static inline void __clean_dcache_guest_page(void *va, size_t size)
{
	/*
	 * With FWB, we ensure that the guest always accesses memory using
	 * cacheable attributes, and we don't have to clean to PoC when
	 * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
	 * PoU is not required either in this case.
	 */
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	kvm_flush_dcache_to_poc(va, size);
}

static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
	if (icache_is_aliasing()) {
		/* any kind of VIPT cache */
		icache_inval_all_pou();
	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
		icache_inval_pou((unsigned long)va, (unsigned long)va + size);
	}
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);

static inline unsigned int kvm_get_vmid_bits(void)
{
	int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

	return get_vmid_bits(reg);
}

/*
 * We are not in the kvm->srcu critical section most of the time, so we take
 * the SRCU read lock here. Since we copy the data from the user page, we
 * can immediately drop the lock again.
 */
static inline int kvm_read_guest_lock(struct kvm *kvm,
				      gpa_t gpa, void *data, unsigned long len)
{
	int srcu_idx = srcu_read_lock(&kvm->srcu);
	int ret = kvm_read_guest(kvm, gpa, data, len);

	srcu_read_unlock(&kvm->srcu, srcu_idx);

	return ret;
}

static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
				       const void *data, unsigned long len)
{
	int srcu_idx = srcu_read_lock(&kvm->srcu);
	int ret = kvm_write_guest(kvm, gpa, data, len);

	srcu_read_unlock(&kvm->srcu, srcu_idx);

	return ret;
}
#define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)

/*
 * When this is (directly or indirectly) used on the TLB invalidation
 * path, we rely on a previously issued DSB so that page table updates
 * and VMID reads are correctly ordered.
 */
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
	struct kvm_vmid *vmid = &mmu->vmid;
	u64 vmid_field, baddr;
	u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;

	baddr = mmu->pgd_phys;
	vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
	vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits);
	return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
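
/*
 * Resulting VTTBR_EL2 value (sketch): the translation table base address
 * comes from phys_to_ttbr(mmu->pgd_phys), the VMID field starts at bit 48
 * (VTTBR_VMID_SHIFT) and is truncated to kvm_arm_vmid_bits, and CnP is
 * bit 0 when the system supports it.
 */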
/*
 * Must be called from hyp code running at EL2 with an updated VTTBR
 * and interrupts disabled.
 */
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
					  struct kvm_arch *arch)
{
	write_sysreg(arch->vtcr, vtcr_el2);
	write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require the actual execution of the
	 * above before we can switch to the EL1/EL0 translation regime used by
	 * the guest.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
}

static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
	return container_of(mmu->arch, struct kvm, arch);
}

#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */