kvm_pkvm.h

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <[email protected]>
 * Author: Fuad Tabba <[email protected]>
 */
#ifndef __ARM64_KVM_PKVM_H__
#define __ARM64_KVM_PKVM_H__

#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
#include <asm/kvm_pgtable.h>
#include <asm/sysreg.h>

/*
 * Stores the SVE state for the host in protected mode.
 */
struct kvm_host_sve_state {
	u64 zcr_el1;

	/*
	 * Ordering is important since __sve_save_state/__sve_restore_state
	 * relies on it.
	 */
	u32 fpsr;
	u32 fpcr;

	/* Must be SVE_VQ_BYTES (128 bit) aligned. */
	char sve_regs[];
};

/* Maximum number of VMs that can co-exist under pKVM. */
#define KVM_MAX_PVMS 255

#define HYP_MEMBLOCK_REGIONS 128
#define PVMFW_INVALID_LOAD_ADDR	(-1)

int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
int pkvm_init_host_vm(struct kvm *kvm, unsigned long type);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa);

/*
 * Definitions for features to be allowed or restricted for guest virtual
 * machines, depending on the mode KVM is running in and on the type of guest
 * that is running.
 *
 * The ALLOW masks represent a bitmask of feature fields that are allowed
 * without any restrictions as long as they are supported by the system.
 *
 * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
 * features that are restricted to support at most the specified feature.
 *
 * If a feature field is not present in either, then it is not supported.
 *
 * The approach taken for protected VMs is to allow features that are:
 * - Needed by common Linux distributions (e.g., floating point)
 * - Trivial to support, e.g., supporting the feature does not introduce or
 *   require tracking of additional state in KVM
 * - Cannot be trapped, or features the guest cannot be prevented from
 *   using anyway
 */
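/*
 * Illustrative sketch only (not the in-kernel implementation): an ALLOW
 * mask is meant to be AND-ed with the host's view of the corresponding ID
 * register, so that only the allowed feature fields are exposed to a
 * protected guest. The variable names below are hypothetical:
 *
 *	u64 host_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 *	u64 pvm_val  = host_val & PVM_ID_AA64PFR0_ALLOW;
 */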
/*
 * Allow for protected VMs:
 * - Floating-point and Advanced SIMD
 * - GICv3(+) system register interface
 * - Data Independent Timing
 * - Spectre/Meltdown Mitigation
 */
#define PVM_ID_AA64PFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - AArch64 guests only (no support for AArch32 guests):
 *	AArch32 adds complexity in trap handling, emulation, condition codes,
 *	etc...
 * - RAS (v1)
 *	Supported by KVM
 */
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
	)
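/*
 * Illustrative sketch only, continuing the hypothetical host_val/pvm_val
 * example above: a RESTRICT_UNSIGNED field is capped rather than simply
 * masked, i.e. the guest is shown at most the restricted value for that
 * field. For the RAS field, for instance:
 *
 *	u64 host_ras = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), host_val);
 *	u64 max_ras  = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS),
 *				 PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
 *	pvm_val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS),
 *			      min(host_ras, max_ras));
 */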
/*
 * Allow for protected VMs:
 * - Branch Target Identification
 * - Speculative Store Bypassing
 */
#define PVM_ID_AA64PFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
	ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
	)

/*
 * Allow for protected VMs:
 * - Mixed-endian
 * - Distinction between Secure and Non-secure Memory
 * - Mixed-endian at EL0 only
 * - Non-context synchronizing exception entry and exit
 */
#define PVM_ID_AA64MMFR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
	)

/*
 * Restrict to the following *unsigned* features for protected VMs:
 * - 40-bit IPA
 * - 16-bit ASID
 */
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
	)

/*
 * Allow for protected VMs:
 * - Hardware translation table updates to Access flag and Dirty state
 * - Number of VMID bits from CPU
 * - Hierarchical Permission Disables
 * - Privileged Access Never
 * - SError interrupt exceptions from speculative reads
 * - Enhanced Translation Synchronization
 */
#define PVM_ID_AA64MMFR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
	)

/*
 * Allow for protected VMs:
 * - Common not Private translations
 * - User Access Override
 * - IESB bit in the SCTLR_ELx registers
 * - Unaligned single-copy atomicity and atomic functions
 * - ESR_ELx.EC value on an exception by read access to feature ID space
 * - TTL field in address operations
 * - Break-before-make sequences when changing translation block size
 * - E0PDx mechanism
 */
#define PVM_ID_AA64MMFR2_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
	)
/*
 * No support for Scalable Vectors for protected VMs:
 *	Requires additional support from KVM, e.g., context-switching and
 *	trapping at EL2
 */
#define PVM_ID_AA64ZFR0_ALLOW (0ULL)

/*
 * No support for debug, including breakpoints and watchpoints, for protected
 * VMs:
 *	The Arm architecture mandates support for at least the Armv8 debug
 *	architecture, which would include at least 2 hardware breakpoints and
 *	watchpoints. Providing that support to protected guests adds
 *	considerable state and complexity. Therefore, the reserved value of 0
 *	is used for debug-related fields.
 */
#define PVM_ID_AA64DFR0_ALLOW (0ULL)
#define PVM_ID_AA64DFR1_ALLOW (0ULL)

/*
 * No support for implementation defined features.
 */
#define PVM_ID_AA64AFR0_ALLOW (0ULL)
#define PVM_ID_AA64AFR1_ALLOW (0ULL)

/*
 * No restrictions on instructions implemented in AArch64.
 */
#define PVM_ID_AA64ISAR0_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
	)

#define PVM_ID_AA64ISAR1_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
	)

#define PVM_ID_AA64ISAR2_ALLOW (\
	ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
	ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
	)
/*
 * Returns the maximum number of breakpoints supported for protected VMs.
 */
static inline int pkvm_get_max_brps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs),
			    PVM_ID_AA64DFR0_ALLOW);

	/*
	 * If breakpoints are supported, the maximum number is 1 + the field.
	 * Otherwise, return 0, which is not compliant with the architecture,
	 * but is reserved and is used here to indicate no debug support.
	 */
	return num ? num + 1 : 0;
}

/*
 * Returns the maximum number of watchpoints supported for protected VMs.
 */
static inline int pkvm_get_max_wrps(void)
{
	int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs),
			    PVM_ID_AA64DFR0_ALLOW);

	return num ? num + 1 : 0;
}
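/*
 * Note: since PVM_ID_AA64DFR0_ALLOW is defined as 0 above, the FIELD_GET()
 * in both helpers yields 0, so they currently return 0, i.e. no breakpoints
 * or watchpoints are advertised to protected VMs.
 */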
enum pkvm_moveable_reg_type {
	PKVM_MREG_MEMORY,
	PKVM_MREG_PROTECTED_RANGE,
};

struct pkvm_moveable_reg {
	phys_addr_t start;
	u64 size;
	enum pkvm_moveable_reg_type type;
};

#define PKVM_NR_MOVEABLE_REGS 512
extern struct pkvm_moveable_reg kvm_nvhe_sym(pkvm_moveable_regs)[];
extern unsigned int kvm_nvhe_sym(pkvm_moveable_regs_nr);

extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);

extern phys_addr_t kvm_nvhe_sym(pvmfw_base);
extern phys_addr_t kvm_nvhe_sym(pvmfw_size);

static inline unsigned long
hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
{
	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
	unsigned long start, end;

	start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
	end = start + nr_pages * vmemmap_entry_size;
	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = ALIGN(end, PAGE_SIZE);

	return end - start;
}
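/*
 * Worked example for hyp_vmemmap_memblock_size(), purely illustrative
 * (assuming 4 KiB pages and a hypothetical 4-byte vmemmap entry): a 512 MiB
 * memblock covers 128 Ki pages, so it needs 128 Ki * 4 B = 512 KiB of
 * vmemmap, i.e. 128 pages once aligned to page boundaries.
 */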
static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
{
	unsigned long res = 0, i;

	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
		res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
						 vmemmap_entry_size);
	}

	return res >> PAGE_SHIFT;
}

static inline unsigned long hyp_vm_table_pages(void)
{
	return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
}

static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
	unsigned long total = 0, i;

	/* Provision the worst case scenario */
	for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
		nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
		total += nr_pages;
	}

	return total;
}
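/*
 * Worked example for __hyp_pgtable_max_pages(), purely illustrative
 * (assuming a 4 KiB granule, PTRS_PER_PTE == 512 and
 * KVM_PGTABLE_MAX_LEVELS == 4): mapping 1 GiB (256 Ki pages) at page
 * granularity needs at most 512 + 1 + 1 + 1 = 515 table pages.
 */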
static inline unsigned long __hyp_pgtable_moveable_regs_pages(void)
{
	unsigned long res = 0, i;

	/* Cover all of the moveable regions with page granularity */
	for (i = 0; i < kvm_nvhe_sym(pkvm_moveable_regs_nr); i++) {
		struct pkvm_moveable_reg *reg = &kvm_nvhe_sym(pkvm_moveable_regs)[i];

		res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
	}

	return res;
}

#define __PKVM_PRIVATE_SZ SZ_1G

static inline unsigned long hyp_s1_pgtable_pages(void)
{
	unsigned long res;

	res = __hyp_pgtable_moveable_regs_pages();

	res += __hyp_pgtable_max_pages(__PKVM_PRIVATE_SZ >> PAGE_SHIFT);

	return res;
}

static inline unsigned long host_s2_pgtable_pages(void)
{
	unsigned long res;

	/*
	 * Include an extra 16 pages to safely upper-bound the worst case of
	 * concatenated pgds.
	 */
	res = __hyp_pgtable_moveable_regs_pages() + 16;

	/* Allow 1 GiB for non-moveable regions */
	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);

	return res;
}

#define KVM_FFA_MBOX_NR_PAGES	1

/*
 * Maximum number of constituents allowed in a descriptor. This number is
 * arbitrary, see comment below on SG_MAX_SEGMENTS in hyp_ffa_proxy_pages().
 */
#define KVM_FFA_MAX_NR_CONSTITUENTS	4096
static inline unsigned long hyp_ffa_proxy_pages(void)
{
	size_t desc_max;

	/*
	 * SG_MAX_SEGMENTS is supposed to bound the number of elements in an
	 * sglist, which should match the number of constituents in the
	 * corresponding FFA descriptor. As such, the EL2 buffer needs to be
	 * at least large enough to hold a descriptor with SG_MAX_SEGMENTS
	 * constituents. But the kernel's DMA code doesn't enforce the limit,
	 * and it is sometimes abused, so let's allow larger descriptors and
	 * hope for the best.
	 */
	BUILD_BUG_ON(KVM_FFA_MAX_NR_CONSTITUENTS < SG_MAX_SEGMENTS);

	/*
	 * The hypervisor FFA proxy needs enough memory to buffer a fragmented
	 * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
	 */
	desc_max = sizeof(struct ffa_mem_region) +
		   sizeof(struct ffa_mem_region_attributes) +
		   sizeof(struct ffa_composite_mem_region) +
		   KVM_FFA_MAX_NR_CONSTITUENTS * sizeof(struct ffa_mem_region_addr_range);
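	/*
	 * Rough illustration only (assuming 4 KiB pages and 16-byte
	 * ffa_mem_region_addr_range entries): the constituent array alone is
	 * 4096 * 16 B = 64 KiB, i.e. 16 pages, before the descriptor headers
	 * are added.
	 */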
	/* Plus a page each for the hypervisor's RX and TX mailboxes. */
	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}

static inline size_t pkvm_host_fp_state_size(void)
{
	if (system_supports_sve())
		return size_add(sizeof(struct kvm_host_sve_state),
			SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
	else
		return sizeof(struct user_fpsimd_state);
}

#endif	/* __ARM64_KVM_PKVM_H__ */