Merge tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Radim Krčmář:
 "ARM:
   - Improved guest IPA space support (32 to 52 bits)

   - RAS event delivery for 32bit

   - PMU fixes

   - Guest entry hardening

   - Various cleanups

   - Port of dirty_log_test selftest

  PPC:
   - Nested HV KVM support for radix guests on POWER9. The performance
     is much better than with PR KVM. Migration and arbitrary levels of
     nesting are supported.

   - Disable nested HV-KVM on early POWER9 chips that need a particular
     hardware bug workaround

   - One VM per core mode to prevent potential data leaks

   - PCI pass-through optimization

   - merge ppc-kvm topic branch and kvm-ppc-fixes to get a better base

  s390:
   - Initial version of AP crypto virtualization via vfio-mdev

   - Improvement for vfio-ap

   - Set the host program identifier

   - Optimize page table locking

  x86:
   - Enable nested virtualization by default

   - Implement Hyper-V IPI hypercalls

   - Improve #PF and #DB handling

   - Allow guests to use Enlightened VMCS

   - Add migration selftests for VMCS and Enlightened VMCS

   - Allow coalesced PIO accesses

   - Add an option to perform nested VMCS host state consistency check
     through hardware

   - Automatic tuning of lapic_timer_advance_ns

   - Many fixes, minor improvements, and cleanups"

* tag 'kvm-4.20-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
  KVM/nVMX: Do not validate that posted_intr_desc_addr is page aligned
  Revert "kvm: x86: optimize dr6 restore"
  KVM: PPC: Optimize clearing TCEs for sparse tables
  x86/kvm/nVMX: tweak shadow fields
  selftests/kvm: add missing executables to .gitignore
  KVM: arm64: Safety check PSTATE when entering guest and handle IL
  KVM: PPC: Book3S HV: Don't use streamlined entry path on early POWER9 chips
  arm/arm64: KVM: Enable 32 bits kvm vcpu events support
  arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension()
  KVM: arm64: Fix caching of host MDCR_EL2 value
  KVM: VMX: enable nested virtualization by default
  KVM/x86: Use 32bit xor to clear registers in svm.c
  kvm: x86: Introduce KVM_CAP_EXCEPTION_PAYLOAD
  kvm: vmx: Defer setting of DR6 until #DB delivery
  kvm: x86: Defer setting of CR2 until #PF delivery
  kvm: x86: Add payload operands to kvm_multiple_exception
  kvm: x86: Add exception payload fields to kvm_vcpu_events
  kvm: x86: Add has_payload and payload to kvm_queued_exception
  KVM: Documentation: Fix omission in struct kvm_vcpu_events
  KVM: selftests: add Enlightened VMCS test
  ...
This commit is contained in:
Linus Torvalds
2018-10-25 17:57:35 -07:00
138 changed files with 12445 additions and 3248 deletions

View File

@@ -133,8 +133,7 @@
* space.
*/
#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL))
#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
/* Virtualization Translation Control Register (VTCR) bits */

View File

@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
kvm_call_hyp(__init_stage2_translation);
}
static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
return 0;
}
@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
/*
 * 32bit ARM only offers a static 40bit IPA stage2 setup, so the sole
 * acceptable VM type is zero.
 *
 * Returns 0 when @type is zero, -EINVAL for any other value.
 */
static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	return type ? -EINVAL : 0;
}
#endif /* __ARM_KVM_HOST_H__ */

View File

@@ -35,16 +35,12 @@
addr; \
})
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
#define KVM_MMU_CACHE_MIN_PAGES 2
#ifndef __ASSEMBLY__
#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
@@ -52,6 +48,13 @@
/* Ensure compatibility with arm64 */
#define VA_BITS 32
#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT
#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL)
#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK
#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) (addr)
static inline void kvm_set_ipa_limit(void) {}
static inline bool kvm_cpu_has_cnp(void)
{
return false;

View File

@@ -19,43 +19,53 @@
#ifndef __ARM_S2_PGTABLE_H_
#define __ARM_S2_PGTABLE_H_
#define stage2_pgd_none(pgd) pgd_none(pgd)
#define stage2_pgd_clear(pgd) pgd_clear(pgd)
#define stage2_pgd_present(pgd) pgd_present(pgd)
#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(pud) pud_free(NULL, pud)
/*
* kvm_mmu_cache_min_pages() is the number of pages required
* to install a stage-2 translation. We pre-allocate the entry
* level table at VM creation. Since we have a 3 level page-table,
* we need only two pages to add a new mapping.
*/
#define kvm_mmu_cache_min_pages(kvm) 2
#define stage2_pud_none(pud) pud_none(pud)
#define stage2_pud_clear(pud) pud_clear(pud)
#define stage2_pud_present(pud) pud_present(pud)
#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
#define stage2_pud_huge(pud) pud_huge(pud)
#define stage2_pud_none(kvm, pud) pud_none(pud)
#define stage2_pud_clear(kvm, pud) pud_clear(pud)
#define stage2_pud_present(kvm, pud) pud_present(pud)
#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
#define stage2_pud_huge(kvm, pud) pud_huge(pud)
/* Open coded p*d_addr_end that can deal with 64bit addresses */
static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
#define stage2_pud_addr_end(addr, end) (end)
#define stage2_pud_addr_end(kvm, addr, end) (end)
static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
#define stage2_pgd_index(addr) pgd_index(addr)
#define stage2_pgd_index(kvm, addr) pgd_index(addr)
#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#define stage2_pud_table_empty(pudp) false
#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define stage2_pud_table_empty(kvm, pudp) false
#endif /* __ARM_S2_PGTABLE_H_ */

View File

@@ -537,6 +537,27 @@ static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
/*
 * Convert a PARange encoding into the number of physical address bits
 * it stands for.
 *
 * @parange: encoded PARange value (0..6 are known to the kernel).
 *
 * Returns the matching address width in bits, or CONFIG_ARM64_PA_BITS for
 * any encoding the kernel does not know about.
 */
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
	u32 shift;

	switch (parange) {
	case 0:
		shift = 32;
		break;
	case 1:
		shift = 36;
		break;
	case 2:
		shift = 40;
		break;
	case 3:
		shift = 42;
		break;
	case 4:
		shift = 44;
		break;
	case 5:
		shift = 48;
		break;
	case 6:
		shift = 52;
		break;
	default:
		/*
		 * A future PE could use a value unknown to the kernel.
		 * However, by the "D10.1.4 Principles of the ID scheme
		 * for fields in ID registers", ARM DDI 0487C.a, any new
		 * value is guaranteed to be higher than what we know already.
		 * As a safe limit, we return the limit supported by the kernel.
		 */
		shift = CONFIG_ARM64_PA_BITS;
		break;
	}

	return shift;
}
#endif /* __ASSEMBLY__ */
#endif

View File

@@ -107,6 +107,7 @@
#define VTCR_EL2_RES1 (1 << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
#define VTCR_EL2_TG0_4K TCR_TG0_4K
@@ -120,63 +121,150 @@
#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
#define VTCR_EL2_SL0_SHIFT 6
#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS_SHIFT 19
#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
/*
* We configure the Stage-2 page tables to always restrict the IPA space to be
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
* (see hyp-init.S).
* The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
*
* The magic numbers used for VTTBR_X in this patch can be found in Tables
* D4-23 and D4-25 in ARM DDI 0487A.b.
*/
#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
/*
* VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
* Interestingly, it depends on the page size.
* See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
*
* -----------------------------------------
* | Entry level | 4K | 16K/64K |
* ------------------------------------------
* | Level: 0 | 2 | - |
* ------------------------------------------
* | Level: 1 | 1 | 2 |
* ------------------------------------------
* | Level: 2 | 0 | 1 |
* ------------------------------------------
* | Level: 3 | - | 0 |
* ------------------------------------------
*
* The table roughly translates to :
*
* SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
*
* Where TGRAN_SL0_BASE is a magic number depending on the page size:
* TGRAN_SL0_BASE(4K) = 2
* TGRAN_SL0_BASE(16K) = 3
* TGRAN_SL0_BASE(64K) = 3
* provided we take care of ruling out the unsupported cases and
* Entry_Level = 4 - Number_of_levels.
*
*/
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 38
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K
#define VTCR_EL2_TGRAN_SL0_BASE 3UL
#elif defined(CONFIG_ARM64_16K_PAGES)
/*
* Stage2 translation configuration:
* 16kB pages (TG0 = 2)
* 2 level page tables (SL = 1)
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 42
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K
#define VTCR_EL2_TGRAN_SL0_BASE 3UL
#else /* 4K */
/*
* Stage2 translation configuration:
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 37
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K
#define VTCR_EL2_TGRAN_SL0_BASE 2UL
#endif
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
/*
 * Encode a number of stage2 page table levels as a VTCR_EL2.SL0 field value,
 * already shifted into position: SL0 = TGRAN_SL0_BASE - (4 - levels).
 */
#define VTCR_EL2_LVLS_TO_SL0(levels) \
((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
/*
 * Inverse of the above for an unshifted SL0 field value:
 * levels = sl0 + 4 - TGRAN_SL0_BASE.
 */
#define VTCR_EL2_SL0_TO_LVLS(sl0) \
((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
/* Extract the SL0 field from a full VTCR_EL2 value and return the level count. */
#define VTCR_EL2_LVLS(vtcr) \
VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
/*
* ARM VMSAv8-64 defines an algorithm for finding the translation table
* descriptors in section D4.2.8 in ARM DDI 0487C.a.
*
* The algorithm defines the expectations on the translation table
* addresses for each level, based on PAGE_SIZE, entry level
* and the translation table size (T0SZ). The variable "x" in the
* algorithm determines the alignment of a table base address at a given
* level and thus determines the alignment of VTTBR:BADDR for stage2
* page table entry level.
* Since the number of bits resolved at the entry level could vary
* depending on the T0SZ, the value of "x" is defined based on a
* Magic constant for a given PAGE_SIZE and Entry Level. The
* intermediate levels must be always aligned to the PAGE_SIZE (i.e,
* x = PAGE_SHIFT).
*
* The value of "x" for entry level is calculated as :
* x = Magic_N - T0SZ
*
* where Magic_N is an integer depending on the page size and the entry
* level of the page table as below:
*
* --------------------------------------------
* | Entry level | 4K | 16K | 64K |
* --------------------------------------------
* | Level: 0 (4 levels) | 28 | - | - |
* --------------------------------------------
* | Level: 1 (3 levels) | 37 | 31 | 25 |
* --------------------------------------------
* | Level: 2 (2 levels) | 46 | 42 | 38 |
* --------------------------------------------
* | Level: 3 (1 level) | - | 53 | 51 |
* --------------------------------------------
*
* We have a magic formula for the Magic_N below:
*
* Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
*
* where Number_of_levels = (4 - Level). We are only interested in the
* value for Entry_Level for the stage2 page table.
*
* So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
*
* x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
* = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number_of_levels)
*
* Here is one way to explain the Magic Formula:
*
* x = log2(Size_of_Entry_Level_Table)
*
* Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
* PAGE_SHIFT bits in the PTE, we have :
*
* Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
* = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
* where n = number of levels, and since each pointer is 8bytes, we have:
*
* x = Bits_Entry_Level + 3
* = IPA_SHIFT - (PAGE_SHIFT - 3) * n
*
* The only constraint here is that, we have to find the number of page table
* levels for a given IPA size (which we do, see stage2_pt_levels())
*/
#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3)))
#define VTTBR_CNP_BIT (UL(1))
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
/*
 * Mask covering a VMID field of @size bits, positioned at VTTBR_VMID_SHIFT.
 * @size is parenthesized so that an expression argument (e.g. a conditional)
 * is shifted as a whole rather than being split by operator precedence.
 */
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << (size)) - 1) << VTTBR_VMID_SHIFT)
@@ -224,6 +312,13 @@
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*
* We have
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
*/
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
#define kvm_arm_exception_type \
{0, "IRQ" }, \

View File

@@ -30,6 +30,7 @@
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_EL1_SERROR 1
#define ARM_EXCEPTION_TRAP 2
#define ARM_EXCEPTION_IL 3
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
#define __hyp_this_cpu_ptr(sym) \
({ \

View File

@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
struct kvm_arch {
@@ -61,11 +61,13 @@ struct kvm_arch {
u64 vmid_gen;
u32 vmid;
/* 1-level 2nd stage table, protected by kvm->mmu_lock */
/* stage2 entry level table */
pgd_t *pgd;
/* VTTBR value associated with above pgd and vmid */
u64 vttbr;
/* VTCR_EL2 value for this VM */
u64 vtcr;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
@@ -451,13 +453,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
static inline void __cpu_init_stage2(void)
{
u32 parange = kvm_call_hyp(__init_stage2_translation);
WARN_ONCE(parange < 40,
"PARange is %d bits, unsupported configuration!", parange);
}
static inline void __cpu_init_stage2(void) {}
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -520,8 +516,12 @@ static inline int kvm_arm_have_ssbd(void)
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
void kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
#endif /* __ARM64_KVM_HOST_H__ */

View File

@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
/*
 * Load the stage2 configuration of @kvm into the system registers: the
 * VM's VTCR_EL2 value is written first, followed by its VTTBR_EL2 value.
 *
 * Must be called from hyp code running at EL2 with an updated VTTBR
 * and interrupts disabled.
 */
static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
{
write_sysreg(kvm->arch.vtcr, vtcr_el2);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
#endif /* __ARM64_KVM_HYP_H__ */

View File

@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
/*
 * Per-VM IPA geometry, derived from the VTCR_EL2 value stored in
 * kvm->arch.vtcr: width in bits, size of the IPA space and the matching mask.
 *
 * The argument is parenthesized before '->' so that callers may pass any
 * pointer expression (e.g. a dereference or a conditional), not just a
 * plain identifier.
 */
#define kvm_phys_shift(kvm)		VTCR_EL2_IPA((kvm)->arch.vtcr)
#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
/*
 * The page backing @ptr is reported as empty when its reference count
 * is exactly one.
 */
static inline bool kvm_page_empty(void *ptr)
{
	return page_count(virt_to_page(ptr)) == 1;
}
#include <asm/stage2_pgtable.h>
@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
@@ -517,6 +519,30 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
/*
 * Compute the magic number 'x' for VTTBR:BADDR from the IPA size and the
 * number of stage2 levels.
 * With v8.2 LVA extensions, 'x' should be a minimum of 6 with 52bit IPS,
 * so the value is clamped to 6 when CONFIG_ARM64_PA_BITS_52 is enabled.
 */
static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
{
	int x = ARM64_VTTBR_X(ipa_shift, levels);

	if (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6)
		return 6;

	return x;
}
/*
 * Mask of the VTTBR base address bits for the given IPA size and level
 * count: bits [PHYS_MASK_SHIFT - 1 : x], with x from arm64_vttbr_x().
 */
static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
{
	unsigned int lowest_bit = arm64_vttbr_x(ipa_shift, levels);

	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, lowest_bit);
}
static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
{
return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
}
static inline bool kvm_cpu_has_cnp(void)
{
return system_supports_cnp();

View File

@@ -25,6 +25,9 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_IL_BIT (1 << 20)
/* AArch32-specific ptrace requests */
#define COMPAT_PTRACE_GETREGS 12
#define COMPAT_PTRACE_SETREGS 13

View File

@@ -1,42 +0,0 @@
/*
* Copyright (C) 2016 - ARM Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
#define __ARM64_S2_PGTABLE_NOPMD_H_
#include <asm/stage2_pgtable-nopud.h>
#define __S2_PGTABLE_PMD_FOLDED
#define S2_PMD_SHIFT S2_PUD_SHIFT
#define S2_PTRS_PER_PMD 1
#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
#define stage2_pud_none(pud) (0)
#define stage2_pud_present(pud) (1)
#define stage2_pud_clear(pud) do { } while (0)
#define stage2_pud_populate(pud, pmd) do { } while (0)
#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
#define stage2_pmd_free(pmd) do { } while (0)
#define stage2_pmd_addr_end(addr, end) (end)
#define stage2_pud_huge(pud) (0)
#define stage2_pmd_table_empty(pmdp) (0)
#endif

View File

@@ -1,39 +0,0 @@
/*
* Copyright (C) 2016 - ARM Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
#define __ARM64_S2_PGTABLE_NOPUD_H_
#define __S2_PGTABLE_PUD_FOLDED
#define S2_PUD_SHIFT S2_PGDIR_SHIFT
#define S2_PTRS_PER_PUD 1
#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
#define stage2_pgd_none(pgd) (0)
#define stage2_pgd_present(pgd) (1)
#define stage2_pgd_clear(pgd) do { } while (0)
#define stage2_pgd_populate(pgd, pud) do { } while (0)
#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
#define stage2_pud_free(x) do { } while (0)
#define stage2_pud_addr_end(addr, end) (end)
#define stage2_pud_table_empty(pmdp) (0)
#endif

View File

@@ -19,8 +19,16 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_
#include <linux/hugetlb.h>
#include <asm/pgtable.h>
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* and depends on the number of levels in the page table. Compute the
* PGDIR_SHIFT for a given number of levels.
*/
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
/*
* The hardware supports concatenation of up to 16 tables at stage2 entry level
* and we use the feature whenever possible.
@@ -29,112 +37,208 @@
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
* This implies, the total number of page table levels at stage2 expected
* by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
* by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
* in normal translations(e.g, stage1), since we cannot have another level in
* the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
* the range (IPA_SHIFT, IPA_SHIFT - 4).
*/
#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
/*
 * Number of stage2 page table levels used by this VM, decoded from the
 * SL0 field of its VTCR_EL2 value. The argument is parenthesized before
 * '->' so that any pointer expression can be passed safely.
 */
#define kvm_stage2_levels(kvm)		VTCR_EL2_LVLS((kvm)->arch.vtcr)
/*
* With all the supported VA_BITs and 40bit guest IPA, the following condition
* is always true:
*
* STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
*
* We base our stage-2 page table walker helpers on this assumption and
* fall back to using the host version of the helper wherever possible.
* i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
* to using the host version, since it is guaranteed it is not folded at host.
*
* If the condition breaks in the future, we can rearrange the host level
* definitions and reuse them for stage2. Till then...
*/
#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
#error "Unsupported combination of guest IPA and host VA_BITS."
#endif
/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
/*
 * stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM,
 * expressed as a shift; stage2_pgdir_size() and stage2_pgdir_mask() are the
 * matching size and alignment mask. The mask expansion is fully
 * parenthesized, like S2_PUD_MASK and S2_PMD_MASK.
 */
#define stage2_pgdir_shift(kvm)	pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
#define stage2_pgdir_size(kvm)		(1ULL << stage2_pgdir_shift(kvm))
#define stage2_pgdir_mask(kvm)		(~(stage2_pgdir_size(kvm) - 1))
/*
* The number of PTRS across all concatenated stage2 tables given by the
* number of bits resolved at the initial level.
* If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
* in which case, stage2_pgd_ptrs will have one entry.
*/
#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
/*
 * Number of IPA bits resolved by the entry level table; zero when a single
 * entry already covers at least the whole IPA range.
 */
#define pgd_ptrs_shift(ipa, pgdir_shift)	\
	((ipa) <= (pgdir_shift) ? 0 : ((ipa) - (pgdir_shift)))
/* Entry count and byte size of the entry level table for an IPA/levels pair. */
#define __s2_pgd_ptrs(ipa, lvls)		\
	(1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
#define __s2_pgd_size(ipa, lvls)		\
	(__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
/* Same quantities, taken from the configuration of a given VM. */
#define stage2_pgd_ptrs(kvm)			\
	__s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
#define stage2_pgd_size(kvm)			\
	__s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
* levels in addition to the PGD.
* kvm_mmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
#if STAGE2_PGTABLE_LEVELS > 3
/* Stage2 PUD definitions when the level is present */
/*
 * True when the stage2 tables of @kvm have a PUD level: the host must be
 * built with more than 3 levels and the VM must use more than 3 levels.
 */
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
{
	if (CONFIG_PGTABLE_LEVELS <= 3)
		return false;

	return kvm_stage2_levels(kvm) > 3;
}
#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
#define stage2_pgd_none(pgd) pgd_none(pgd)
#define stage2_pgd_clear(pgd) pgd_clear(pgd)
#define stage2_pgd_present(pgd) pgd_present(pgd)
#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(pud) pud_free(NULL, pud)
#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
{
phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
if (kvm_stage2_has_pud(kvm))
return pgd_none(pgd);
else
return 0;
}
#endif /* STAGE2_PGTABLE_LEVELS > 3 */
/* Clear @pgdp; does nothing when the VM's stage2 tables have no PUD level. */
static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	pgd_clear(pgdp);
}
/*
 * Report whether @pgd is present. Without a PUD level at stage2 the entry
 * is always reported as present.
 */
static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
{
	if (!kvm_stage2_has_pud(kvm))
		return 1;

	return pgd_present(pgd);
}
#if STAGE2_PGTABLE_LEVELS > 2
/* Hook @pud into @pgd; does nothing when the VM's stage2 tables have no PUD level. */
static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	pgd_populate(NULL, pgd, pud);
}
/*
 * Return the PUD entry for @address under @pgd. Without a PUD level at
 * stage2, @pgd itself is returned, reinterpreted as a PUD entry.
 */
static inline pud_t *stage2_pud_offset(struct kvm *kvm,
				       pgd_t *pgd, unsigned long address)
{
	if (!kvm_stage2_has_pud(kvm))
		return (pud_t *)pgd;

	return pud_offset(pgd, address);
}
/* Free the PUD table @pud; does nothing when the VM's stage2 tables have no PUD level. */
static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	pud_free(NULL, pud);
}
/*
 * Report whether the PUD table at @pudp is empty. Without a PUD level at
 * stage2 the answer is always false.
 */
static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
{
	if (!kvm_stage2_has_pud(kvm))
		return false;

	return kvm_page_empty(pudp);
}
/*
 * Return the end of the PUD-sized region containing @addr, capped at @end.
 * Without a PUD level at stage2, @end is returned unchanged.
 */
static inline phys_addr_t
stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t boundary;

	if (!kvm_stage2_has_pud(kvm))
		return end;

	boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
	/*
	 * Comparing (x - 1) values makes a boundary that wrapped around to 0
	 * compare as the largest address, so @end wins in that case.
	 */
	if (boundary - 1 < end - 1)
		return boundary;

	return end;
}
/* Stage2 PMD definitions when the level is present */
/*
 * True when the stage2 tables of @kvm have a PMD level: the host must be
 * built with more than 2 levels and the VM must use more than 2 levels.
 */
static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
{
	if (CONFIG_PGTABLE_LEVELS <= 2)
		return false;

	return kvm_stage2_levels(kvm) > 2;
}
#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
#define stage2_pud_none(pud) pud_none(pud)
#define stage2_pud_clear(pud) pud_clear(pud)
#define stage2_pud_present(pud) pud_present(pud)
#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
#define stage2_pud_huge(pud) pud_huge(pud)
#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
{
phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
if (kvm_stage2_has_pmd(kvm))
return pud_none(pud);
else
return 0;
}
#endif /* STAGE2_PGTABLE_LEVELS > 2 */
#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
#if STAGE2_PGTABLE_LEVELS == 2
#include <asm/stage2_pgtable-nopmd.h>
#elif STAGE2_PGTABLE_LEVELS == 3
#include <asm/stage2_pgtable-nopud.h>
#endif
#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
{
phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
if (kvm_stage2_has_pmd(kvm))
pud_clear(pud);
}
/*
 * Report whether @pud is present. Without a PMD level at stage2 the entry
 * is always reported as present.
 */
static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 1;

	return pud_present(pud);
}
/* Hook @pmd into @pud; does nothing when the VM's stage2 tables have no PMD level. */
static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
{
	if (!kvm_stage2_has_pmd(kvm))
		return;

	pud_populate(NULL, pud, pmd);
}
/*
 * Return the PMD entry for @address under @pud. Without a PMD level at
 * stage2, @pud itself is returned, reinterpreted as a PMD entry.
 */
static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
				       pud_t *pud, unsigned long address)
{
	if (!kvm_stage2_has_pmd(kvm))
		return (pmd_t *)pud;

	return pmd_offset(pud, address);
}
/* Free the PMD table @pmd; does nothing when the VM's stage2 tables have no PMD level. */
static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
{
	if (!kvm_stage2_has_pmd(kvm))
		return;

	pmd_free(NULL, pmd);
}
/*
 * Report whether @pud is a huge mapping according to pud_huge(). Without a
 * PMD level at stage2 the answer is always 0.
 */
static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 0;

	return pud_huge(pud);
}
/*
 * Report whether the PMD table at @pmdp is empty. Without a PMD level at
 * stage2 the answer is always 0.
 */
static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 0;

	return kvm_page_empty(pmdp);
}
/*
 * Return the end of the PMD-sized region containing @addr, capped at @end.
 * Without a PMD level at stage2, @end is returned unchanged.
 */
static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t boundary;

	if (!kvm_stage2_has_pmd(kvm))
		return end;

	boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
	/*
	 * Comparing (x - 1) values makes a boundary that wrapped around to 0
	 * compare as the largest address, so @end wins in that case.
	 */
	if (boundary - 1 < end - 1)
		return boundary;

	return end;
}
/*
 * Report whether the PTE table at @ptep is empty, as decided by
 * kvm_page_empty(). @kvm is not used by this helper.
 */
static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
{
return kvm_page_empty(ptep);
}
/*
 * Index of the entry level table slot that translates @addr for @kvm:
 * the address is shifted down by the VM's pgdir shift and then masked with
 * (number of entry level pointers - 1).
 */
static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
{
	phys_addr_t shifted = addr >> stage2_pgdir_shift(kvm);

	return shifted & (stage2_pgd_ptrs(kvm) - 1);
}
/*
 * Return the end of the stage2 PGD-sized region containing @addr, capped
 * at @end. The region size and mask are per-VM values.
 */
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);

	/*
	 * Compare (x - 1) values so that a boundary that wrapped around to 0
	 * compares as the largest value and @end is chosen instead.
	 */
	if (next - 1 < end - 1)
		return next;

	return end;
}

View File

@@ -391,15 +391,15 @@ int __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_CORTEX_A53;
case ARM_CPU_PART_CORTEX_A57:
return KVM_ARM_TARGET_CORTEX_A57;
};
}
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
case APM_CPU_PART_POTENZA:
return KVM_ARM_TARGET_XGENE_POTENZA;
};
}
break;
};
}
/* Return a default generic target */
return KVM_ARM_TARGET_GENERIC_V8;

View File

@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return 0;
case ARM_EXCEPTION_IL:
/*
* We attempted an illegal exception return. Guest state must
* have been corrupted somehow. Give up.
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return -EINVAL;
default:
kvm_pr_unimpl("Unsupported exception type: %d",
exception_index);

View File

@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
# KVM code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may

View File

@@ -162,6 +162,20 @@ el1_error:
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
el2_sync:
/* Check for illegal exception return, otherwise panic */
mrs x0, spsr_el2
/* if this was something else, then panic! */
tst x0, #PSR_IL_BIT
b.eq __hyp_panic
/* Let's attempt a recovery from the illegal exception return */
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IL
b __guest_exit
el2_error:
ldp x0, x1, [sp], #16
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
invalid_vect el2t_fiq_invalid // FIQ EL2t
invalid_vect el2t_error_invalid // Error EL2t
invalid_vect el2h_sync_invalid // Synchronous EL2h
valid_vect el2_sync // Synchronous EL2h
invalid_vect el2h_irq_invalid // IRQ EL2h
invalid_vect el2h_fiq_invalid // FIQ EL2h
valid_vect el2_error // Error EL2h

View File

@@ -1,90 +0,0 @@
/*
* Copyright (C) 2016 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
/*
 * Build the VTCR_EL2 value used for stage2 translation from the CPU ID
 * registers and write it to the hardware.
 *
 * Starting from VTCR_EL2_FLAGS, the following are ORed in:
 *  - the PARange value read from ID_AA64MMFR0_EL1, shifted to bit 16;
 *  - 64 minus the address width, with the width capped at 40 bits;
 *  - VTCR_EL2_HA when the HADBS field of ID_AA64MMFR1_EL1 is non-zero;
 *  - the 8-bit or 16-bit VMID selector, from the VMIDBits field.
 *
 * Return: the physical address width in bits decoded from PARange
 * (32, 36, 40, 42, 44 or 48). This is the uncapped width, not the
 * 40-bit-capped value programmed into the register.
 */
u32 __hyp_text __init_stage2_translation(void)
{
u64 val = VTCR_EL2_FLAGS;
u64 parange;
u64 tmp;
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
* bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
* PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
*/
/*
* NOTE(review): only the low 3 bits are kept by the mask below even
* though the comment above describes PARange as a 4-bit field -- confirm
* this is intended.
*/
parange = read_sysreg(id_aa64mmfr0_el1) & 7;
if (parange > ID_AA64MMFR0_PARANGE_MAX)
parange = ID_AA64MMFR0_PARANGE_MAX;
val |= parange << 16;
/* Compute the actual PARange... */
/* From here on, parange holds a width in bits rather than the encoded field. */
switch (parange) {
case 0:
parange = 32;
break;
case 1:
parange = 36;
break;
case 2:
parange = 40;
break;
case 3:
parange = 42;
break;
case 4:
parange = 44;
break;
case 5:
default:
/* Any encoding above 4 is treated as 48 bits. */
parange = 48;
break;
}
/*
* ... and clamp it to 40 bits, unless we have some braindead
* HW that implements less than that. In all cases, we'll
* return that value for the rest of the kernel to decide what
* to do.
*/
val |= 64 - (parange > 40 ? 40 : parange);
/*
* Check the availability of Hardware Access Flag / Dirty Bit
* Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
if (tmp)
val |= VTCR_EL2_HA;
/*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
* bit in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
VTCR_EL2_VS_16BIT :
VTCR_EL2_VS_8BIT;
write_sysreg(val, vtcr_el2);
/* Uncapped width in bits; implicitly narrowed from u64 to the u32 return type. */
return parange;
}

View File

@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
static void __hyp_text __activate_vm(struct kvm *kvm)
{
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
*hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
*hpfar = PAR_TO_HPFAR(tmp);
return true;
}

View File

@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
static void __hyp_text
__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
u64 pstate = ctxt->gp_regs.regs.pstate;
u64 mode = pstate & PSR_AA32_MODE_MASK;
/*
* Safety check to ensure we're setting the CPU up to enter the guest
* in a less privileged mode.
*
* If we are attempting a return to EL2 or higher in AArch64 state,
* program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
* we'll take an illegal exception state exception immediately after
* the ERET to the guest. Attempts to return to AArch32 Hyp will
* result in an illegal exception return because EL2's execution state
* is determined by SCR_EL3.RW.
*/
if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
pstate = PSR_MODE_EL2h | PSR_IL_BIT;
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
write_sysreg_el2(pstate, spsr);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);

View File

@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
* bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
* let's flip TGE before executing the TLB operation.
*/
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
{
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
isb();
}

View File

@@ -26,6 +26,7 @@
#include <kvm/arm_arch_timer.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
@@ -33,6 +34,9 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;
/*
* ARMv8 Reset Values
*/
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
}
/**
* kvm_arch_dev_ioctl_check_extension
* kvm_arch_vm_ioctl_check_extension
*
* We currently assume that the number of HW registers is uniform
* across all CPUs (see cpuinfo_sanity_check).
*/
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
case KVM_CAP_ARM_VM_IPA_SIZE:
r = kvm_ipa_limit;
break;
default:
r = 0;
}
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu);
}
/*
 * Work out the largest IPA size (in bits) any VM on this host may request
 * and record it in kvm_ipa_limit.
 *
 * The limit is the CPU physical address size from the sanitised
 * ID_AA64MMFR0_EL1, clamped first to the PA size the kernel supports and
 * then to what the stage1 page table geometry allows.
 */
void kvm_set_ipa_limit(void)
{
	unsigned int ipa_max, pa_max, va_max, parange;

	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);

	/* Never exceed the PA size supported by the kernel. */
	ipa_max = pa_max;
	if (ipa_max > PHYS_MASK_SHIFT)
		ipa_max = PHYS_MASK_SHIFT;

	/*
	 * The stage2 tables reuse the stage1 page table code, so stage2 must
	 * not need more levels than stage1 has. Up to 16 tables can be
	 * concatenated at the entry level, which buys 4 extra bits on top of
	 * the maximum VA addressable with the current number of levels.
	 */
	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3 + 4;
	if (ipa_max > va_max)
		ipa_max = va_max;

	/* Say why the limit ended up below what the CPUs could do. */
	if (ipa_max < pa_max) {
		const char *limit_kind = "Physical";

		if (va_max < pa_max)
			limit_kind = "Virtual";

		pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
			limit_kind);
	}

	WARN(ipa_max < KVM_PHYS_SHIFT,
	     "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);

	kvm_ipa_limit = ipa_max;
	kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
}
/*
 * Compute the VTCR_EL2 value for this VM and store it in kvm->arch.vtcr.
 *
 * @type carries the requested IPA size; zero selects the default
 * KVM_PHYS_SHIFT. The value is built from system-wide sanitised ID register
 * fields so that it is the same on every physical CPU. The one exception is
 * hardware Access Flag management, which is switched on unconditionally:
 * the bit is RES0 on CPUs without the feature.
 *
 * Return: 0 on success, -EINVAL if @type has bits set outside the IPA size
 * field or the requested size is below 32 or above kvm_ipa_limit.
 */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	u64 vtcr = VTCR_EL2_FLAGS;
	u32 parange, phys_shift;
	u8 lvls;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (!phys_shift)
		phys_shift = KVM_PHYS_SHIFT;
	else if (phys_shift < 32 || phys_shift > kvm_ipa_limit)
		return -EINVAL;

	/* Physical address size field, capped at the highest known encoding. */
	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
	if (parange > ID_AA64MMFR0_PARANGE_MAX)
		parange = ID_AA64MMFR0_PARANGE_MAX;
	vtcr |= parange << VTCR_EL2_PS_SHIFT;

	vtcr |= VTCR_EL2_T0SZ(phys_shift);

	/*
	 * Never go below two levels of page table, so that host PMD huge
	 * pages do not have to be split at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Hardware Access Flag management is enabled on all CPUs: the bit is
	 * RES0, and must be ignored, on CPUs that lack the feature.
	 */
	vtcr |= VTCR_EL2_HA;

	/* VMID width */
	if (kvm_get_vmid_bits() == 16)
		vtcr |= VTCR_EL2_VS_16BIT;
	else
		vtcr |= VTCR_EL2_VS_8BIT;

	kvm->arch.vtcr = vtcr;
	return 0;
}

View File

@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache;
/*
 * kvmppc_save_tm_hv() / kvmppc_restore_tm_hv(): declared as real functions
 * when CONFIG_PPC_TRANSACTIONAL_MEM is set. Otherwise they are empty inline
 * stubs, so callers do not need their own #ifdef.
 */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
#else
/* No-op: nothing to save without transactional memory support. */
static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
/* No-op: nothing to restore without transactional memory support. */
static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
void kvmhv_save_host_pmu(void);
void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
unsigned long dabrx);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */

View File

@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG();
}
/*
 * Look up the page shift for a given AP encoding.
 *
 * Scans mmu_psize_defs[] for the first entry whose .ap equals @ap and
 * returns that entry's .shift. If no entry matches, -1 is returned; because
 * the return type is unsigned int, the caller sees UINT_MAX unless it
 * stores the result in a signed int.
 */
static inline unsigned int ap_to_shift(unsigned long ap)
{
	int idx = 0;

	while (idx < MMU_PAGE_COUNT) {
		if (mmu_psize_defs[idx].ap == ap)
			return mmu_psize_defs[idx].shift;
		idx++;
	}

	return -1;
}
static inline unsigned long get_sllp_encoding(int psize)
{
unsigned long sllp;

View File

@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);

View File

@@ -322,6 +322,11 @@
#define H_GET_24X7_DATA 0xF07C
#define H_GET_PERF_COUNTER_INFO 0xF080
/* Platform-specific hcalls used for nested HV KVM */
#define H_SET_PARTITION_TABLE 0xF800
#define H_ENTER_NESTED 0xF804
#define H_TLB_INVALIDATE 0xF808
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
#define H_SET_MODE_RESOURCE_SET_DAWR 2
@@ -461,6 +466,42 @@ struct h_cpu_char_result {
u64 behaviour;
};
/*
 * Register state for entering a nested guest with H_ENTER_NESTED.
 *
 * The first field identifies the layout of the structure; the layout
 * defined here is HV_GUEST_STATE_VERSION.
 * NOTE(review): presumably this layout is shared with the hypervisor that
 * services the hcall, so fields should not be reordered or resized without
 * bumping the version -- confirm.
 */
struct hv_guest_state {
u64 version; /* version of this structure layout */
u32 lpid;
u32 vcpu_token;
/* These registers are hypervisor privileged (at least for writing) */
u64 lpcr;
u64 pcr;
u64 amor;
u64 dpdes;
u64 hfscr;
s64 tb_offset; /* the only signed field in the structure */
u64 dawr0;
u64 dawrx0;
u64 ciabr;
u64 hdec_expiry;
u64 purr;
u64 spurr;
u64 ic;
u64 vtb;
u64 hdar;
u64 hdsisr;
u64 heir;
u64 asdr;
/* These are OS privileged but need to be set late in guest entry */
u64 srr0;
u64 srr1;
u64 sprg[4];
u64 pidr;
u64 cfar;
u64 ppr;
};
/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION 1
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */

View File

@@ -126,7 +126,7 @@ struct iommu_table {
int it_nid;
};
#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
((tbl)->it_ops->useraddrptr((tbl), (entry), false))
#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
((tbl)->it_ops->useraddrptr((tbl), (entry), true))

View File

@@ -84,7 +84,6 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700
@@ -134,8 +133,7 @@
#define BOOK3S_IRQPRIO_EXTERNAL 14
#define BOOK3S_IRQPRIO_DECREMENTER 15
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17
#define BOOK3S_IRQPRIO_MAX 18
#define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2

View File

@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr);
extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 root,
u64 *pte_ret_p);
extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 table,
int table_index, u64 *pte_ret_p);
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
unsigned int shift, struct kvm_memory_slot *memslot,
unsigned int lpid);
extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
bool writing, unsigned long gpa,
unsigned int lpid);
extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
unsigned long gpa,
struct kvm_memory_slot *memslot,
bool writing, bool kvm_ro,
pte_t *inserted_pte, unsigned int *levelp);
extern int kvmppc_init_vm_radix(struct kvm *kvm);
extern void kvmppc_free_radix(struct kvm *kvm);
extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
unsigned int lpid);
extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void);
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
unsigned long gpa, unsigned int shift,
struct kvm_memory_slot *memslot,
unsigned int lpid);
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
#endif
long kvmhv_nested_init(void);
void kvmhv_nested_exit(void);
void kvmhv_vm_nested_init(struct kvm *kvm);
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
struct hv_guest_state *hr);
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
extern int kvm_irq_bypass;
@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
vcpu->arch.cr = val;
vcpu->arch.regs.ccr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr;
return vcpu->arch.regs.ccr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
/* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000

View File

@@ -23,6 +23,108 @@
#include <linux/string.h>
#include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/cpu_has_feature.h>
#include <asm/ppc-opcode.h>
/*
 * Report whether the CPU lacks the CPU_FTR_HVMODE feature.
 *
 * Only evaluated when CONFIG_PPC_PSERIES is enabled; without it the answer
 * is always false.
 */
static inline bool kvmhv_on_pseries(void)
{
#ifdef CONFIG_PPC_PSERIES
	return !cpu_has_feature(CPU_FTR_HVMODE);
#else
	return false;
#endif
}
/*
 * Structure for a nested guest, that is, for a guest that is managed by
 * one of our guests.
 *
 * Two LPIDs are tracked per nested guest: the one the L1 guest uses to name
 * it (l1_lpid) and the real one it runs under (shadow_lpid).
 */
struct kvm_nested_guest {
struct kvm *l1_host; /* L1 VM that owns this nested guest */
int l1_lpid; /* lpid L1 guest thinks this guest is */
int shadow_lpid; /* real lpid of this nested guest */
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next; /* NOTE(review): list link; which list is not visible here */
cpumask_t need_tlb_flush; /* NOTE(review): presumably CPUs needing a TLB flush for this guest -- confirm */
cpumask_t cpu_in_guest; /* NOTE(review): presumably CPUs currently running this guest -- confirm */
short prev_cpu[NR_CPUS]; /* NOTE(review): NR_CPUS entries; indexing and meaning not visible here */
};
/*
* We define a nested rmap entry as a single 64-bit quantity
* 0xFFF0000000000000 12-bit lpid field
* 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
* 0x0000000000000001 1-bit single entry flag
*/
#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
#define RMAP_NESTED_LPID_SHIFT (52)
#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
/* Structure for a nested guest rmap entry */
struct rmap_nested {
struct llist_node list;
u64 rmap;
};
/*
 * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
 * safe against removal of the list entry or NULL list
 * @pos: a (struct rmap_nested *) to use as a loop cursor
 * @node: pointer to the first entry
 * NOTE: this can be NULL
 * @rmapp: an (unsigned long *) in which to return the rmap entries on each
 * iteration
 * NOTE: this must point to already allocated memory
 *
 * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
 * rmap entry in the memslot. The list is always terminated by a "single entry"
 * stored in the list element of the final entry of the llist. If there is ONLY
 * a single entry then this is itself in the rmap entry of the memslot, not a
 * llist head pointer.
 *
 * Note that the iterator below assumes that a nested rmap entry is always
 * non-zero. This is true for our usage because the LPID field is always
 * non-zero (zero is reserved for the host).
 *
 * This should be used to iterate over the list of rmap_nested entries with
 * processing done on the u64 rmap value given by each iteration. This is safe
 * against removal of list entries and it is always safe to call free on (pos).
 *
 * Side effects on the arguments:
 * - @node is evaluated several times and is overwritten on every iteration:
 *   it is advanced to (pos)->list.next before the loop body runs, or set to
 *   NULL once a single entry has been yielded. It must therefore be a
 *   modifiable lvalue whose original value the caller no longer needs.
 * - When the value yielded is a single entry (RMAP_NESTED_IS_SINGLE_ENTRY
 *   set in @node), @pos is NULL inside the loop body.
 * - The loop also stops if the value stored through @rmapp is zero.
 *
 * e.g.
 * struct rmap_nested *cursor;
 * struct llist_node *first;
 * unsigned long rmap;
 * for_each_nest_rmap_safe(cursor, first, &rmap) {
 * do_something(rmap);
 * free(cursor);
 * }
 */
#define for_each_nest_rmap_safe(pos, node, rmapp) \
for ((pos) = llist_entry((node), typeof(*(pos)), list); \
(node) && \
(*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((u64) (node)) : ((pos)->rmap))) && \
(((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((struct llist_node *) ((pos) = NULL)) : \
(pos)->list.next)), true); \
(pos) = llist_entry((node), typeof(*(pos)), list))
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
bool create);
void kvmhv_put_nested(struct kvm_nested_guest *gp);
int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
/* Encoding of first parameter for H_TLB_INVALIDATE */
#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
___PPC_R(r))
/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
#define PPC_MIN_HPT_ORDER 18
@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
extern void kvmhv_rm_send_ipi(int cpu);
@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
{
vcpu->arch.cr = vcpu->arch.cr_tm;
vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
{
vcpu->arch.cr_tm = vcpu->arch.cr;
vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
unsigned long gpa, unsigned int level,
unsigned long mmu_seq, unsigned int lpid,
unsigned long *rmapp, struct rmap_nested **n_rmap);
extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
struct rmap_nested **n_rmap);
extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
struct kvm_memory_slot *memslot,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */

View File

@@ -25,6 +25,9 @@
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
/* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8

View File

@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
vcpu->arch.cr = val;
vcpu->arch.regs.ccr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr;
return vcpu->arch.regs.ccr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)

View File

@@ -46,6 +46,7 @@
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
#else
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -94,6 +95,7 @@ struct dtl_entry;
struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu;
struct kvm_nested_guest;
struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -287,10 +289,12 @@ struct kvm_arch {
u8 radix;
u8 fwnmi_enabled;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
struct dentry *radix_dentry;
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -311,6 +315,9 @@ struct kvm_arch {
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u64 l1_ptcr;
int max_nested_lpid;
struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
/* This array can grow quite large, keep it at the end */
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif
@@ -360,7 +367,9 @@ struct kvmppc_pte {
bool may_write : 1;
bool may_execute : 1;
unsigned long wimg;
unsigned long rc;
u8 page_size; /* MMU_PAGE_xxx */
u8 page_shift;
};
struct kvmppc_mmu {
@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
ulong tar;
#endif
u32 cr;
#ifdef CONFIG_PPC_BOOK3S
ulong hflags;
ulong guest_owned_ext;
@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
u8 external_oneshot; /* clear external irq after delivery */
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
u32 emul_inst;
u32 online;
/* For support of nested guests */
struct kvm_nested_guest *nested;
u32 nested_vcpu_id;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING

View File

@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
@@ -327,6 +325,7 @@ struct kvmppc_ops {
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
unsigned long flags);
void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
int (*enable_nested)(struct kvm *kvm);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */
/*
@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
/*
* Host-side operations we want to set up while running in real

View File

@@ -104,6 +104,7 @@
#define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MSGSNDP 142
#define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_TLBIE 306
#define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343

View File

@@ -415,6 +415,7 @@
#define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
#define HFSCR_FP __MASK(FSCR_FP_LG)
#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
#define SPRN_TAR 0x32f /* Target Address Register */
#define SPRN_LPCR 0x13E /* LPAR Control Register */
#define LPCR_VPM0 ASM_CONST(0x8000000000000000)
@@ -766,6 +767,7 @@
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
#define HSRR1_DENORM 0x00100000 /* Denorm exception */
#define HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */

View File

@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs

View File

@@ -438,7 +438,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif
OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -503,6 +503,7 @@ int main(void)
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -695,7 +696,7 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);

View File

@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63
bnelr
ld r5,CPU_SPEC_FEATURES(r4)
LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
xor r5,r5,r6
LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4)
blr

View File

@@ -75,7 +75,8 @@ kvm-hv-y += \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o
book3s_64_mmu_radix.o \
book3s_hv_nested.o
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o

View File

@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
ulong pc = kvmppc_get_pc(vcpu);
ulong lr = kvmppc_get_lr(vcpu);
if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
}
}
@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
/*
* This case (KVM_INTERRUPT_SET) should never actually arise for
* a pseries guest (because pseries guests expect their interrupt
* controllers to continue asserting an external interrupt request
* until it is acknowledged at the interrupt controller), but is
* included to avoid ABI breakage and potentially for other
* sorts of guest.
*
* There is a subtlety here: HV KVM does not test the
* external_oneshot flag in the code that synthesizes
* external interrupts for the guest just before entering
* the guest. That is OK even if userspace did do a
* KVM_INTERRUPT_SET on a pseries guest vcpu, because the
* caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
* which ends up doing a smp_send_reschedule(), which will
* pull the guest all the way out to the host, meaning that
* we will call kvmppc_core_prepare_to_enter() before entering
* the guest again, and that will handle the external_oneshot
* flag correctly.
*/
if (irq->irq == KVM_INTERRUPT_SET)
vcpu->arch.external_oneshot = 1;
if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
kvmppc_book3s_queue_irqprio(vcpu, vec);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
}
void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_DECREMENTER:
/* DEC interrupts get cleared by mtdec */
return false;
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
/* External interrupts get cleared by userspace */
case BOOK3S_IRQPRIO_EXTERNAL:
/*
* External interrupts get cleared by userspace
* except when set by the KVM_INTERRUPT ioctl with
* KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
*/
if (vcpu->arch.external_oneshot) {
vcpu->arch.external_oneshot = 0;
return true;
}
return false;
}

View File

@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void)
{
unsigned long host_lpid, rsvd_lpid;
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = mfspr(SPRN_LPID);
host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID);
rsvd_lpid = LPID_RSVD;
kvmppc_init_lpid(rsvd_lpid + 1);

File diff suppressed because it is too large Load Diff

View File

@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return ret;
}
/*
 * Check whether a TCE value can be accepted for the given TCE table.
 *
 * @stt: the KVM TCE table the entry is destined for
 * @tce: the raw TCE value, including the TCE_PCI_READ/TCE_PCI_WRITE bits
 *
 * Returns H_SUCCESS when the TCE has no DMA direction, or when its address
 * passes iommu_tce_check_gpa(), translates via kvmppc_tce_to_ua(), and can
 * be resolved by mm_iommu_lookup()/mm_iommu_ua_to_hpa() for every table on
 * stt->iommu_tables. Any failing check yields H_TOO_HARD.
 */
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
		unsigned long tce)
{
	struct kvmppc_spapr_tce_iommu_table *hw_tbl;
	unsigned long guest_addr;
	unsigned long user_addr = 0;

	/* Allow userspace to poison TCE table */
	if (iommu_tce_direction(tce) == DMA_NONE)
		return H_SUCCESS;

	/* Strip the permission bits to get the address part of the TCE */
	guest_addr = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);

	if (iommu_tce_check_gpa(stt->page_shift, guest_addr) ||
	    kvmppc_tce_to_ua(stt->kvm, tce, &user_addr, NULL))
		return H_TOO_HARD;

	/* The address must resolve for every table attached to this stt */
	list_for_each_entry_rcu(hw_tbl, &stt->iommu_tables, next) {
		long pg_shift = hw_tbl->tbl->it_page_shift;
		struct mm_iommu_table_group_mem_t *region;
		unsigned long host_addr = 0;

		region = mm_iommu_lookup(stt->kvm->mm, user_addr,
				1ULL << pg_shift);
		if (!region ||
		    mm_iommu_ua_to_hpa(region, user_addr, pg_shift, &host_addr))
			return H_TOO_HARD;
	}

	return H_SUCCESS;
}
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{
unsigned long hpa = 0;
@@ -376,11 +410,10 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
{
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
/* it_userspace allocation might be delayed */
return H_TOO_HARD;
return H_SUCCESS;
mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
if (!mem)
@@ -401,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
long ret;
if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
return H_HARDWARE;
return H_TOO_HARD;
if (dir == DMA_NONE)
return H_SUCCESS;
@@ -449,15 +482,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
return H_TOO_HARD;
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
return H_HARDWARE;
return H_TOO_HARD;
if (mm_iommu_mapped_inc(mem))
return H_CLOSED;
return H_TOO_HARD;
ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_HARDWARE;
return H_TOO_HARD;
}
if (dir != DMA_NONE)
@@ -517,8 +550,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = srcu_read_lock(&vcpu->kvm->srcu);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -533,14 +565,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
if (ret == H_SUCCESS)
continue;
if (ret == H_TOO_HARD)
if (ret != H_SUCCESS) {
kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry);
}
}
kvmppc_tce_put(stt, entry, tce);
@@ -583,7 +611,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -599,10 +627,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
}
if (kvmppc_gpa_to_ua(vcpu->kvm,
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
for (i = 0; i < npages; ++i) {
/*
* This looks unsafe, because we validate, then regrab
* the TCE from userspace which could have been changed by
* another thread.
*
* But it actually is safe, because the relevant checks will be
* re-executed in the following code. If userspace tries to
* change this dodgily it will result in a messier failure mode
* but won't threaten the host.
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -610,14 +654,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua,
iommu_tce_direction(tce));
if (ret == H_SUCCESS)
continue;
if (ret == H_TOO_HARD)
if (ret != H_SUCCESS) {
kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry);
}
}
kvmppc_tce_put(stt, entry + i, tce);

View File

@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
* to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page
* was actually allocated).
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce);
struct kvmppc_spapr_tce_iommu_table *stit;
unsigned long ua = 0;
/* Allow userspace to poison TCE table */
if (dir == DMA_NONE)
@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
if (!mem)
return H_TOO_HARD;
if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
return H_TOO_HARD;
}
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode,
*
@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap)
{
unsigned long gfn = gpa >> PAGE_SHIFT;
unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn);
@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) |
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap)
@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
@@ -197,7 +214,7 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
/*
* kvmppc_rm_tce_iommu_do_map() updates the UA cache after
* calling this so we still get here a valid UA.
@@ -223,7 +240,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
{
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
/* it_userspace allocation might be delayed */
@@ -287,7 +304,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
{
long ret;
unsigned long hpa = 0;
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
struct mm_iommu_table_group_mem_t *mem;
if (!pua)
@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
&hpa)))
return H_HARDWARE;
return H_TOO_HARD;
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED;
return H_TOO_HARD;
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
ret = kvmppc_tce_validate(stt, tce);
ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
return ret;
dir = iommu_tce_direction(tce);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
if (ret == H_SUCCESS)
continue;
if (ret == H_TOO_HARD)
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
}
}
kvmppc_tce_put(stt, entry, tce);
@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
struct mm_iommu_table_group_mem_t *mem;
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
if (WARN_ON_ONCE_RM(!rmap))
return H_HARDWARE;
return H_TOO_HARD;
/*
* Synchronize with the MMU notifier callbacks in
@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ret = kvmppc_tce_validate(stt, tce);
ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
if (kvmppc_gpa_to_ua(vcpu->kvm,
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua,
iommu_tce_direction(tce));
if (ret == H_SUCCESS)
continue;
if (ret == H_TOO_HARD)
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
goto unlock_exit;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
}
}
kvmppc_tce_put(stt, entry + i, tce);

View File

@@ -36,7 +36,6 @@
#define OP_31_XOP_MTSR 210
#define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274
#define OP_31_XOP_TLBIE 306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402
@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
vcpu->arch.tar_tm = vcpu->arch.tar;
vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.cr_tm = vcpu->arch.cr;
vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
}
@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
vcpu->arch.tar = vcpu->arch.tar_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.cr = vcpu->arch.cr_tm;
vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
}
@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
uint64_t texasr;
/* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT);
@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
tm_abort(ra_val);
/* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT);
@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
preempt_disable();
vcpu->arch.cr = (CR0_TBEGIN_FAILURE |
(vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)));
vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
(vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))

File diff suppressed because it is too large Load Diff

View File

@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
/* For a nested hypervisor, use the XICS via hcall */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
IPI_PRIORITY);
return;
}
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0;
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false);
else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
xirr = cpu_to_be32(retbuf[0]);
} else {
xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0;
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false);
else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
}
if (rc < 0)
return 1;
@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
if (xics_phys) {
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(), 0xff);
plpar_hcall_raw(H_EOI, retbuf, h_xirr);
} else if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
if (xics_phys)
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(),
IPI_PRIORITY);
} else if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
/*
 * Inject pending interrupts into the guest just before it is entered.
 *
 * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
 * Can we inject a Decrementer or an External interrupt?
 *
 * Three things happen here, in order:
 *  1. The pending-external bit from vcpu->arch.pending_exceptions is ORed
 *     into the LPCR at the MER bit position.
 *  2. If the guest MSR has EE set, an External interrupt (if pending) or
 *     otherwise a Decrementer interrupt (if DEC is negative) is synthesized
 *     by setting SRR0/SRR1, the PC and the MSR in the vcpu state.
 *  3. A pending doorbell request is transferred to DPDES.
 */
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
/* Note: this only ever sets the bit; it is not cleared here when ext == 0 */
lpcr |= ext << LPCR_MER_SH;
mtspr(SPRN_LPCR, lpcr);
isync();
/* Interrupts are only synthesized when the guest has MSR_EE set */
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
/* A pending external interrupt takes precedence over the decrementer */
vec = BOOK3S_INTERRUPT_EXTERNAL;
} else {
long int dec = mfspr(SPRN_DEC);
/*
 * Without LPCR_LD only the low 32 bits are used, sign-extended
 * (presumably the non-large-decrementer mode - confirm).
 */
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
vec = BOOK3S_INTERRUPT_DECREMENTER;
}
}
if (vec) {
unsigned long msr, old_msr = vcpu->arch.shregs.msr;
/* Save the interrupted PC and MSR in SRR0/SRR1, then jump to the vector */
kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
kvmppc_set_srr1(vcpu, old_msr);
kvmppc_set_pc(vcpu, vec);
msr = vcpu->arch.intr_msr;
/* If a transaction was active in the old MSR, the new MSR gets MSR_TS_S */
if (MSR_TM_ACTIVE(old_msr))
msr |= MSR_TS_S;
vcpu->arch.shregs.msr = msr;
}
if (vcpu->arch.doorbell_request) {
/* Post the doorbell in DPDES and mirror it in the vcore state */
mtspr(SPRN_DPDES, 1);
vcpu->arch.vcore->dpdes = 1;
/* Order the dpdes store before clearing the request flag */
smp_wmb();
vcpu->arch.doorbell_request = 0;
}
}

View File

@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Save host PMU registers */
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31:
bl kvmhv_save_host_pmu
/*
* Put whatever is in the decrementer into the
@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
/*
 * kvmhv_save_host_pmu: freeze the PMU and save the host PMU registers.
 *
 * The counters are always frozen (MMCR0 is written with only the FC bit set,
 * MMCRA is cleared, and on CPU_FTR_ARCH_207S MMCR2 is written as well).
 * The register values are stored to the HSTATE_* slots addressed off r13
 * only if the byte at PACA_PMCINUSE(r13) is non-zero; otherwise the routine
 * returns right after freezing.
 *
 * Saved when in use: MMCR0, MMCR1, MMCRA, SIAR, SDAR, PMC1-PMC6, plus
 * MMCR2 and SIER on CPU_FTR_ARCH_207S.
 *
 * Uses r3 and r5-r10 as scratch; returns with blr.
 */
_GLOBAL(kvmhv_save_host_pmu)
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
/* r7 = old MMCR0, r6 = old MMCRA, r8 = old MMCR2 (ARCH_207S only) */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Read all six counters, then store them as 32-bit words */
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31: blr

File diff suppressed because it is too large Load Diff

View File

@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void)
{
@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void)
{
@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
} else {
wait_for_tb_resync();
}
/*
* Reset tb_offset_applied so the guest exit code won't try
* to subtract the previous timebase offset from the timebase.
*/
if (local_paca->kvm_hstate.kvm_vcore)
local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
return 0;
}

View File

@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) {
@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{
/* Note: Only called on self ! */
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
}
@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
iosync();
plpar_hcall_raw(H_EOI, retbuf, hwirq);
return;
}
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)

File diff suppressed because it is too large Load Diff

View File

@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
/* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) {
@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_from_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_to_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST;

View File

@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
if (instr & (1 << 21))
vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
/* Set CR0 to 0b0010 */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
0x20000000;
return 1;
}
@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
vcpu->arch.regs.nip = vcpu->arch.tfhar;
copy_from_checkpoint(vcpu);
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
}

View File

@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
svcpu->cr = vcpu->arch.cr;
svcpu->cr = vcpu->arch.regs.ccr;
svcpu->xer = vcpu->arch.regs.xer;
svcpu->ctr = vcpu->arch.regs.ctr;
svcpu->lr = vcpu->arch.regs.link;
@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
vcpu->arch.cr = svcpu->cr;
vcpu->arch.regs.ccr = svcpu->cr;
vcpu->arch.regs.xer = svcpu->xer;
vcpu->arch.regs.ctr = svcpu->ctr;
vcpu->arch.regs.link = svcpu->lr;
@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
case BOOK3S_INTERRUPT_EXTERNAL_HV:
case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;

View File

@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
*/
if (new.out_ee) {
kvmppc_book3s_queue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
BOOK3S_INTERRUPT_EXTERNAL);
if (!change_self)
kvmppc_fast_vcpu_kick(icp->vcpu);
}
@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
u32 xirr;
/* First, remove EE from the processor */
kvmppc_book3s_dequeue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* ICP State: Accept_Interrupt
@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* We can remove EE from the current processor, the update
* transaction will set it again if needed
*/
kvmppc_book3s_dequeue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
do {
old_state = new_state = READ_ONCE(icp->state);
@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* Deassert the CPU interrupt request.
* icp_try_update will reassert it if necessary.
*/
kvmppc_book3s_dequeue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* Note that if we displace an interrupt from old_state.xisr,
@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
if (cpu_has_feature(CPU_FTR_ARCH_206) &&
cpu_has_feature(CPU_FTR_HVMODE)) {
/* Enable real mode support */
xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE;

View File

@@ -61,6 +61,69 @@
*/
#define XIVE_Q_GAP 2
/*
 * Push a vcpu's context to the XIVE on guest entry.
 * This assumes we are in virtual mode (MMU on)
 *
 * Does nothing if local_paca->kvm_hstate.xive_tima_virt is NULL.
 * Otherwise it writes the vcpu's saved w01 state and CAM word into the
 * TM_QW1_OS area of the TIMA, marks the vcpu as pushed, clears
 * irq_pending, and, if the escalation interrupt is on, masks it.
 */
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
return;
/* The two TIMA stores and the xive_pushed update are bracketed by eieio() */
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
/*
 * We clear the irq_pending flag. There is a small chance of a
 * race vs. the escalation interrupt happening on another
 * processor setting it again, but the only consequence is to
 * cause a spurious wakeup on the next H_CEDE, which is not an
 * issue.
 */
vcpu->arch.irq_pending = 0;
/*
 * In single escalation mode, if the escalation interrupt is
 * on, we mask it.
 */
if (vcpu->arch.xive_esc_on) {
/* This load sets PQ to 01 and returns the previous PQ value */
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
/*
 * We have a possible subtle race here: The escalation
 * interrupt might have fired and be on its way to the
 * host queue while we mask it, and if we unmask it
 * early enough (re-cede right away), there is a
 * theoretical possibility that it fires again, thus
 * landing in the target queue more than once which is
 * a big no-no.
 *
 * Fortunately, solving this is rather easy. If the
 * above load setting PQ to 01 returns a previous
 * value where P is set, then we know the escalation
 * interrupt is somewhere on its way to the host. In
 * that case we simply don't clear the xive_esc_on
 * flag below. It will be eventually cleared by the
 * handler for the escalation interrupt.
 *
 * Then, when doing a cede, we check that flag again
 * before re-enabling the escalation interrupt, and if
 * set, we abort the cede.
 */
if (!(pq & XIVE_ESB_VAL_P))
/* Now P is 0, we can clear the flag */
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page

View File

@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc);
/*
* Cleanup the old-style bits if needed (they may have been
* set by pull or an escalation interrupts).
*/
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr);

View File

@@ -182,7 +182,7 @@
*/
PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
stw r10, VCPU_CR(r4)
PPC_STL r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4)
PPC_STL r6, VCPU_GPR(R6)(r4)
@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, THREAD_NORMSAVE(0)(r10)
PPC_STL r5, VCPU_GPR(R5)(r11)
stw r13, VCPU_CR(r11)
PPC_STL r13, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R10)(r11)
PPC_LL r3, THREAD_NORMSAVE(2)(r10)
@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, GPR9(r8)
PPC_STL r5, VCPU_GPR(R5)(r11)
stw r9, VCPU_CR(r11)
PPC_STL r9, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R8)(r11)
PPC_LL r3, GPR10(r8)
@@ -643,7 +643,7 @@ lightweight_exit:
PPC_LL r3, VCPU_LR(r4)
PPC_LL r5, VCPU_XER(r4)
PPC_LL r6, VCPU_CTR(r4)
lwz r7, VCPU_CR(r4)
PPC_LL r7, VCPU_CR(r4)
PPC_LL r8, VCPU_PC(r4)
PPC_LD(r9, VCPU_SHARED_MSR, r11)
PPC_LL r0, VCPU_GPR(R0)(r4)

View File

@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = vcpu->arch.shared->msr;
vcpu->arch.regs.ccr = vcpu->arch.cr;
if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type);

View File

@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && radix_enabled());
break;
case KVM_CAP_PPC_MMU_HASH_V3:
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
cpu_has_feature(CPU_FTR_HVMODE));
break;
case KVM_CAP_PPC_NESTED_HV:
r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
!kvmppc_hv_ops->enable_nested(NULL));
break;
#endif
case KVM_CAP_SYNC_MMU:
@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break;
}
case KVM_CAP_PPC_NESTED_HV:
r = -EINVAL;
if (!is_kvmppc_hv_enabled(kvm) ||
!kvm->arch.kvm_ops->enable_nested)
break;
r = kvm->arch.kvm_ops->enable_nested(kvm);
break;
#endif
default:
r = -EINVAL;

View File

@@ -28,17 +28,25 @@
* Save transactional state and TM-related registers.
* Called with:
* - r3 pointing to the vcpu struct
* - r4 points to the MSR with current TS bits:
* - r4 containing the MSR with current TS bits:
* (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
* This can modify all checkpointed registers, but
* restores r1, r2 before exit.
* - r5 containing a flag indicating that non-volatile registers
* must be preserved.
* If r5 == 0, this can modify all checkpointed registers, but
* restores r1, r2 before exit. If r5 != 0, this restores the
* MSR TM/FP/VEC/VSX bits to their state on entry.
*/
_GLOBAL(__kvmppc_save_tm)
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -SWITCH_FRAME_SIZE(r1)
mr r9, r3
cmpdi cr7, r5, 0
/* Turn on TM. */
mfmsr r8
mr r10, r8
li r0, 1
rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
ori r8, r8, MSR_FP
@@ -51,6 +59,27 @@ _GLOBAL(__kvmppc_save_tm)
std r1, HSTATE_SCRATCH2(r13)
std r3, HSTATE_SCRATCH1(r13)
/* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
mfcr r6
SAVE_GPR(6, r1)
/* Save DSCR so we can restore it to avoid running with user value */
mfspr r7, SPRN_DSCR
SAVE_GPR(7, r1)
/*
* We are going to do treclaim., which will modify all checkpointed
* registers. Save the non-volatile registers on the stack if
* preservation of non-volatile state has been requested.
*/
beq cr7, 3f
SAVE_NVGPRS(r1)
/* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
li r0, 0
rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
SAVE_GPR(10, r1) /* final MSR value */
3:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
BEGIN_FTR_SECTION
/* Emulation of the treclaim instruction needs TEXASR before treclaim */
@@ -74,22 +103,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
std r9, PACATMSCRATCH(r13)
ld r9, HSTATE_SCRATCH1(r13)
/* Get a few more GPRs free. */
std r29, VCPU_GPRS_TM(29)(r9)
std r30, VCPU_GPRS_TM(30)(r9)
std r31, VCPU_GPRS_TM(31)(r9)
/* Save away PPR and DSCR soon so don't run with user values. */
mfspr r31, SPRN_PPR
/* Save away PPR soon so we don't run with user value. */
std r0, VCPU_GPRS_TM(0)(r9)
mfspr r0, SPRN_PPR
HMT_MEDIUM
mfspr r30, SPRN_DSCR
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
ld r29, HSTATE_DSCR(r13)
mtspr SPRN_DSCR, r29
#endif
/* Save all but r9, r13 & r29-r31 */
reg = 0
/* Reload stack pointer. */
std r1, VCPU_GPRS_TM(1)(r9)
ld r1, HSTATE_SCRATCH2(r13)
/* Set MSR RI now we have r1 and r13 back. */
std r2, VCPU_GPRS_TM(2)(r9)
li r2, MSR_RI
mtmsrd r2, 1
/* Reload TOC pointer. */
ld r2, PACATOC(r13)
/* Save all but r0-r2, r9 & r13 */
reg = 3
.rept 29
.if (reg != 9) && (reg != 13)
std reg, VCPU_GPRS_TM(reg)(r9)
@@ -103,33 +135,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
ld r4, PACATMSCRATCH(r13)
std r4, VCPU_GPRS_TM(9)(r9)
/* Reload stack pointer and TOC. */
ld r1, HSTATE_SCRATCH2(r13)
ld r2, PACATOC(r13)
/* Set MSR RI now we have r1 and r13 back. */
li r5, MSR_RI
mtmsrd r5, 1
/* Save away checkpinted SPRs. */
std r31, VCPU_PPR_TM(r9)
std r30, VCPU_DSCR_TM(r9)
mflr r5
/* Restore host DSCR and CR values, after saving guest values */
mfcr r6
mfspr r7, SPRN_DSCR
stw r6, VCPU_CR_TM(r9)
std r7, VCPU_DSCR_TM(r9)
REST_GPR(6, r1)
REST_GPR(7, r1)
mtcr r6
mtspr SPRN_DSCR, r7
/* Save away checkpointed SPRs. */
std r0, VCPU_PPR_TM(r9)
mflr r5
mfctr r7
mfspr r8, SPRN_AMR
mfspr r10, SPRN_TAR
mfxer r11
std r5, VCPU_LR_TM(r9)
stw r6, VCPU_CR_TM(r9)
std r7, VCPU_CTR_TM(r9)
std r8, VCPU_AMR_TM(r9)
std r10, VCPU_TAR_TM(r9)
std r11, VCPU_XER_TM(r9)
/* Restore r12 as trap number. */
lwz r12, VCPU_TRAP(r9)
/* Save FP/VSX. */
addi r3, r9, VCPU_FPRS_TM
bl store_fp_state
@@ -137,6 +165,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
bl store_vr_state
mfspr r6, SPRN_VRSAVE
stw r6, VCPU_VRSAVE_TM(r9)
/* Restore non-volatile registers if requested to */
beq cr7, 1f
REST_NVGPRS(r1)
REST_GPR(10, r1)
1:
/*
* We need to save these SPRs after the treclaim so that the software
@@ -146,12 +179,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
*/
mfspr r7, SPRN_TEXASR
std r7, VCPU_TEXASR(r9)
11:
mfspr r5, SPRN_TFHAR
mfspr r6, SPRN_TFIAR
std r5, VCPU_TFHAR(r9)
std r6, VCPU_TFIAR(r9)
/* Restore MSR state if requested */
beq cr7, 2f
mtmsrd r10, 0
2:
addi r1, r1, SWITCH_FRAME_SIZE
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
@@ -161,49 +198,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
* be invoked from C function by PR KVM only.
*/
_GLOBAL(_kvmppc_save_tm_pr)
mflr r5
std r5, PPC_LR_STKOFF(r1)
stdu r1, -SWITCH_FRAME_SIZE(r1)
SAVE_NVGPRS(r1)
/* save MSR since TM/math bits might be impacted
* by __kvmppc_save_tm().
*/
mfmsr r5
SAVE_GPR(5, r1)
/* also save DSCR/CR/TAR so that it can be recovered later */
mfspr r6, SPRN_DSCR
SAVE_GPR(6, r1)
mfcr r7
stw r7, _CCR(r1)
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -PPC_MIN_STKFRM(r1)
mfspr r8, SPRN_TAR
SAVE_GPR(8, r1)
std r8, PPC_MIN_STKFRM-8(r1)
li r5, 1 /* preserve non-volatile registers */
bl __kvmppc_save_tm
REST_GPR(8, r1)
ld r8, PPC_MIN_STKFRM-8(r1)
mtspr SPRN_TAR, r8
ld r7, _CCR(r1)
mtcr r7
REST_GPR(6, r1)
mtspr SPRN_DSCR, r6
/* need preserve current MSR's MSR_TS bits */
REST_GPR(5, r1)
mfmsr r6
rldicl r6, r6, 64 - MSR_TS_S_LG, 62
rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
mtmsrd r5
REST_NVGPRS(r1)
addi r1, r1, SWITCH_FRAME_SIZE
ld r5, PPC_LR_STKOFF(r1)
mtlr r5
addi r1, r1, PPC_MIN_STKFRM
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
@@ -215,15 +225,21 @@ EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
* - r4 is the guest MSR with desired TS bits:
* For HV KVM, it is VCPU_MSR
* For PR KVM, it is provided by caller
* This potentially modifies all checkpointed registers.
* It restores r1, r2 from the PACA.
* - r5 containing a flag indicating that non-volatile registers
* must be preserved.
* If r5 == 0, this potentially modifies all checkpointed registers, but
* restores r1, r2 from the PACA before exit.
* If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
*/
_GLOBAL(__kvmppc_restore_tm)
mflr r0
std r0, PPC_LR_STKOFF(r1)
cmpdi cr7, r5, 0
/* Turn on TM/FP/VSX/VMX so we can restore them. */
mfmsr r5
mr r10, r5
li r6, MSR_TM >> 32
sldi r6, r6, 32
or r5, r5, r6
@@ -244,8 +260,7 @@ _GLOBAL(__kvmppc_restore_tm)
mr r5, r4
rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
beqlr /* TM not active in guest */
std r1, HSTATE_SCRATCH2(r13)
beq 9f /* TM not active in guest */
/* Make sure the failure summary is set, otherwise we'll program check
* when we trechkpt. It's possible that this might have been not set
@@ -255,6 +270,26 @@ _GLOBAL(__kvmppc_restore_tm)
oris r7, r7, (TEXASR_FS)@h
mtspr SPRN_TEXASR, r7
/*
* Make a stack frame and save non-volatile registers if requested.
*/
stdu r1, -SWITCH_FRAME_SIZE(r1)
std r1, HSTATE_SCRATCH2(r13)
mfcr r6
mfspr r7, SPRN_DSCR
SAVE_GPR(2, r1)
SAVE_GPR(6, r1)
SAVE_GPR(7, r1)
beq cr7, 4f
SAVE_NVGPRS(r1)
/* MSR[TS] will be 1 (suspended) once we do trechkpt */
li r0, 1
rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
SAVE_GPR(10, r1) /* final MSR value */
4:
/*
* We need to load up the checkpointed state for the guest.
* We need to do this early as it will blow away any GPRs, VSRs and
@@ -291,8 +326,6 @@ _GLOBAL(__kvmppc_restore_tm)
ld r29, VCPU_DSCR_TM(r3)
ld r30, VCPU_PPR_TM(r3)
std r2, PACATMSCRATCH(r13) /* Save TOC */
/* Clear the MSR RI since r1, r13 are all going to be foobar. */
li r5, 0
mtmsrd r5, 1
@@ -318,18 +351,31 @@ _GLOBAL(__kvmppc_restore_tm)
/* Now let's get back the state we need. */
HMT_MEDIUM
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
ld r29, HSTATE_DSCR(r13)
mtspr SPRN_DSCR, r29
#endif
ld r1, HSTATE_SCRATCH2(r13)
ld r2, PACATMSCRATCH(r13)
REST_GPR(7, r1)
mtspr SPRN_DSCR, r7
/* Set the MSR RI since we have our registers back. */
li r5, MSR_RI
mtmsrd r5, 1
/* Restore TOC pointer and CR */
REST_GPR(2, r1)
REST_GPR(6, r1)
mtcr r6
/* Restore non-volatile registers if requested to. */
beq cr7, 5f
REST_GPR(10, r1)
REST_NVGPRS(r1)
5: addi r1, r1, SWITCH_FRAME_SIZE
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
9: /* Restore MSR bits if requested */
beqlr cr7
mtmsrd r10, 0
blr
/*
@@ -337,47 +383,23 @@ _GLOBAL(__kvmppc_restore_tm)
* can be invoked from C function by PR KVM only.
*/
_GLOBAL(_kvmppc_restore_tm_pr)
mflr r5
std r5, PPC_LR_STKOFF(r1)
stdu r1, -SWITCH_FRAME_SIZE(r1)
SAVE_NVGPRS(r1)
/* save MSR to avoid TM/math bits change */
mfmsr r5
SAVE_GPR(5, r1)
/* also save DSCR/CR/TAR so that it can be recovered later */
mfspr r6, SPRN_DSCR
SAVE_GPR(6, r1)
mfcr r7
stw r7, _CCR(r1)
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -PPC_MIN_STKFRM(r1)
/* save TAR so that it can be recovered later */
mfspr r8, SPRN_TAR
SAVE_GPR(8, r1)
std r8, PPC_MIN_STKFRM-8(r1)
li r5, 1
bl __kvmppc_restore_tm
REST_GPR(8, r1)
ld r8, PPC_MIN_STKFRM-8(r1)
mtspr SPRN_TAR, r8
ld r7, _CCR(r1)
mtcr r7
REST_GPR(6, r1)
mtspr SPRN_DSCR, r6
/* need preserve current MSR's MSR_TS bits */
REST_GPR(5, r1)
mfmsr r6
rldicl r6, r6, 64 - MSR_TS_S_LG, 62
rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
mtmsrd r5
REST_NVGPRS(r1)
addi r1, r1, SWITCH_FRAME_SIZE
ld r5, PPC_LR_STKOFF(r1)
mtlr r5
addi r1, r1, PPC_MIN_STKFRM
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);

View File

@@ -14,7 +14,6 @@
{0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \
{0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \

View File

@@ -830,6 +830,15 @@ void radix__flush_pwc_lpid(unsigned int lpid)
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
 * Flush partition scoped translations from LPID (=LPIDR)
 *
 * Passes @lpid to _tlbie_lpid() with RIC_FLUSH_ALL.
 */
void radix__flush_tlb_lpid(unsigned int lpid)
{
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/

View File

@@ -783,6 +783,17 @@ config VFIO_CCW
To compile this driver as a module, choose M here: the
module will be called vfio_ccw.
config VFIO_AP
def_tristate n
prompt "VFIO support for AP devices"
depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
help
This driver grants access to Adjunct Processor (AP) devices
via the VFIO mediated device interface.
To compile this driver as a module, choose M here: the module
will be called vfio_ap.
endmenu
menu "Dump support"

View File

@@ -44,6 +44,7 @@
#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
#define KVM_REQ_VSIE_RESTART KVM_ARCH_REQ(5)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -186,6 +187,7 @@ struct kvm_s390_sie_block {
#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
#define ECA_APIE 0x00000008
#define ECA_SII 0x00000001
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
@@ -237,7 +239,11 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
__u8 reservedb0[20]; /* 0x00b0 */
__u8 reservedb0[8]; /* 0x00b0 */
#define HPID_KVM 0x4
#define HPID_VSIE 0x5
__u8 hpid; /* 0x00b8 */
__u8 reservedb9[11]; /* 0x00b9 */
__u16 extcpuaddr; /* 0x00c4 */
__u16 eic; /* 0x00c6 */
__u32 reservedc8; /* 0x00c8 */
@@ -255,6 +261,8 @@ struct kvm_s390_sie_block {
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[12]; /* 0x00f0 */
#define CRYCB_FORMAT_MASK 0x00000003
#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
@@ -715,6 +723,7 @@ struct kvm_s390_crypto {
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
__u8 apie;
};
#define APCB0_MASK_SIZE 1
@@ -855,6 +864,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;

View File

@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4
#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5
/* kvm attributes for migration mode */
#define KVM_S390_VM_MIGRATION_STOP 0

View File

@@ -40,6 +40,7 @@
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -844,20 +845,24 @@ void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_vcpu_crypto_setup(vcpu);
/* recreate the shadow crycb by leaving the VSIE handler */
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
}
kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
if (!test_kvm_facility(kvm, 76))
return -EINVAL;
mutex_lock(&kvm->lock);
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
if (!test_kvm_facility(kvm, 76)) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
get_random_bytes(
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
@@ -865,6 +870,10 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
if (!test_kvm_facility(kvm, 76)) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
get_random_bytes(
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
@@ -872,17 +881,39 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
if (!test_kvm_facility(kvm, 76)) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
if (!test_kvm_facility(kvm, 76)) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
break;
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
if (!ap_instructions_available()) {
mutex_unlock(&kvm->lock);
return -EOPNOTSUPP;
}
kvm->arch.crypto.apie = 1;
break;
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
if (!ap_instructions_available()) {
mutex_unlock(&kvm->lock);
return -EOPNOTSUPP;
}
kvm->arch.crypto.apie = 0;
break;
default:
mutex_unlock(&kvm->lock);
return -ENXIO;
@@ -1491,6 +1522,10 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
ret = 0;
break;
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
ret = ap_instructions_available() ? 0 : -ENXIO;
break;
default:
ret = -ENXIO;
break;
@@ -1992,55 +2027,101 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
/*
 * kvm_s390_query_ap_config - issue PQAP(QCI) and return its condition code
 * @config: caller-provided buffer; its first 128 bytes are cleared before
 *          the instruction is issued and its address is loaded into
 *          register 2
 *
 * Returns the condition code extracted with ipm/srl after the instruction.
 *
 * NOTE(review): EX_TABLE(0b, 1b) appears to resume at label 1 when a fault
 * is taken at label 0, which bypasses the ipm/srl pair and would leave cc
 * at its initial value of 0 - confirm against the exception-table semantics.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
/* value loaded into register 0 for the instruction */
u32 fcn_code = 0x04000000UL;
u32 cc = 0;
memset(config, 0, 128);
/* registers 0 and 2 are written explicitly, hence listed as clobbers */
asm volatile(
"lgr 0,%1\n"
"lgr 2,%2\n"
".long 0xb2af0000\n" /* PQAP(QCI) */
"0: ipm %0\n"
"srl %0,28\n"
"1:\n"
EX_TABLE(0b, 1b)
: "+r" (cc)
: "r" (fcn_code), "r" (config)
: "cc", "0", "2", "memory"
);
return cc;
}
static int kvm_s390_apxa_installed(void)
{
u8 config[128];
int cc;
struct ap_config_info info;
if (test_facility(12)) {
cc = kvm_s390_query_ap_config(config);
if (cc)
pr_err("PQAP(QCI) failed with cc=%d", cc);
else
return config[0] & 0x40;
if (ap_instructions_available()) {
if (ap_qci(&info) == 0)
return info.apxa;
}
return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 *
 * The designation is rebuilt from the address of the VM's CRYCB; the format
 * bits are then chosen from facility 76 (MSAX3) and the APXA query.
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	/* Start from the CRYCB origin with the format bits cleared (format 0) */
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Without MSAX3 the designation stays at format 0 */
	if (test_kvm_facility(kvm, 76))
		kvm->arch.crypto.crycbd |= kvm_s390_apxa_installed() ?
					   CRYCB_FORMAT2 : CRYCB_FORMAT1;
}
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm)
{
struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
mutex_lock(&kvm->lock);
kvm_s390_vcpu_block_all(kvm);
switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
case CRYCB_FORMAT2: /* APCB1 use 256 bits */
memcpy(crycb->apcb1.apm, apm, 32);
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
apm[0], apm[1], apm[2], apm[3]);
memcpy(crycb->apcb1.aqm, aqm, 32);
VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
aqm[0], aqm[1], aqm[2], aqm[3]);
memcpy(crycb->apcb1.adm, adm, 32);
VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
adm[0], adm[1], adm[2], adm[3]);
break;
case CRYCB_FORMAT1:
case CRYCB_FORMAT0: /* Fall through both use APCB0 */
memcpy(crycb->apcb0.apm, apm, 8);
memcpy(crycb->apcb0.aqm, aqm, 2);
memcpy(crycb->apcb0.adm, adm, 2);
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
apm[0], *((unsigned short *)aqm),
*((unsigned short *)adm));
break;
default: /* Can not happen */
break;
}
/* recreate the shadow crycb for each vcpu */
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
mutex_lock(&kvm->lock);
kvm_s390_vcpu_block_all(kvm);
memset(&kvm->arch.crypto.crycb->apcb0, 0,
sizeof(kvm->arch.crypto.crycb->apcb0));
memset(&kvm->arch.crypto.crycb->apcb1, 0,
sizeof(kvm->arch.crypto.crycb->apcb1));
VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
/* recreate the shadow crycb for each vcpu */
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
struct cpuid cpuid;
@@ -2052,12 +2133,12 @@ static u64 kvm_s390_get_initial_cpuid(void)
static void kvm_s390_crypto_init(struct kvm *kvm)
{
if (!test_kvm_facility(kvm, 76))
return;
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
if (!test_kvm_facility(kvm, 76))
return;
/* Enable AES/DEA protected key functions by default */
kvm->arch.crypto.aes_kw = 1;
kvm->arch.crypto.dea_kw = 1;
@@ -2583,17 +2664,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
if (!test_kvm_facility(vcpu->kvm, 76))
/*
* If the AP instructions are not being interpreted and the MSAX3
* facility is not configured for the guest, there is nothing to set up.
*/
if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
return;
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
/* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
@@ -2685,6 +2774,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
vcpu->arch.sie_block->hpid = HPID_KVM;
kvm_s390_vcpu_crypto_setup(vcpu);
return rc;
@@ -2768,18 +2859,25 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
exit_sie(vcpu);
}
/* True while PROG_BLOCK_SIE or PROG_REQUEST is set in the VCPU's prog20. */
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	const int inhibit_bits = PROG_BLOCK_SIE | PROG_REQUEST;

	return (atomic_read(&vcpu->arch.sie_block->prog20) & inhibit_bits) != 0;
}
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
* Kick a guest cpu out of SIE and wait until SIE is not running.
* Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
kvm_s390_vsie_kick(vcpu);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -3196,6 +3294,8 @@ retry:
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
return 0;
}

View File

@@ -290,6 +290,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);

View File

@@ -135,14 +135,148 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
atomic_set(&scb_s->cpuflags, newflags);
return 0;
}
/* Copy to APCB FORMAT1 from APCB FORMAT0 */
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
{
struct kvm_s390_apcb0 tmp;
/*
if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
return 0;
}
/**
* setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
* @apcb_o: pointer to start of original apcb in the guest2
* @apcb_h: pointer to start of apcb in the guest1
*
* Returns 0 and -EFAULT on error reading guest apcb
*/
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
unsigned long apcb_o, unsigned long *apcb_h)
{
if (read_guest_real(vcpu, apcb_o, apcb_s,
sizeof(struct kvm_s390_apcb0)))
return -EFAULT;
bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
return 0;
}
/**
* setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
* @vcpu: pointer to the virtual CPU
* @apcb_s: pointer to start of apcb in the shadow crycb
* @apcb_o: pointer to start of original guest apcb
* @apcb_h: pointer to start of apcb in the host
*
* Returns 0 and -EFAULT on error reading guest apcb
*/
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
unsigned long apcb_o,
unsigned long *apcb_h)
{
if (read_guest_real(vcpu, apcb_o, apcb_s,
sizeof(struct kvm_s390_apcb1)))
return -EFAULT;
bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
return 0;
}
/**
 * setup_apcb - Create a shadow copy of the apcb.
 * @vcpu: pointer to the virtual CPU
 * @crycb_s: pointer to shadow crycb
 * @crycb_o: pointer to original guest crycb
 * @crycb_h: pointer to the host crycb
 * @fmt_o: format of the original guest crycb.
 * @fmt_h: format of the host crycb.
 *
 * Picks the copy helper that matches the guest/host format pair:
 *   guest format 2 requires host format 2;
 *   guest format 1 accepts host format 2 or 1;
 *   guest format 0 accepts host format 2, 1 or 0.
 *
 * Return 0 or an error number: -EACCES if the guest crycb crosses a page
 * boundary (checked for guest formats 2 and 0), -EINVAL if the formats are
 * incompatible, or the error of the copy helper.
 */
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
	       const u32 crycb_o,
	       struct kvm_s390_crypto_cb *crycb_h,
	       int fmt_o, int fmt_h)
{
	/* only used to compute guest addresses of the apcb members */
	struct kvm_s390_crypto_cb *crycb =
		(struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;

	if (fmt_o == CRYCB_FORMAT2) {
		if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
			return -EACCES;
		if (fmt_h != CRYCB_FORMAT2)
			return -EINVAL;
		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
				    (unsigned long) &crycb->apcb1,
				    (unsigned long *)&crycb_h->apcb1);
	}

	if (fmt_o == CRYCB_FORMAT1) {
		if (fmt_h == CRYCB_FORMAT2)
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    (unsigned long) &crycb->apcb0,
					    &crycb_h->apcb1);
		if (fmt_h == CRYCB_FORMAT1)
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    (unsigned long) &crycb->apcb0,
					    (unsigned long *) &crycb_h->apcb0);
		return -EINVAL;
	}

	if (fmt_o == CRYCB_FORMAT0) {
		if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
			return -EACCES;
		if (fmt_h == CRYCB_FORMAT2)
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    (unsigned long) &crycb->apcb0,
					    &crycb_h->apcb1);
		if (fmt_h == CRYCB_FORMAT1 || fmt_h == CRYCB_FORMAT0)
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    (unsigned long) &crycb->apcb0,
					    (unsigned long *) &crycb_h->apcb0);
	}

	return -EINVAL;
}
/**
* shadow_crycb - Create a shadow copy of the crycb block
* @vcpu: a pointer to the virtual CPU
* @vsie_page: a pointer to internal data used for the vSIE
*
* Create a shadow copy of the crycb block and setup key wrapping, if
* requested for guest 3 and enabled for guest 2.
*
* We only accept format-1 (no AP in g2), but convert it into format-2
* We accept format-1 or format-2, but we convert format-1 into format-2
* in the shadow CRYCB.
* Using format-2 enables the firmware to choose the right format when
* scheduling the SIE.
* There is nothing to do for format-0.
*
* This function centralizes the issuing of set_validity_icpt() for all
* the subfunctions working on the crycb.
*
* Returns: - 0 if shadowed or nothing to do
* - > 0 if control has to be given to guest 2
*/
@@ -154,23 +288,40 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
int apie_h;
int key_msk = test_kvm_facility(vcpu->kvm, 76);
int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
int ret = 0;
scb_s->crycbd = 0;
if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
return 0;
/* format-1 is supported with message-security-assist extension 3 */
if (!test_kvm_facility(vcpu->kvm, 76))
apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
if (!apie_h && !key_msk)
return 0;
if (!crycb_addr)
return set_validity_icpt(scb_s, 0x0039U);
if (fmt_o == CRYCB_FORMAT1)
if ((crycb_addr & PAGE_MASK) !=
((crycb_addr + 128) & PAGE_MASK))
return set_validity_icpt(scb_s, 0x003CU);
if (apie_h && (scb_o->eca & ECA_APIE)) {
ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
vcpu->kvm->arch.crypto.crycb,
fmt_o, fmt_h);
if (ret)
goto end;
scb_s->eca |= scb_o->eca & ECA_APIE;
}
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
if (!ecb3_flags)
return 0;
if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
return set_validity_icpt(scb_s, 0x003CU);
else if (!crycb_addr)
return set_validity_icpt(scb_s, 0x0039U);
goto end;
/* copy only the wrapping keys */
if (read_guest_real(vcpu, crycb_addr + 72,
@@ -178,8 +329,6 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;
scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
CRYCB_FORMAT2;
/* xor both blocks in one run */
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -187,6 +336,16 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
/* as 56%8 == 0, bitmap_xor won't overwrite any data */
bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
end:
switch (ret) {
case -EINVAL:
return set_validity_icpt(scb_s, 0x0020U);
case -EFAULT:
return set_validity_icpt(scb_s, 0x0035U);
case -EACCES:
return set_validity_icpt(scb_s, 0x003CU);
}
scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
return 0;
}
@@ -383,6 +542,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 156))
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
scb_s->hpid = HPID_VSIE;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
@@ -830,7 +991,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
int guest_bp_isolation;
int rc;
int rc = 0;
handle_last_fault(vcpu, vsie_page);
@@ -858,7 +1019,18 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
guest_enter_irqoff();
local_irq_enable();
rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
/*
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
* and VCPU requests also hinder the vSIE from running and lead
* to an immediate exit. kvm_s390_vsie_kick() has to be used to
* also kick the vSIE.
*/
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
local_irq_disable();
guest_exit_irqoff();
@@ -1005,7 +1177,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc == -EAGAIN)
rc = 0;
if (rc || scb_s->icptcode || signal_pending(current) ||
kvm_s390_vcpu_has_irq(vcpu, 0))
kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu))
break;
}
@@ -1122,7 +1295,8 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu))
return 0;
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);

View File

@@ -907,10 +907,16 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
spin_lock(&gmap->guest_table_lock);
pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
if (!pmdp)
return NULL;
if (!pmdp || pmd_none(*pmdp)) {
/* without huge pages, there is no need to take the table lock */
if (!gmap->mm->context.allow_gmap_hpage_1m)
return pmd_none(*pmdp) ? NULL : pmdp;
spin_lock(&gmap->guest_table_lock);
if (pmd_none(*pmdp)) {
spin_unlock(&gmap->guest_table_lock);
return NULL;
}

View File

@@ -106,6 +106,8 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){
12, /* AP Query Configuration Information */
15, /* AP Facilities Test */
156, /* etoken facility */
-1 /* END */
}

View File

@@ -102,7 +102,15 @@
#define UNMAPPED_GVA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
#define KVM_NR_PAGE_SIZES 3
enum {
PT_PAGE_TABLE_LEVEL = 1,
PT_DIRECTORY_LEVEL = 2,
PT_PDPE_LEVEL = 3,
/* set max level to the biggest one */
PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL,
};
#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \
PT_PAGE_TABLE_LEVEL + 1)
#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -177,6 +185,7 @@ enum {
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
#define DR6_RTM (1 << 16)
#define DR6_FIXED_1 0xfffe0ff0
#define DR6_INIT 0xffff0ff0
@@ -247,7 +256,7 @@ struct kvm_mmu_memory_cache {
* @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
*/
union kvm_mmu_page_role {
unsigned word;
u32 word;
struct {
unsigned level:4;
unsigned cr4_pae:1;
@@ -273,6 +282,34 @@ union kvm_mmu_page_role {
};
};
union kvm_mmu_extended_role {
/*
 * This structure complements kvm_mmu_page_role, caching everything else needed
 * for MMU configuration. If nothing in either structure changed, MMU
 * re-configuration can be skipped. The @valid bit is set on first usage so
 * that an all-zero structure is not treated as valid data.
 *
 * @word aliases the bit-fields below as one 32-bit value.
 */
u32 word;
struct {
/* Set on first usage; distinguishes populated data from all-zero. */
unsigned int valid:1;
/*
 * NOTE(review): the remaining flags presumably cache the correspondingly
 * named guest CR0/CR4 bits and EPT execute-only support - confirm against
 * the code that fills this structure.
 */
unsigned int execonly:1;
unsigned int cr0_pg:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
};
};
/*
 * Complete MMU role: the page role (@base) followed by the extended role
 * (@ext). Each member exposes a 32-bit word, and @as_u64 aliases the pair as
 * a single 64-bit value.
 */
union kvm_mmu_role {
u64 as_u64;
struct {
union kvm_mmu_page_role base;
union kvm_mmu_extended_role ext;
};
};
struct kvm_rmap_head {
unsigned long val;
};
@@ -280,18 +317,18 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
bool unsync;
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
gfn_t gfn;
union kvm_mmu_page_role role;
gfn_t gfn;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
@@ -360,7 +397,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
union kvm_mmu_page_role base_role;
union kvm_mmu_role mmu_role;
u8 root_level;
u8 shadow_root_level;
u8 ept_ad;
@@ -490,7 +527,7 @@ struct kvm_vcpu_hv {
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
cpumask_t tlb_lush;
cpumask_t tlb_flush;
};
struct kvm_vcpu_arch {
@@ -534,7 +571,13 @@ struct kvm_vcpu_arch {
* the paging mode of the l1 guest. This context is always used to
* handle faults.
*/
struct kvm_mmu mmu;
struct kvm_mmu *mmu;
/* Non-nested MMU for L1 */
struct kvm_mmu root_mmu;
/* L1 MMU when running nested */
struct kvm_mmu guest_mmu;
/*
* Paging state of an L2 guest (used for nested npt)
@@ -585,6 +628,8 @@ struct kvm_vcpu_arch {
bool has_error_code;
u8 nr;
u32 error_code;
unsigned long payload;
bool has_payload;
u8 nested_apf;
} exception;
@@ -781,6 +826,9 @@ struct kvm_hv {
u64 hv_reenlightenment_control;
u64 hv_tsc_emulation_control;
u64 hv_tsc_emulation_status;
/* How many vCPUs have VP index != vCPU index */
atomic_t num_mismatched_vp_indexes;
};
enum kvm_irqchip_mode {
@@ -871,6 +919,7 @@ struct kvm_arch {
bool x2apic_broadcast_quirk_disabled;
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
};
struct kvm_vm_stat {
@@ -1133,6 +1182,9 @@ struct kvm_x86_ops {
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
};
struct kvm_arch_async_pf {
@@ -1170,7 +1222,6 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1324,7 +1375,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,

View File

@@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void)
*/
static inline void cpu_vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
asm volatile ("vmxoff");
cr4_clear_bits(X86_CR4_VMXE);
}

View File

@@ -503,19 +503,6 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
struct vmx_msr_entry {
u32 index;
u32 reserved;

View File

@@ -288,6 +288,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -299,7 +300,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 has_error_code;
__u8 pad;
__u8 pending;
__u32 error_code;
} exception;
struct {
@@ -322,7 +323,9 @@ struct kvm_vcpu_events {
__u8 smm_inside_nmi;
__u8 latched_init;
} smi;
__u32 reserved[9];
__u8 reserved[27];
__u8 exception_has_payload;
__u64 exception_payload;
};
/* for KVM_GET/SET_DEBUGREGS */
@@ -381,6 +384,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002

View File

@@ -36,6 +36,8 @@
#include "trace.h"
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
return atomic64_read(&synic->sint[sint]);
@@ -132,8 +134,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu *vcpu = NULL;
int i;
if (vpidx < KVM_MAX_VCPUS)
vcpu = kvm_get_vcpu(kvm, vpidx);
if (vpidx >= KVM_MAX_VCPUS)
return NULL;
vcpu = kvm_get_vcpu(kvm, vpidx);
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -689,6 +693,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
stimer_cleanup(&hv_vcpu->stimer[i]);
}
/*
 * Report whether the VP assist page is usable for @vcpu: the enable bit must
 * be set in the cached HV_X64_MSR_VP_ASSIST_PAGE value (hv_vapic) and the
 * PV EOI MSR value must have KVM_MSR_ENABLED set.
 */
bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE) &&
	       (vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED);
}
EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
/*
 * Copy the guest's VP assist page into @assist_page.
 *
 * Returns true only when the assist page is enabled for @vcpu and the cached
 * guest read (through vcpu->arch.pv_eoi.data) succeeded; false otherwise, in
 * which case the contents of @assist_page must not be relied upon.
 */
bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
			    struct hv_vp_assist_page *assist_page)
{
	/* kvm_read_guest_cached() returns zero on success. */
	return kvm_hv_assist_page_enabled(vcpu) &&
	       !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
				      assist_page, sizeof(*assist_page));
}
EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct hv_message *msg = &stimer->msg;
@@ -1040,21 +1062,41 @@ static u64 current_task_runtime_100ns(void)
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX:
if (!host)
case HV_X64_MSR_VP_INDEX: {
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
int vcpu_idx = kvm_vcpu_get_idx(vcpu);
u32 new_vp_index = (u32)data;
if (!host || new_vp_index >= KVM_MAX_VCPUS)
return 1;
hv->vp_index = (u32)data;
if (new_vp_index == hv_vcpu->vp_index)
return 0;
/*
* The VP index is initialized to vcpu_index by
* kvm_hv_vcpu_postcreate so they initially match. Now the
* VP index is changing, adjust num_mismatched_vp_indexes if
* it now matches or no longer matches vcpu_idx.
*/
if (hv_vcpu->vp_index == vcpu_idx)
atomic_inc(&hv->num_mismatched_vp_indexes);
else if (new_vp_index == vcpu_idx)
atomic_dec(&hv->num_mismatched_vp_indexes);
hv_vcpu->vp_index = new_vp_index;
break;
}
case HV_X64_MSR_VP_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv->hv_vapic = data;
if (kvm_lapic_enable_pv_eoi(vcpu, 0))
hv_vcpu->hv_vapic = data;
if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1062,12 +1104,19 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
if (__clear_user((void __user *)addr, PAGE_SIZE))
/*
* Clear the apic_assist portion of struct hv_vp_assist_page
* only; there can be valuable data in the rest which needs
* to be preserved, e.g. on migration.
*/
if (__clear_user((void __user *)addr, sizeof(u32)))
return 1;
hv->hv_vapic = data;
hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu,
gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
sizeof(struct hv_vp_assist_page)))
return 1;
break;
}
@@ -1080,7 +1129,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
case HV_X64_MSR_VP_RUNTIME:
if (!host)
return 1;
hv->runtime_offset = data - current_task_runtime_100ns();
hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1172,11 +1221,11 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
bool host)
{
u64 data = 0;
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX:
data = hv->vp_index;
data = hv_vcpu->vp_index;
break;
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
@@ -1185,10 +1234,10 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
case HV_X64_MSR_VP_ASSIST_PAGE:
data = hv->hv_vapic;
data = hv_vcpu->hv_vapic;
break;
case HV_X64_MSR_VP_RUNTIME:
data = current_task_runtime_100ns() + hv->runtime_offset;
data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1255,32 +1304,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
static __always_inline unsigned long *sparse_set_to_vcpu_mask(
struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
int i = 0, j;
struct kvm_hv *hv = &kvm->arch.hyperv;
struct kvm_vcpu *vcpu;
int i, bank, sbank = 0;
if (!(valid_bank_mask & BIT_ULL(bank_no)))
return -1;
memset(vp_bitmap, 0,
KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
vp_bitmap[bank] = sparse_banks[sbank++];
for (j = 0; j < bank_no; j++)
if (valid_bank_mask & BIT_ULL(j))
i++;
if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
/* for all vcpus vp_index == vcpu_idx */
return (unsigned long *)vp_bitmap;
}
return i;
bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
(unsigned long *)vp_bitmap))
__set_bit(i, vcpu_bitmap);
}
return vcpu_bitmap;
}
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
u16 rep_cnt, bool ex)
{
struct kvm *kvm = current_vcpu->kvm;
struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
struct kvm_vcpu *vcpu;
unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
unsigned long valid_bank_mask = 0;
u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
unsigned long *vcpu_mask;
u64 valid_bank_mask;
u64 sparse_banks[64];
int sparse_banks_len, i;
int sparse_banks_len;
bool all_cpus;
if (!ex) {
@@ -1290,6 +1354,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
trace_kvm_hv_flush_tlb(flush.processor_mask,
flush.address_space, flush.flags);
valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
} else {
@@ -1306,7 +1371,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
sparse_banks_len =
bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
sizeof(sparse_banks[0]);
if (!sparse_banks_len && !all_cpus)
@@ -1321,48 +1387,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
cpumask_clear(&hv_current->tlb_lush);
cpumask_clear(&hv_vcpu->tlb_flush);
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
int bank = hv->vp_index / 64, sbank = 0;
if (!all_cpus) {
/* Banks >64 can't be represented */
if (bank >= 64)
continue;
/* Non-ex hypercalls can only address first 64 vCPUs */
if (!ex && bank)
continue;
if (ex) {
/*
* Check is the bank of this vCPU is in sparse
* set and get the sparse bank number.
*/
sbank = get_sparse_bank_no(valid_bank_mask,
bank);
if (sbank < 0)
continue;
}
if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
continue;
}
/*
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
* can't analyze it here, flush TLB regardless of the specified
* address space.
*/
__set_bit(i, vcpu_bitmap);
}
vcpu_mask = all_cpus ? NULL :
sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
vp_bitmap, vcpu_bitmap);
/*
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
* analyze it here, flush TLB regardless of the specified address space.
*/
kvm_make_vcpus_request_mask(kvm,
KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
vcpu_bitmap, &hv_current->tlb_lush);
vcpu_mask, &hv_vcpu->tlb_flush);
ret_success:
/* We always do full TLB flush, set rep_done = rep_cnt. */
@@ -1370,6 +1407,99 @@ ret_success:
((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
/*
 * Deliver a fixed-mode interrupt with the given @vector to a set of vCPUs.
 *
 * @vcpu_bitmap is indexed by vCPU index; a NULL bitmap selects every vCPU
 * in @kvm.
 */
static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
				 unsigned long *vcpu_bitmap)
{
	struct kvm_vcpu *target;
	int idx;
	struct kvm_lapic_irq irq = {
		.vector = vector,
		.delivery_mode = APIC_DM_FIXED,
	};

	kvm_for_each_vcpu(idx, target, kvm) {
		bool selected = !vcpu_bitmap || test_bit(idx, vcpu_bitmap);

		/* We fail only when APIC is disabled */
		if (selected)
			kvm_apic_set_irq(target, &irq, NULL);
	}
}
/*
 * Handle the HVCALL_SEND_IPI (@ex == false) and HVCALL_SEND_IPI_EX
 * (@ex == true) hypercalls issued by @current_vcpu.
 *
 * Non-ex form: the target set is a single 64-bit mask (bank 0). With @fast
 * the vector is passed in the low 32 bits of @ingpa (the upper 32 bits must
 * be zero) and the mask in @outgpa; otherwise a struct hv_send_ipi is read
 * from guest memory at @ingpa.
 *
 * Ex form: a struct hv_send_ipi_ex is read from @ingpa. Unless its format is
 * HV_GENERIC_SET_ALL, one 64-bit bank is read per bit set in
 * valid_bank_mask; an empty sparse set is a successful no-op.
 *
 * Returns HV_STATUS_SUCCESS, or HV_STATUS_INVALID_HYPERCALL_INPUT when guest
 * memory cannot be read, the reserved bits of a fast call are set, or the
 * vector lies outside [HV_IPI_LOW_VECTOR, HV_IPI_HIGH_VECTOR].
 */
static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
			   bool ex, bool fast)
{
	struct kvm *kvm = current_vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
	unsigned long *vcpu_mask;
	/*
	 * Must be a full 64 bits wide: it holds a 64-bit guest value and is
	 * scanned as a 64-bit bitmap, so 'unsigned long' would truncate it
	 * (and be over-read) on 32-bit builds.
	 */
	u64 valid_bank_mask;
	u64 sparse_banks[64];
	int sparse_banks_len;
	u32 vector;
	bool all_cpus;

	if (!ex) {
		if (!fast) {
			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = outgpa;
			vector = (u32)ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
					    sizeof(send_ipi_ex))))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		sparse_banks_len =
			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
			sizeof(sparse_banks[0]);

		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		/*
		 * The bank mask only matters for a sparse set. An "all CPUs"
		 * request must still be delivered when the mask is empty, so
		 * only take the no-op shortcut (and only read the banks) when
		 * the set really is sparse.
		 */
		if (!all_cpus) {
			if (!sparse_banks_len)
				goto ret_success;

			if (kvm_read_guest(kvm,
					   ingpa + offsetof(struct hv_send_ipi_ex,
							    vp_set.bank_contents),
					   sparse_banks,
					   sparse_banks_len))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
		}
	}

	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* A NULL mask tells kvm_send_ipi_to_many() to target every vCPU. */
	vcpu_mask = all_cpus ? NULL :
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
					vp_bitmap, vcpu_bitmap);

	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1539,6 +1669,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
break;
case HVCALL_SEND_IPI:
if (unlikely(rep)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
break;
case HVCALL_SEND_IPI_EX:
if (unlikely(fast || rep)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;

View File

@@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
struct hv_vp_assist_page *assist_page);
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
int timer_index)
{

View File

@@ -70,6 +70,11 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
static bool lapic_timer_advance_adjust_done = false;
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -955,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
map = rcu_dereference(kvm->arch.apic_map);
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
if (ret)
if (ret) {
*r = 0;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
if (*r < 0)
*r = 0;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
}
rcu_read_unlock();
return ret;
@@ -1472,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
u64 guest_tsc, tsc_deadline, ns;
if (!lapic_in_kernel(vcpu))
return;
@@ -1492,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
if (guest_tsc < tsc_deadline)
__delay(min(tsc_deadline - guest_tsc,
nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
if (!lapic_timer_advance_adjust_done) {
/* too early */
if (guest_tsc < tsc_deadline) {
ns = (tsc_deadline - guest_tsc) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
lapic_timer_advance_ns -= min((unsigned int)ns,
lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = (guest_tsc - tsc_deadline) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
lapic_timer_advance_ns += min((unsigned int)ns,
lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
lapic_timer_advance_adjust_done = true;
}
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -2621,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
if (!IS_ALIGNED(addr, 4))
return 1;
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
addr, sizeof(u8));
if (addr == ghc->gpa && len <= ghc->len)
new_len = ghc->len;
else
new_len = len;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)

View File

@@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_init(void);
void kvm_lapic_exit(void);

View File

@@ -932,7 +932,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
if (!obj)
return -ENOMEM;
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
}
return 0;
@@ -960,7 +960,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
return -ENOMEM;
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
@@ -1265,24 +1265,24 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
mmu_free_pte_list_desc(desc);
}
static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
if (!rmap_head->val) {
printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
pr_err("%s: %p 0->BUG\n", __func__, spte);
BUG();
} else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_remove: %p 1->0\n", spte);
rmap_printk("%s: %p 1->0\n", __func__, spte);
if ((u64 *)rmap_head->val != spte) {
printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
pr_err("%s: %p 1->BUG\n", __func__, spte);
BUG();
}
rmap_head->val = 0;
} else {
rmap_printk("pte_list_remove: %p many->many\n", spte);
rmap_printk("%s: %p many->many\n", __func__, spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
prev_desc = NULL;
while (desc) {
@@ -1296,11 +1296,17 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
prev_desc = desc;
desc = desc->more;
}
pr_err("pte_list_remove: %p many->many\n", spte);
pr_err("%s: %p many->many\n", __func__, spte);
BUG();
}
}
/*
 * Run mmu_spte_clear_track_bits() on @sptep and then unlink it from
 * @rmap_head via __pte_list_remove().
 *
 * Note the argument order: this wrapper takes (rmap_head, sptep), whereas
 * __pte_list_remove() takes (spte, rmap_head).
 *
 * NOTE(review): mmu_spte_clear_track_bits() presumably zaps the SPTE while
 * propagating its tracked state - confirm against its definition.
 */
static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
{
mmu_spte_clear_track_bits(sptep);
__pte_list_remove(sptep, rmap_head);
}
static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@@ -1349,7 +1355,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmap_head = gfn_to_rmap(kvm, gfn, sp);
pte_list_remove(spte, rmap_head);
__pte_list_remove(spte, rmap_head);
}
/*
@@ -1685,7 +1691,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
while ((sptep = rmap_get_first(rmap_head, &iter))) {
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
drop_spte(kvm, sptep);
pte_list_remove(rmap_head, sptep);
flush = true;
}
@@ -1721,7 +1727,7 @@ restart:
need_flush = 1;
if (pte_write(*ptep)) {
drop_spte(kvm, sptep);
pte_list_remove(rmap_head, sptep);
goto restart;
} else {
new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
@@ -1988,7 +1994,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
u64 *parent_pte)
{
pte_list_remove(parent_pte, &sp->parent_ptes);
__pte_list_remove(parent_pte, &sp->parent_ptes);
}
static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -2181,7 +2187,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)
|| vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
|| vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2375,14 +2381,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
int collisions = 0;
LIST_HEAD(invalid_list);
role = vcpu->arch.mmu.base_role;
role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
role.direct = direct;
if (role.direct)
role.cr4_pae = 0;
role.access = access;
if (!vcpu->arch.mmu.direct_map
&& vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
if (!vcpu->arch.mmu->direct_map
&& vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
@@ -2457,11 +2463,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
{
iterator->addr = addr;
iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu.shadow_root_level;
iterator->level = vcpu->arch.mmu->shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu.direct_map)
vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu->direct_map)
--iterator->level;
if (iterator->level == PT32E_ROOT_LEVEL) {
@@ -2469,10 +2475,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
* prev_root is currently only used for 64-bit hosts. So only
* the active root_hpa is valid here.
*/
BUG_ON(root != vcpu->arch.mmu.root_hpa);
BUG_ON(root != vcpu->arch.mmu->root_hpa);
iterator->shadow_addr
= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
= vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
--iterator->level;
if (!iterator->shadow_addr)
@@ -2483,7 +2489,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, u64 addr)
{
shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
addr);
}
@@ -3095,7 +3101,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int emulate = 0;
gfn_t pseudo_gfn;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return 0;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
@@ -3301,7 +3307,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
u64 spte = 0ull;
uint retry_count = 0;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return false;
if (!page_fault_can_be_fast(error_code))
@@ -3471,11 +3477,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
}
/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free)
{
int i;
LIST_HEAD(invalid_list);
struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
@@ -3535,20 +3541,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
struct kvm_mmu_page *sp;
unsigned i;
if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
vcpu->arch.mmu->root_hpa = __pa(sp->spt);
} else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
@@ -3561,9 +3567,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
@@ -3577,7 +3583,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
gfn_t root_gfn;
int i;
root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
@@ -3586,8 +3592,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* Do we shadow a long mode page table? If so we need to
* write-protect the guests page table root.
*/
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
MMU_WARN_ON(VALID_PAGE(root));
@@ -3597,11 +3603,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = root;
vcpu->arch.mmu->root_hpa = root;
return 0;
}
@@ -3611,17 +3617,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* the shadow page table may be a PAE or a long mode page table.
*/
pm_mask = PT_PRESENT_MASK;
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
if (!(pdptr & PT_PRESENT_MASK)) {
vcpu->arch.mmu.pae_root[i] = 0;
vcpu->arch.mmu->pae_root[i] = 0;
continue;
}
root_gfn = pdptr >> PAGE_SHIFT;
@@ -3639,16 +3645,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.pae_root[i] = root | pm_mask;
vcpu->arch.mmu->pae_root[i] = root | pm_mask;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
/*
* If we shadow a 32 bit page table with a long mode page
* table we enter this path.
*/
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
if (vcpu->arch.mmu.lm_root == NULL) {
if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
if (vcpu->arch.mmu->lm_root == NULL) {
/*
* The additional page necessary for this is only
* allocated on demand.
@@ -3660,12 +3666,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (lm_root == NULL)
return 1;
lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
vcpu->arch.mmu.lm_root = lm_root;
vcpu->arch.mmu->lm_root = lm_root;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
}
return 0;
@@ -3673,7 +3679,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.mmu.direct_map)
if (vcpu->arch.mmu->direct_map)
return mmu_alloc_direct_roots(vcpu);
else
return mmu_alloc_shadow_roots(vcpu);
@@ -3684,17 +3690,16 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
int i;
struct kvm_mmu_page *sp;
if (vcpu->arch.mmu.direct_map)
if (vcpu->arch.mmu->direct_map)
return;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
/*
@@ -3725,7 +3730,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
hpa_t root = vcpu->arch.mmu->pae_root[i];
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
@@ -3799,7 +3804,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
int root, leaf;
bool reserved = false;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
goto exit;
walk_shadow_page_lockless_begin(vcpu);
@@ -3816,7 +3821,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (!is_shadow_present_pte(spte))
break;
reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
iterator.level);
}
@@ -3895,7 +3900,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
struct kvm_shadow_walk_iterator iterator;
u64 spte;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
walk_shadow_page_lockless_begin(vcpu);
@@ -3922,7 +3927,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
if (r)
return r;
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
return nonpaging_map(vcpu, gva & PAGE_MASK,
@@ -3935,8 +3940,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
@@ -4042,7 +4047,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
@@ -4118,7 +4123,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
{
uint i;
struct kvm_mmu_root_info root;
struct kvm_mmu *mmu = &vcpu->arch.mmu;
struct kvm_mmu *mmu = vcpu->arch.mmu;
root.cr3 = mmu->get_cr3(vcpu);
root.hpa = mmu->root_hpa;
@@ -4141,7 +4146,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role,
bool skip_tlb_flush)
{
struct kvm_mmu *mmu = &vcpu->arch.mmu;
struct kvm_mmu *mmu = vcpu->arch.mmu;
/*
* For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
@@ -4192,7 +4197,8 @@ static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
bool skip_tlb_flush)
{
if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
kvm_mmu_free_roots(vcpu, vcpu->arch.mmu,
KVM_MMU_ROOT_CURRENT);
}
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
@@ -4210,7 +4216,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
static void inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
vcpu->arch.mmu->inject_page_fault(vcpu, fault);
}
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -4414,7 +4420,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
bool uses_nx = context->nx ||
context->mmu_role.base.smep_andnot_wp;
struct rsvd_bits_validate *shadow_zero_check;
int i;
@@ -4553,7 +4560,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
* SMAP:kernel-mode data accesses from user-mode
* mappings should fault. A fault is considered
* as a SMAP violation if all of the following
* conditions are ture:
* conditions are true:
* - X86_CR4_SMAP is set in CR4
* - A user page is accessed
* - The access is not a fetch
@@ -4714,27 +4721,65 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
{
union kvm_mmu_page_role role = {0};
union kvm_mmu_extended_role ext = {0};
role.guest_mode = is_guest_mode(vcpu);
role.smm = is_smm(vcpu);
role.ad_disabled = (shadow_accessed_mask == 0);
role.level = kvm_x86_ops->get_tdp_level(vcpu);
role.direct = true;
role.access = ACC_ALL;
ext.cr0_pg = !!is_paging(vcpu);
ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ext.cr4_pse = !!is_pse(vcpu);
ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
ext.valid = 1;
return ext;
}
/*
 * Compute the MMU role bits that do not depend on which kind of MMU is
 * being configured.
 *
 * The base role always gets full access rights plus the current NX, PAE,
 * CR0.WP, SMM and guest-mode state of @vcpu.  The extended role is filled
 * in from kvm_calc_mmu_role_ext() only when @base_only is false; otherwise
 * it is left zeroed.
 */
static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
						   bool base_only)
{
	union kvm_mmu_role common = {0};

	common.base.access = ACC_ALL;
	common.base.nxe = !!is_nx(vcpu);
	common.base.cr4_pae = !!is_pae(vcpu);
	common.base.cr0_wp = is_write_protection(vcpu);
	common.base.smm = is_smm(vcpu);
	common.base.guest_mode = is_guest_mode(vcpu);

	/* Callers asking for the base role only never see extended bits. */
	if (!base_only)
		common.ext = kvm_calc_mmu_role_ext(vcpu);

	return common;
}
/*
 * Compute the role for a TDP (two-dimensional paging) MMU.
 *
 * Starts from the common role (honouring @base_only) and then overrides
 * the TDP-specific base bits: A/D bits are flagged as disabled when
 * shadow_accessed_mask is zero, the level comes from the vendor
 * get_tdp_level() hook, and the mapping is always direct.
 */
static union kvm_mmu_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
{
	union kvm_mmu_role tdp_role;

	tdp_role = kvm_calc_mmu_role_common(vcpu, base_only);

	tdp_role.base.ad_disabled = !shadow_accessed_mask;
	tdp_role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
	tdp_role.base.direct = true;

	return tdp_role;
}
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
struct kvm_mmu *context = vcpu->arch.mmu;
union kvm_mmu_role new_role =
kvm_calc_tdp_mmu_root_page_role(vcpu, false);
context->base_role.word = mmu_base_role_mask.word &
kvm_calc_tdp_mmu_root_page_role(vcpu).word;
new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
context->mmu_role.as_u64 = new_role.as_u64;
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
@@ -4774,36 +4819,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
static union kvm_mmu_page_role
kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
static union kvm_mmu_role
kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
{
union kvm_mmu_page_role role = {0};
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
role.nxe = is_nx(vcpu);
role.cr4_pae = !!is_pae(vcpu);
role.cr0_wp = is_write_protection(vcpu);
role.smep_andnot_wp = smep && !is_write_protection(vcpu);
role.smap_andnot_wp = smap && !is_write_protection(vcpu);
role.guest_mode = is_guest_mode(vcpu);
role.smm = is_smm(vcpu);
role.direct = !is_paging(vcpu);
role.access = ACC_ALL;
role.base.smep_andnot_wp = role.ext.cr4_smep &&
!is_write_protection(vcpu);
role.base.smap_andnot_wp = role.ext.cr4_smap &&
!is_write_protection(vcpu);
role.base.direct = !is_paging(vcpu);
if (!is_long_mode(vcpu))
role.level = PT32E_ROOT_LEVEL;
role.base.level = PT32E_ROOT_LEVEL;
else if (is_la57_mode(vcpu))
role.level = PT64_ROOT_5LEVEL;
role.base.level = PT64_ROOT_5LEVEL;
else
role.level = PT64_ROOT_4LEVEL;
role.base.level = PT64_ROOT_4LEVEL;
return role;
}
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
struct kvm_mmu *context = vcpu->arch.mmu;
union kvm_mmu_role new_role =
kvm_calc_shadow_mmu_root_page_role(vcpu, false);
new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
if (!is_paging(vcpu))
nonpaging_init_context(vcpu, context);
@@ -4814,22 +4859,28 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
else
paging32_init_context(vcpu, context);
context->base_role.word = mmu_base_role_mask.word &
kvm_calc_shadow_mmu_root_page_role(vcpu).word;
context->mmu_role.as_u64 = new_role.as_u64;
reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
static union kvm_mmu_page_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
bool execonly)
{
union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;
union kvm_mmu_role role;
role.level = PT64_ROOT_4LEVEL;
role.direct = false;
role.ad_disabled = !accessed_dirty;
role.guest_mode = true;
role.access = ACC_ALL;
/* Base role is inherited from root_mmu */
role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
role.ext = kvm_calc_mmu_role_ext(vcpu);
role.base.level = PT64_ROOT_4LEVEL;
role.base.direct = false;
role.base.ad_disabled = !accessed_dirty;
role.base.guest_mode = true;
role.base.access = ACC_ALL;
role.ext.execonly = execonly;
return role;
}
@@ -4837,11 +4888,17 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
union kvm_mmu_page_role root_page_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);
struct kvm_mmu *context = vcpu->arch.mmu;
union kvm_mmu_role new_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
execonly);
__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
__kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
context->shadow_root_level = PT64_ROOT_4LEVEL;
context->nx = true;
@@ -4853,7 +4910,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
context->direct_map = false;
context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
context->mmu_role.as_u64 = new_role.as_u64;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
@@ -4864,7 +4922,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
struct kvm_mmu *context = vcpu->arch.mmu;
kvm_init_shadow_mmu(vcpu);
context->set_cr3 = kvm_x86_ops->set_cr3;
@@ -4875,14 +4933,20 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
new_role.base.word &= mmu_base_role_mask.word;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
return;
g_context->mmu_role.as_u64 = new_role.as_u64;
g_context->get_cr3 = get_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
/*
* Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
* Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
* L1's nested page tables (e.g. EPT12). The nested translation
* of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
* L2's page tables as the first level of translation and L1's
@@ -4921,10 +4985,10 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
if (reset_roots) {
uint i;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu->root_hpa = INVALID_PAGE;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
}
if (mmu_is_nested(vcpu))
@@ -4939,10 +5003,14 @@ EXPORT_SYMBOL_GPL(kvm_init_mmu);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
{
union kvm_mmu_role role;
if (tdp_enabled)
return kvm_calc_tdp_mmu_root_page_role(vcpu);
role = kvm_calc_tdp_mmu_root_page_role(vcpu, true);
else
return kvm_calc_shadow_mmu_root_page_role(vcpu);
role = kvm_calc_shadow_mmu_root_page_role(vcpu, true);
return role.base;
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -4972,8 +5040,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4987,7 +5057,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@@ -5164,10 +5234,12 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
local_flush = true;
while (npte--) {
u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
entry = *spte;
mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
!((sp->role.word ^ base_role)
& mmu_base_role_mask.word) && rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (need_remote_flush(entry, *spte))
@@ -5185,7 +5257,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
gpa_t gpa;
int r;
if (vcpu->arch.mmu.direct_map)
if (vcpu->arch.mmu->direct_map)
return 0;
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
@@ -5221,10 +5293,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
{
int r, emulation_type = 0;
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map;
bool direct = vcpu->arch.mmu->direct_map;
/* With shadow page tables, fault_address contains a GVA or nGPA. */
if (vcpu->arch.mmu.direct_map) {
if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
vcpu->arch.gpa_val = cr2;
}
@@ -5237,8 +5309,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
}
if (r == RET_PF_INVALID) {
r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
false);
r = vcpu->arch.mmu->page_fault(vcpu, cr2,
lower_32_bits(error_code),
false);
WARN_ON(r == RET_PF_INVALID);
}
@@ -5254,7 +5327,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* paging in both guests. If true, we simply unprotect the page
* and resume the guest.
*/
if (vcpu->arch.mmu.direct_map &&
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
return 1;
@@ -5302,7 +5375,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_mmu *mmu = &vcpu->arch.mmu;
struct kvm_mmu *mmu = vcpu->arch.mmu;
int i;
/* INVLPG on a * non-canonical address is a NOP according to the SDM. */
@@ -5333,7 +5406,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{
struct kvm_mmu *mmu = &vcpu->arch.mmu;
struct kvm_mmu *mmu = vcpu->arch.mmu;
bool tlb_flush = false;
uint i;
@@ -5377,8 +5450,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
free_page((unsigned long)vcpu->arch.mmu.pae_root);
free_page((unsigned long)vcpu->arch.mmu.lm_root);
free_page((unsigned long)vcpu->arch.mmu->pae_root);
free_page((unsigned long)vcpu->arch.mmu->lm_root);
}
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5398,9 +5471,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
if (!page)
return -ENOMEM;
vcpu->arch.mmu.pae_root = page_address(page);
vcpu->arch.mmu->pae_root = page_address(page);
for (i = 0; i < 4; ++i)
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
return 0;
}
@@ -5409,29 +5482,23 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
uint i;
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
vcpu->arch.root_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
return alloc_mmu_pages(vcpu);
}
void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
/*
* kvm_mmu_setup() is called only on vCPU initialization.
* Therefore, no need to reset mmu roots as they are not yet
* initialized.
*/
kvm_init_mmu(vcpu, false);
}
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node)
@@ -5612,7 +5679,7 @@ restart:
if (sp->role.direct &&
!kvm_is_reserved_pfn(pfn) &&
PageTransCompoundMap(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
pte_list_remove(rmap_head, sptep);
need_tlb_flush = 1;
goto restart;
}
@@ -5869,6 +5936,16 @@ int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
/*
* MMU roles use union aliasing which is, generally speaking, an
* undefined behavior. However, we supposedly know how compilers behave
* and the current status quo is unlikely to change. Guardians below are
* supposed to let us know if the assumption becomes false.
*/
BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32));
BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32));
BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));
kvm_mmu_reset_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
@@ -5898,7 +5975,7 @@ out:
}
/*
* Caculate mmu pages needed for kvm.
* Calculate mmu pages needed for kvm.
*/
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
{

View File

@@ -43,11 +43,6 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
#define PT_PDPE_LEVEL 3
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
static inline u64 rsvd_bits(int s, int e)
{
if (e < s)
@@ -80,7 +75,7 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
@@ -102,9 +97,9 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
kvm_get_active_pcid(vcpu));
if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa |
kvm_get_active_pcid(vcpu));
}
/*

View File

@@ -59,19 +59,19 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
int i;
struct kvm_mmu_page *sp;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
return;
}
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
hpa_t root = vcpu->arch.mmu->pae_root[i];
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
@@ -122,7 +122,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
hpa = pfn << PAGE_SHIFT;
if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
"ent %llxn", vcpu->arch.mmu.root_level, pfn,
"ent %llxn", vcpu->arch.mmu->root_level, pfn,
hpa, *sptep);
}

View File

@@ -158,14 +158,15 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
goto no_present;
if (!FNAME(is_present_gpte)(gpte))
goto no_present;
/* if accessed bit is not supported prefetch non accessed gpte */
if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
!(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
return false;
@@ -480,7 +481,7 @@ error:
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr, u32 access)
{
return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
access);
}
@@ -509,7 +510,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
if (is_error_pfn(pfn))
@@ -604,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
direct_access = gw->pte_access;
top_level = vcpu->arch.mmu.root_level;
top_level = vcpu->arch.mmu->root_level;
if (top_level == PT32E_ROOT_LEVEL)
top_level = PT32_ROOT_LEVEL;
/*
@@ -616,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
goto out_gpte_changed;
for (shadow_walk_init(&it, vcpu, addr);
@@ -1004,7 +1005,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))

View File

@@ -809,6 +809,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
nested_svm_check_exception(svm, nr, has_error_code, error_code))
return;
kvm_deliver_exception_payload(&svm->vcpu);
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -2922,18 +2924,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
WARN_ON(mmu_is_nested(vcpu));
kvm_init_shadow_mmu(vcpu);
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3;
vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
static int nested_svm_check_permissions(struct vcpu_svm *svm)
@@ -2969,16 +2971,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
svm->vmcb->control.exit_info_1 = error_code;
/*
* FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
* The fix is to add the ancillary datum (CR2 or DR6) to structs
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
* written only when inject_pending_event runs (DR6 would written here
* too). This should be conditional on a new capability---if the
* capability is disabled, kvm_multiple_exception would write the
* ancillary information to CR2 or DR6, for backwards ABI-compatibility.
* EXITINFO2 is undefined for all exception intercepts other
* than #PF.
*/
if (svm->vcpu.arch.exception.nested_apf)
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
else if (svm->vcpu.arch.exception.has_payload)
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
else
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
@@ -5642,26 +5641,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%[svm]) \n\t"
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
/*
* Clear host registers marked as clobbered to prevent
* speculative use.
*/
"xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
"xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
"xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
"xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
"xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
#ifdef CONFIG_X86_64
"xor %%r8, %%r8 \n\t"
"xor %%r9, %%r9 \n\t"
"xor %%r10, %%r10 \n\t"
"xor %%r11, %%r11 \n\t"
"xor %%r12, %%r12 \n\t"
"xor %%r13, %%r13 \n\t"
"xor %%r14, %%r14 \n\t"
"xor %%r15, %%r15 \n\t"
"xor %%r8d, %%r8d \n\t"
"xor %%r9d, %%r9d \n\t"
"xor %%r10d, %%r10d \n\t"
"xor %%r11d, %%r11d \n\t"
"xor %%r12d, %%r12d \n\t"
"xor %%r13d, %%r13d \n\t"
"xor %%r14d, %%r14d \n\t"
"xor %%r15d, %%r15d \n\t"
#endif
"xor %%ebx, %%ebx \n\t"
"xor %%ecx, %%ecx \n\t"
"xor %%edx, %%edx \n\t"
"xor %%esi, %%esi \n\t"
"xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
@@ -7040,6 +7037,13 @@ failed:
return ret;
}
/*
 * Stub implementation of the nested_enable_evmcs hook.
 *
 * Always returns -ENODEV; neither @vcpu nor @vmcs_version is read or
 * written.
 */
static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
/* Intel-only feature */
return -ENODEV;
}
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7169,6 +7173,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.mem_enc_op = svm_mem_enc_op,
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
.nested_enable_evmcs = nested_enable_evmcs,
};
static int __init svm_init(void)

View File

@@ -1418,6 +1418,48 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex,
__entry->valid_bank_mask, __entry->format,
__entry->address_space, __entry->flags)
);
/*
 * Tracepoint for kvm_hv_send_ipi.
 *
 * Records the 32-bit @vector and the 64-bit @processor_mask handed in by
 * the caller and prints both in hexadecimal.
 */
TRACE_EVENT(kvm_hv_send_ipi,
TP_PROTO(u32 vector, u64 processor_mask),
TP_ARGS(vector, processor_mask),
TP_STRUCT__entry(
__field(u32, vector)
__field(u64, processor_mask)
),
TP_fast_assign(
__entry->vector = vector;
__entry->processor_mask = processor_mask;
),
TP_printk("vector %x processor_mask 0x%llx",
__entry->vector, __entry->processor_mask)
);
/*
 * Tracepoint kvm_hv_send_ipi_ex.
 *
 * Records the 32-bit @vector together with the 64-bit @format and
 * @valid_bank_mask values handed in by the caller, and prints all three
 * in hexadecimal.
 */
TRACE_EVENT(kvm_hv_send_ipi_ex,
TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask),
TP_ARGS(vector, format, valid_bank_mask),
TP_STRUCT__entry(
__field(u32, vector)
__field(u64, format)
__field(u64, valid_bank_mask)
),
TP_fast_assign(
__entry->vector = vector;
__entry->format = format;
__entry->valid_bank_mask = valid_bank_mask;
),
TP_printk("vector %x format %llx valid_bank_mask 0x%llx",
__entry->vector, __entry->format,
__entry->valid_bank_mask)
);
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH

File diff suppressed because it is too large Load Diff

View File

@@ -28,7 +28,6 @@
*/
/* 16-bits */
SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
@@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
SHADOW_FIELD_RW(GUEST_CS_LIMIT)
SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
@@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
SHADOW_FIELD_RW(GUEST_CS_BASE)
SHADOW_FIELD_RW(GUEST_ES_BASE)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)

View File

@@ -136,7 +136,7 @@ static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
unsigned int __read_mostly lapic_timer_advance_ns = 0;
unsigned int __read_mostly lapic_timer_advance_ns = 1000;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
@@ -400,9 +400,51 @@ static int exception_type(int vector)
return EXCPT_FAULT;
}
/*
 * Apply the payload of the currently queued exception to guest state.
 *
 * Does nothing when the queued exception carries no payload.  Otherwise
 * the payload is folded into DR6 for #DB or written to CR2 for #PF; other
 * vectors leave guest registers untouched.  In every payload-carrying case
 * the payload is then marked as consumed (has_payload cleared, payload
 * zeroed).
 */
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
{
	unsigned int vector;
	unsigned long data;

	if (!vcpu->arch.exception.has_payload)
		return;

	vector = vcpu->arch.exception.nr;
	data = vcpu->arch.exception.payload;

	if (vector == DB_VECTOR) {
		/*
		 * "Certain debug exceptions may clear bit 0-3. The
		 * remaining contents of the DR6 register are never
		 * cleared by the processor".
		 */
		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/*
		 * DR6.RTM is set by every #DB that does not clear it, and
		 * the payload bits are merged in at the same time.
		 */
		vcpu->arch.dr6 |= DR6_RTM | data;
		/*
		 * The payload has bit 16 set whenever the #DB should clear
		 * DR6.RTM, so flip RTM back off in that case.  This keeps
		 * the payload compatible with the pending debug exceptions
		 * under VMX and, although not currently documented in the
		 * SDM, with the exit qualification for #DB exceptions
		 * under VMX.
		 */
		vcpu->arch.dr6 ^= data & DR6_RTM;
	} else if (vector == PF_VECTOR) {
		vcpu->arch.cr2 = data;
	}

	vcpu->arch.exception.has_payload = false;
	vcpu->arch.exception.payload = 0;
}
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool reinject)
bool has_payload, unsigned long payload, bool reinject)
{
u32 prev_nr;
int class1, class2;
@@ -424,6 +466,14 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
*/
WARN_ON_ONCE(vcpu->arch.exception.pending);
vcpu->arch.exception.injected = true;
if (WARN_ON_ONCE(has_payload)) {
/*
* A reinjected event has already
* delivered its payload.
*/
has_payload = false;
payload = 0;
}
} else {
vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
@@ -431,6 +481,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.nr = nr;
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
* CR2 is modified (or intercept #DB before DR6 is
* modified under nVMX). However, for ABI
* compatibility with KVM_GET_VCPU_EVENTS and
* KVM_SET_VCPU_EVENTS, we can't delay payload
* delivery unless userspace has enabled this
* functionality via the per-VM capability,
* KVM_CAP_EXCEPTION_PAYLOAD.
*/
if (!vcpu->kvm->arch.exception_payload_enabled ||
!is_guest_mode(vcpu))
kvm_deliver_exception_payload(vcpu);
return;
}
@@ -455,6 +521,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = true;
vcpu->arch.exception.nr = DF_VECTOR;
vcpu->arch.exception.error_code = 0;
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
} else
/* replace previous exception with a new one in a hope
that instruction re-execution will regenerate lost
@@ -464,16 +532,29 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
/*
 * Queue exception vector @nr on @vcpu by delegating to
 * kvm_multiple_exception() with has_error = false, error_code = 0 and
 * reinject = false (and, in the 7-argument form, has_payload = false,
 * payload = 0).
 *
 * NOTE(review): this text looks like a diff rendering with the +/- markers
 * stripped. The 5-argument call matches the older kvm_multiple_exception()
 * parameter list and the 7-argument call matches the list that adds
 * has_payload/payload; presumably only the 7-argument call exists in the
 * resulting file -- confirm against the tree.
 */
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false);
kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
/*
 * Requeue exception vector @nr on @vcpu: same as kvm_queue_exception()
 * except that reinject = true is passed to kvm_multiple_exception().
 * No error code is supplied, and in the 7-argument form no payload either
 * (has_payload = false, payload = 0).
 *
 * NOTE(review): this text looks like a diff rendering with the +/- markers
 * stripped. The 5-argument call is presumably the pre-change line and the
 * 7-argument call its replacement -- confirm against the tree.
 */
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, true);
kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
/*
 * Queue exception vector @nr on @vcpu together with an exception payload.
 *
 * Calls kvm_multiple_exception() with has_error = false, error_code = 0,
 * has_payload = true, the caller's @payload, and reinject = false.
 * File-local helper (static); in this file it is used to queue #DB with
 * DR6_BS as the payload.
 */
static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
}
/*
 * Queue exception vector @nr on @vcpu with both an error code and an
 * exception payload.
 *
 * Calls kvm_multiple_exception() with has_error = true, the caller's
 * @error_code, has_payload = true, the caller's @payload, and
 * reinject = false. File-local helper (static); in this file it is used
 * to queue #PF with the faulting address as the payload.
 */
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, true, error_code,
true, payload, false);
}
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
@@ -490,11 +571,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
++vcpu->stat.pf_guest;
vcpu->arch.exception.nested_apf =
is_guest_mode(vcpu) && fault->async_page_fault;
if (vcpu->arch.exception.nested_apf)
if (vcpu->arch.exception.nested_apf) {
vcpu->arch.apf.nested_apf_token = fault->address;
else
vcpu->arch.cr2 = fault->address;
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
} else {
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
fault->address);
}
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -503,7 +586,7 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau
if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
else
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
vcpu->arch.mmu->inject_page_fault(vcpu, fault);
return fault->nested_page_fault;
}
@@ -517,13 +600,13 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
/*
 * Queue exception vector @nr on @vcpu with error code @error_code.
 *
 * Delegates to kvm_multiple_exception() with has_error = true and
 * reinject = false (and, in the 7-argument form, has_payload = false,
 * payload = 0).
 *
 * NOTE(review): this text looks like a diff rendering with the +/- markers
 * stripped. The 5-argument call is presumably the pre-change line and the
 * 7-argument call its replacement -- confirm against the tree.
 */
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false);
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
/*
 * Requeue exception vector @nr on @vcpu with error code @error_code:
 * same as kvm_queue_exception_e() except that reinject = true is passed
 * to kvm_multiple_exception(). In the 7-argument form no payload is
 * supplied (has_payload = false, payload = 0).
 *
 * NOTE(review): this text looks like a diff rendering with the +/- markers
 * stripped. The 5-argument call is presumably the pre-change line and the
 * 7-argument call its replacement -- confirm against the tree.
 */
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, true);
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
@@ -602,7 +685,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
if ((pdpte[i] & PT_PRESENT_MASK) &&
(pdpte[i] &
vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
ret = 0;
goto out;
}
@@ -2477,7 +2560,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_PV_EOI_EN:
if (kvm_lapic_enable_pv_eoi(vcpu, data))
if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
@@ -2912,6 +2995,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_HYPERV_TLBFLUSH:
case KVM_CAP_HYPERV_SEND_IPI:
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2930,6 +3015,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -3362,19 +3448,33 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
process_nmi(vcpu);
/*
* FIXME: pass injected and pending separately. This is only
* needed for nested virtualization, whose state cannot be
* migrated yet. For now we can combine them.
* The API doesn't provide the instruction length for software
* exceptions, so don't report them. As long as the guest RIP
* isn't advanced, we should expect to encounter the exception
* again.
*/
events->exception.injected =
(vcpu->arch.exception.pending ||
vcpu->arch.exception.injected) &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
events->exception.injected = 0;
events->exception.pending = 0;
} else {
events->exception.injected = vcpu->arch.exception.injected;
events->exception.pending = vcpu->arch.exception.pending;
/*
* For ABI compatibility, deliberately conflate
* pending and injected exceptions when
* KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
*/
if (!vcpu->kvm->arch.exception_payload_enabled)
events->exception.injected |=
vcpu->arch.exception.pending;
}
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
events->exception.pad = 0;
events->exception.error_code = vcpu->arch.exception.error_code;
events->exception_has_payload = vcpu->arch.exception.has_payload;
events->exception_payload = vcpu->arch.exception.payload;
events->interrupt.injected =
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
@@ -3398,6 +3498,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SHADOW
| KVM_VCPUEVENT_VALID_SMM);
if (vcpu->kvm->arch.exception_payload_enabled)
events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
memset(&events->reserved, 0, sizeof(events->reserved));
}
@@ -3409,12 +3512,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW
| KVM_VCPUEVENT_VALID_SMM))
| KVM_VCPUEVENT_VALID_SMM
| KVM_VCPUEVENT_VALID_PAYLOAD))
return -EINVAL;
if (events->exception.injected &&
(events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
is_guest_mode(vcpu)))
if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
if (!vcpu->kvm->arch.exception_payload_enabled)
return -EINVAL;
if (events->exception.pending)
events->exception.injected = 0;
else
events->exception_has_payload = 0;
} else {
events->exception.pending = 0;
events->exception_has_payload = 0;
}
if ((events->exception.injected || events->exception.pending) &&
(events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
return -EINVAL;
/* INITs are latched while in SMM */
@@ -3424,11 +3539,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
process_nmi(vcpu);
vcpu->arch.exception.injected = false;
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.injected = events->exception.injected;
vcpu->arch.exception.pending = events->exception.pending;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
vcpu->arch.exception.has_payload = events->exception_has_payload;
vcpu->arch.exception.payload = events->exception_payload;
vcpu->arch.interrupt.injected = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
@@ -3694,6 +3811,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
int r;
uint16_t vmcs_version;
void __user *user_ptr;
if (cap->flags)
return -EINVAL;
@@ -3706,6 +3827,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
if (!r) {
user_ptr = (void __user *)(uintptr_t)cap->args[0];
if (copy_to_user(user_ptr, &vmcs_version,
sizeof(vmcs_version)))
r = -EFAULT;
}
return r;
default:
return -EINVAL;
}
@@ -4047,11 +4178,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
| KVM_STATE_NESTED_EVMCS))
break;
/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
&& !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
break;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
@@ -4363,6 +4496,10 @@ split_irqchip_unlock:
kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
r = 0;
break;
case KVM_CAP_EXCEPTION_PAYLOAD:
kvm->arch.exception_payload_enabled = cap->args[0];
r = 0;
break;
default:
r = -EINVAL;
break;
@@ -4803,7 +4940,7 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
return t_gpa;
}
@@ -5889,7 +6026,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (!vcpu->arch.mmu.direct_map) {
if (!vcpu->arch.mmu->direct_map) {
/*
* Write permission should be allowed since only
* write access need to be emulated.
@@ -5922,7 +6059,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
kvm_release_pfn_clean(pfn);
/* The instructions are well-emulated on direct mmu. */
if (vcpu->arch.mmu.direct_map) {
if (vcpu->arch.mmu->direct_map) {
unsigned int indirect_shadow_pages;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -5989,7 +6126,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
vcpu->arch.last_retry_eip = ctxt->eip;
vcpu->arch.last_retry_addr = cr2;
if (!vcpu->arch.mmu.direct_map)
if (!vcpu->arch.mmu->direct_map)
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -6049,14 +6186,7 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
kvm_run->exit_reason = KVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
* cleared by the processor".
*/
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
}
}
@@ -6995,10 +7125,22 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
if (vcpu->arch.exception.nr == DB_VECTOR &&
(vcpu->arch.dr7 & DR7_GD)) {
vcpu->arch.dr7 &= ~DR7_GD;
kvm_update_dr7(vcpu);
if (vcpu->arch.exception.nr == DB_VECTOR) {
/*
* This code assumes that nSVM doesn't use
* check_nested_events(). If it does, the
* DR6/DR7 changes should happen before L1
* gets a #VMEXIT for an intercepted #DB in
* L2. (Under VMX, on the other hand, the
* DR6/DR7 changes should not happen in the
* event of a VM-exit to L1 for an intercepted
* #DB in L2.)
*/
kvm_deliver_exception_payload(vcpu);
if (vcpu->arch.dr7 & DR7_GD) {
vcpu->arch.dr7 &= ~DR7_GD;
kvm_update_dr7(vcpu);
}
}
kvm_x86_ops->queue_exception(vcpu);
@@ -8478,7 +8620,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
kvm_init_mmu(vcpu, false);
vcpu_put(vcpu);
return 0;
}
@@ -9327,7 +9469,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
work->wakeup_all)
return;
@@ -9335,11 +9477,11 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
if (unlikely(r))
return;
if (!vcpu->arch.mmu.direct_map &&
work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
if (!vcpu->arch.mmu->direct_map &&
work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
@@ -9463,6 +9605,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
vcpu->arch.exception.nr = 0;
vcpu->arch.exception.has_error_code = false;
vcpu->arch.exception.error_code = 0;
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;

View File

@@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
int handle_ud(struct kvm_vcpu *vcpu);
void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);