Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - some cleanups
   - direct physical timer assignment
   - cache sanitization for 32-bit guests

  s390:
   - interrupt cleanup
   - introduction of the Guest Information Block
   - preparation for processor subfunctions in cpu models

  PPC:
   - bug fixes and improvements, especially related to machine checks
     and protection keys

  x86:
   - many, many cleanups, including removing a bunch of MMU code for
     unnecessary optimizations
   - AVIC fixes

  Generic:
   - memcg accounting"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (147 commits)
  kvm: vmx: fix formatting of a comment
  KVM: doc: Document the life cycle of a VM and its resources
  MAINTAINERS: Add KVM selftests to existing KVM entry
  Revert "KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range()"
  KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()
  KVM: PPC: Fix compilation when KVM is not enabled
  KVM: Minor cleanups for kvm_main.c
  KVM: s390: add debug logging for cpu model subfunctions
  KVM: s390: implement subfunction processor calls
  arm64: KVM: Fix architecturally invalid reset value for FPEXC32_EL2
  KVM: arm/arm64: Remove unused timer variable
  KVM: PPC: Book3S: Improve KVM reference counting
  KVM: PPC: Book3S HV: Fix build failure without IOMMU support
  Revert "KVM: Eliminate extra function calls in kvm_get_dirty_log_protect()"
  x86: kvmguest: use TSC clocksource if invariant TSC is exposed
  KVM: Never start grow vCPU halt_poll_ns from value below halt_poll_ns_grow_start
  KVM: Expose the initial start value in grow_halt_poll_ns() as a module parameter
  KVM: grow_halt_poll_ns() should never shrink vCPU halt_poll_ns
  KVM: x86/mmu: Consolidate kvm_mmu_zap_all() and kvm_mmu_zap_mmio_sptes()
  KVM: x86/mmu: WARN if zapping a MMIO spte results in zapping children
  ...
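Much of the x86 diff below is a mechanical conversion of VM- and vCPU-scoped allocations from GFP_KERNEL to GFP_KERNEL_ACCOUNT so that they are charged to the guest owner's memory cgroup (the "memcg accounting" item above). A minimal sketch of the pattern, with illustrative struct and function names rather than real KVM code:

#include <linux/slab.h>
#include <linux/types.h>

/*
 * Illustrative only: GFP_KERNEL_ACCOUNT is GFP_KERNEL | __GFP_ACCOUNT, so the
 * allocation is charged to the current task's memory cgroup.  Allocations
 * whose lifetime is tied to a VM or vCPU are switched to this flag below.
 */
struct demo_vm_state {
	u64 generation;
};

static struct demo_vm_state *demo_vm_state_alloc(void)
{
	/* charged to the VM owner's memcg, unlike plain GFP_KERNEL */
	return kzalloc(sizeof(struct demo_vm_state), GFP_KERNEL_ACCOUNT);
}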
@@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE);
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
 	mutex_lock(&hv->hv_lock);
 	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
-			GFP_KERNEL);
+			GFP_KERNEL_ACCOUNT);
 	mutex_unlock(&hv->hv_lock);
 
 	if (ret >= 0)
@@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	pid_t pid_nr;
 	int ret;
 
-	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT);
 	if (!pit)
 		return NULL;
@@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm)
 	struct kvm_pic *s;
 	int ret;
 
-	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
+	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT);
 	if (!s)
 		return -ENOMEM;
 	spin_lock_init(&s->lock);
@@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm)
 	struct kvm_ioapic *ioapic;
 	int ret;
 
-	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT);
 	if (!ioapic)
 		return -ENOMEM;
 	spin_lock_init(&ioapic->lock);
@@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm)
 			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 
 	new = kvzalloc(sizeof(struct kvm_apic_map) +
-	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
+	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
+	                   GFP_KERNEL_ACCOUNT);
 
 	if (!new)
 		goto out;
@@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	ASSERT(vcpu != NULL);
 	apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
-	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
 	if (!apic)
 		goto nomem;
 
 	vcpu->arch.apic = apic;
 
-	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
+	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 	if (!apic->regs) {
 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
 		       vcpu->vcpu_id);
@@ -109,9 +109,11 @@ module_param(dbg, bool, 0644);
 	(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
 
 
-#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
-#define PT64_DIR_BASE_ADDR_MASK \
-	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
+#else
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#endif
 #define PT64_LVL_ADDR_MASK(level) \
 	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
 						* PT64_LEVEL_BITS))) - 1))
@@ -330,53 +332,56 @@ static inline bool is_access_track_spte(u64 spte)
 }
 
 /*
- * the low bit of the generation number is always presumed to be zero.
- * This disables mmio caching during memslot updates.  The concept is
- * similar to a seqcount but instead of retrying the access we just punt
- * and ignore the cache.
+ * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * the memslots generation and is derived as follows:
  *
- * spte bits 3-11 are used as bits 1-9 of the generation number,
- * the bits 52-61 are used as bits 10-19 of the generation number.
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+ * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ *
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+ * the "real" generation number and thus effectively halve the maximum number
+ * of MMIO generations that can be handled before encountering a wrap (which
+ * requires a full MMU zap).  The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_LOW_SHIFT		2
-#define MMIO_SPTE_GEN_HIGH_SHIFT	52
+#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(18, 0)
 
-#define MMIO_GEN_SHIFT			20
-#define MMIO_GEN_LOW_SHIFT		10
-#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
-#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_SPTE_GEN_LOW_START		3
+#define MMIO_SPTE_GEN_LOW_END		11
+#define MMIO_SPTE_GEN_LOW_MASK		GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+						    MMIO_SPTE_GEN_LOW_START)
 
-static u64 generation_mmio_spte_mask(unsigned int gen)
+#define MMIO_SPTE_GEN_HIGH_START	52
+#define MMIO_SPTE_GEN_HIGH_END		61
+#define MMIO_SPTE_GEN_HIGH_MASK		GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
+						    MMIO_SPTE_GEN_HIGH_START)
+static u64 generation_mmio_spte_mask(u64 gen)
 {
 	u64 mask;
 
-	WARN_ON(gen & ~MMIO_GEN_MASK);
+	WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
 
-	mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
-	mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
+	mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
+	mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
 	return mask;
 }
 
-static unsigned int get_mmio_spte_generation(u64 spte)
+static u64 get_mmio_spte_generation(u64 spte)
 {
-	unsigned int gen;
+	u64 gen;
 
 	spte &= ~shadow_mmio_mask;
 
-	gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK;
-	gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT;
+	gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
+	gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
 	return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu)
-{
-	return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK;
-}
-
 static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 			   unsigned access)
 {
-	unsigned int gen = kvm_current_mmio_generation(vcpu);
+	u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
 	u64 mask = generation_mmio_spte_mask(gen);
 	u64 gpa = gfn << PAGE_SHIFT;
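As a side note, the bit layout spelled out in the new comment (generation bits 0-8 in spte bits 3-11, bits 9-18 in spte bits 52-61) can be exercised with a small standalone round-trip check. This sketch only illustrates that documented layout; it is not the kernel helpers themselves:

#include <assert.h>
#include <stdint.h>

/* Pack per the comment above: gen bits 0-8 -> spte bits 3-11,
 * gen bits 9-18 -> spte bits 52-61.  Standalone illustration only. */
static uint64_t pack_mmio_gen(uint64_t gen)
{
	uint64_t spte = 0;

	spte |= (gen & 0x1ffULL) << 3;          /* low 9 bits   -> 3-11  */
	spte |= ((gen >> 9) & 0x3ffULL) << 52;  /* high 10 bits -> 52-61 */
	return spte;
}

static uint64_t unpack_mmio_gen(uint64_t spte)
{
	return ((spte >> 3) & 0x1ffULL) | (((spte >> 52) & 0x3ffULL) << 9);
}

int main(void)
{
	for (uint64_t gen = 0; gen < (1ULL << 19); gen++)
		assert(unpack_mmio_gen(pack_mmio_gen(gen)) == gen);
	return 0;
}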
@@ -386,6 +391,8 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 	mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
 		<< shadow_nonpresent_or_rsvd_mask_len;
 
+	page_header(__pa(sptep))->mmio_cached = true;
+
 	trace_mark_mmio_spte(sptep, gfn, access, gen);
 	mmu_spte_set(sptep, mask);
 }
@@ -407,7 +414,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) & ~PAGE_MASK;
 }
 
@@ -424,9 +431,13 @@ static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 
 static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 {
-	unsigned int kvm_gen, spte_gen;
+	u64 kvm_gen, spte_gen, gen;
 
-	kvm_gen = kvm_current_mmio_generation(vcpu);
+	gen = kvm_vcpu_memslots(vcpu)->generation;
+	if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
+		return false;
+
+	kvm_gen = gen & MMIO_SPTE_GEN_MASK;
 	spte_gen = get_mmio_spte_generation(spte);
 
 	trace_check_mmio_spte(spte, kvm_gen, spte_gen);
@@ -959,7 +970,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 	if (cache->nobjs >= min)
 		return 0;
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
+		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
 		if (!obj)
 			return cache->nobjs >= min ? 0 : -ENOMEM;
 		cache->objects[cache->nobjs++] = obj;
@@ -2049,12 +2060,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
 	if (!direct)
 		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
-
-	/*
-	 * The active_mmu_pages list is the FIFO list, do not move the
-	 * page until it is zapped. kvm_zap_obsolete_pages depends on
-	 * this feature. See the comments in kvm_zap_obsolete_pages().
-	 */
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	return sp;
@@ -2195,23 +2200,15 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	--kvm->stat.mmu_unsync;
 }
 
-static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-				    struct list_head *invalid_list);
+static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				     struct list_head *invalid_list);
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
-/*
- * NOTE: we should pay more attention on the zapped-obsolete page
- * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk
- * since it has been deleted from active_mmu_pages but still can be found
- * at hast list.
- *
- * for_each_valid_sp() has skipped that kind of pages.
- */
 #define for_each_valid_sp(_kvm, _sp, _gfn)				\
 	hlist_for_each_entry(_sp,					\
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-		if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \
+		if ((_sp)->role.invalid) {				\
 		} else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
@@ -2231,18 +2228,28 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	return true;
 }
 
+static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
+					struct list_head *invalid_list,
+					bool remote_flush)
+{
+	if (!remote_flush && !list_empty(invalid_list))
+		return false;
+
+	if (!list_empty(invalid_list))
+		kvm_mmu_commit_zap_page(kvm, invalid_list);
+	else
+		kvm_flush_remote_tlbs(kvm);
+	return true;
+}
+
 static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu,
 				 struct list_head *invalid_list,
 				 bool remote_flush, bool local_flush)
 {
-	if (!list_empty(invalid_list)) {
-		kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list);
+	if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush))
 		return;
-	}
 
-	if (remote_flush)
-		kvm_flush_remote_tlbs(vcpu->kvm);
-	else if (local_flush)
+	if (local_flush)
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 }
 
@@ -2253,11 +2260,6 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
 static void mmu_audit_disable(void) { }
 #endif
 
-static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
-}
-
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			  struct list_head *invalid_list)
 {
@@ -2482,7 +2484,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
 			flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
 	}
-	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	clear_page(sp->spt);
 	trace_kvm_mmu_get_page(sp, true);
 
@@ -2668,17 +2669,22 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 	return zapped;
 }
 
-static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-				    struct list_head *invalid_list)
+static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
+				       struct kvm_mmu_page *sp,
+				       struct list_head *invalid_list,
+				       int *nr_zapped)
 {
-	int ret;
+	bool list_unstable;
 
 	trace_kvm_mmu_prepare_zap_page(sp);
 	++kvm->stat.mmu_shadow_zapped;
-	ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
+	*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 
+	/* Zapping children means active_mmu_pages has become unstable. */
+	list_unstable = *nr_zapped;
+
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp);
 
@@ -2686,22 +2692,27 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		/* Count self */
-		ret++;
+		(*nr_zapped)++;
 		list_move(&sp->link, invalid_list);
 		kvm_mod_used_mmu_pages(kvm, -1);
 	} else {
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
 
-		/*
-		 * The obsolete pages can not be used on any vcpus.
-		 * See the comments in kvm_mmu_invalidate_zap_all_pages().
-		 */
-		if (!sp->role.invalid && !is_obsolete_sp(kvm, sp))
+		if (!sp->role.invalid)
 			kvm_reload_remote_mmus(kvm);
 	}
 
 	sp->role.invalid = 1;
-	return ret;
+	return list_unstable;
 }
 
+static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				     struct list_head *invalid_list)
+{
+	int nr_zapped;
+
+	__kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped);
+	return nr_zapped;
+}
+
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
@@ -3703,7 +3714,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 		u64 *lm_root;
 
-		lm_root = (void*)get_zeroed_page(GFP_KERNEL);
+		lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 		if (lm_root == NULL)
 			return 1;
 
@@ -4204,14 +4215,6 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 		return false;
 
 	if (cached_root_available(vcpu, new_cr3, new_role)) {
-		/*
-		 * It is possible that the cached previous root page is
-		 * obsolete because of a change in the MMU
-		 * generation number. However, that is accompanied by
-		 * KVM_REQ_MMU_RELOAD, which will free the root that we
-		 * have set here and allocate a new one.
-		 */
-
 		kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
 		if (!skip_tlb_flush) {
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5486,81 +5489,6 @@ void kvm_disable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
-{
-	free_page((unsigned long)vcpu->arch.mmu->pae_root);
-	free_page((unsigned long)vcpu->arch.mmu->lm_root);
-}
-
-static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
-{
-	struct page *page;
-	int i;
-
-	if (tdp_enabled)
-		return 0;
-
-	/*
-	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-	 * Therefore we need to allocate shadow page tables in the first
-	 * 4GB of memory, which happens to fit the DMA32 zone.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
-	if (!page)
-		return -ENOMEM;
-
-	vcpu->arch.mmu->pae_root = page_address(page);
-	for (i = 0; i < 4; ++i)
-		vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
-
-	return 0;
-}
-
-int kvm_mmu_create(struct kvm_vcpu *vcpu)
-{
-	uint i;
-
-	vcpu->arch.mmu = &vcpu->arch.root_mmu;
-	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
-
-	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.root_mmu.root_cr3 = 0;
-	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
-	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
-	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.guest_mmu.root_cr3 = 0;
-	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
-	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
-	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-	return alloc_mmu_pages(vcpu);
-}
-
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
-			struct kvm_memory_slot *slot,
-			struct kvm_page_track_notifier_node *node)
-{
-	kvm_mmu_invalidate_zap_all_pages(kvm);
-}
-
-void kvm_mmu_init_vm(struct kvm *kvm)
-{
-	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-	node->track_write = kvm_mmu_pte_write;
-	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
-	kvm_page_track_register_notifier(kvm, node);
-}
-
-void kvm_mmu_uninit_vm(struct kvm *kvm)
-{
-	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-	kvm_page_track_unregister_notifier(kvm, node);
-}
-
 /* The return value indicates if tlb flush on all vcpus is needed. */
 typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
@@ -5631,17 +5559,119 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
 				 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
 }
 
+static void free_mmu_pages(struct kvm_vcpu *vcpu)
+{
+	free_page((unsigned long)vcpu->arch.mmu->pae_root);
+	free_page((unsigned long)vcpu->arch.mmu->lm_root);
+}
+
+static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
+{
+	struct page *page;
+	int i;
+
+	if (tdp_enabled)
+		return 0;
+
+	/*
+	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
+	 * Therefore we need to allocate shadow page tables in the first
+	 * 4GB of memory, which happens to fit the DMA32 zone.
+	 */
+	page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
+	if (!page)
+		return -ENOMEM;
+
+	vcpu->arch.mmu->pae_root = page_address(page);
+	for (i = 0; i < 4; ++i)
+		vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
+
+	return 0;
+}
+
+int kvm_mmu_create(struct kvm_vcpu *vcpu)
+{
+	uint i;
+
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
+	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
+
+	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.root_mmu.root_cr3 = 0;
+	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
+	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.guest_mmu.root_cr3 = 0;
+	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
+	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+	return alloc_mmu_pages(vcpu);
+}
+
+static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
+			struct kvm_memory_slot *slot,
+			struct kvm_page_track_notifier_node *node)
+{
+	struct kvm_mmu_page *sp;
+	LIST_HEAD(invalid_list);
+	unsigned long i;
+	bool flush;
+	gfn_t gfn;
+
+	spin_lock(&kvm->mmu_lock);
+
+	if (list_empty(&kvm->arch.active_mmu_pages))
+		goto out_unlock;
+
+	flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);
+
+	for (i = 0; i < slot->npages; i++) {
+		gfn = slot->base_gfn + i;
+
+		for_each_valid_sp(kvm, sp, gfn) {
+			if (sp->gfn != gfn)
+				continue;
+
+			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+		}
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+			flush = false;
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+}
+
+void kvm_mmu_init_vm(struct kvm *kvm)
+{
+	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+
+	node->track_write = kvm_mmu_pte_write;
+	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
+	kvm_page_track_register_notifier(kvm, node);
+}
+
+void kvm_mmu_uninit_vm(struct kvm *kvm)
+{
+	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+
+	kvm_page_track_unregister_notifier(kvm, node);
+}
+
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	bool flush_tlb = true;
-	bool flush = false;
 	int i;
 
-	if (kvm_available_flush_tlb_with_range())
-		flush_tlb = false;
-
 	spin_lock(&kvm->mmu_lock);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 		slots = __kvm_memslots(kvm, i);
@@ -5653,17 +5683,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 			if (start >= end)
 				continue;
 
-			flush |= slot_handle_level_range(kvm, memslot,
-					kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL,
-					PT_MAX_HUGEPAGE_LEVEL, start,
-					end - 1, flush_tlb);
+			slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
+						PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
+						start, end - 1, true);
 		}
 	}
 
-	if (flush)
-		kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
-				gfn_end - gfn_start + 1);
-
 	spin_unlock(&kvm->mmu_lock);
 }
 
@@ -5815,101 +5840,58 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-#define BATCH_ZAP_PAGES	10
-static void kvm_zap_obsolete_pages(struct kvm *kvm)
+static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
 {
 	struct kvm_mmu_page *sp, *node;
-	int batch = 0;
+	LIST_HEAD(invalid_list);
+	int ign;
 
+	spin_lock(&kvm->mmu_lock);
 restart:
-	list_for_each_entry_safe_reverse(sp, node,
-	      &kvm->arch.active_mmu_pages, link) {
-		int ret;
-
-		/*
-		 * No obsolete page exists before new created page since
-		 * active_mmu_pages is the FIFO list.
-		 */
-		if (!is_obsolete_sp(kvm, sp))
-			break;
-
-		/*
-		 * Since we are reversely walking the list and the invalid
-		 * list will be moved to the head, skip the invalid page
-		 * can help us to avoid the infinity list walking.
-		 */
-		if (sp->role.invalid)
+	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
+		if (mmio_only && !sp->mmio_cached)
 			continue;
 
-		/*
-		 * Need not flush tlb since we only zap the sp with invalid
-		 * generation number.
-		 */
-		if (batch >= BATCH_ZAP_PAGES &&
-		      cond_resched_lock(&kvm->mmu_lock)) {
-			batch = 0;
+		if (sp->role.invalid && sp->root_count)
+			continue;
+		if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) {
+			WARN_ON_ONCE(mmio_only);
 			goto restart;
 		}
 
-		ret = kvm_mmu_prepare_zap_page(kvm, sp,
-				&kvm->arch.zapped_obsolete_pages);
-		batch += ret;
-
-		if (ret)
+		if (cond_resched_lock(&kvm->mmu_lock))
 			goto restart;
 	}
 
-	/*
-	 * Should flush tlb before free page tables since lockless-walking
-	 * may use the pages.
-	 */
-	kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
-}
-
-/*
- * Fast invalidate all shadow pages and use lock-break technique
- * to zap obsolete pages.
- *
- * It's required when memslot is being deleted or VM is being
- * destroyed, in these cases, we should ensure that KVM MMU does
- * not use any resource of the being-deleted slot or all slots
- * after calling the function.
- */
-void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
-{
-	spin_lock(&kvm->mmu_lock);
-	trace_kvm_mmu_invalidate_zap_all_pages(kvm);
-	kvm->arch.mmu_valid_gen++;
-
-	/*
-	 * Notify all vcpus to reload its shadow page table
-	 * and flush TLB. Then all vcpus will switch to new
-	 * shadow page table with the new mmu_valid_gen.
-	 *
-	 * Note: we should do this under the protection of
-	 * mmu-lock, otherwise, vcpu would purge shadow page
-	 * but miss tlb flush.
-	 */
-	kvm_reload_remote_mmus(kvm);
-
-	kvm_zap_obsolete_pages(kvm);
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
 	spin_unlock(&kvm->mmu_lock);
 }
 
-static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
-	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+	return __kvm_mmu_zap_all(kvm, false);
 }
 
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
+	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+
+	gen &= MMIO_SPTE_GEN_MASK;
+
 	/*
-	 * The very rare case: if the generation-number is round,
+	 * Generation numbers are incremented in multiples of the number of
+	 * address spaces in order to provide unique generations across all
+	 * address spaces.  Strip what is effectively the address space
+	 * modifier prior to checking for a wrap of the MMIO generation so
+	 * that a wrap in any address space is detected.
+	 */
+	gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
+
+	/*
+	 * The very rare case: if the MMIO generation number has wrapped,
 	 * zap all shadow pages.
 	 */
-	if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
+	if (unlikely(gen == 0)) {
 		kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
-		kvm_mmu_invalidate_zap_all_pages(kvm);
+		__kvm_mmu_zap_all(kvm, true);
 	}
 }
 
@@ -5940,24 +5922,16 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		 * want to shrink a VM that only started to populate its MMU
 		 * anyway.
 		 */
-		if (!kvm->arch.n_used_mmu_pages &&
-		    !kvm_has_zapped_obsolete_pages(kvm))
+		if (!kvm->arch.n_used_mmu_pages)
 			continue;
 
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 
-		if (kvm_has_zapped_obsolete_pages(kvm)) {
-			kvm_mmu_commit_zap_page(kvm,
-			      &kvm->arch.zapped_obsolete_pages);
-			goto unlock;
-		}
-
 		if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
 			freed++;
 		kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
-unlock:
 		spin_unlock(&kvm->mmu_lock);
 		srcu_read_unlock(&kvm->srcu, idx);
 
@@ -203,7 +203,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return -(u32)fault & errcode;
 }
 
-void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
 
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
@@ -8,18 +8,16 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
-#define KVM_MMU_PAGE_FIELDS			\
-	__field(unsigned long, mmu_valid_gen)	\
-	__field(__u64, gfn)			\
-	__field(__u32, role)			\
-	__field(__u32, root_count)		\
+#define KVM_MMU_PAGE_FIELDS		\
+	__field(__u64, gfn)		\
+	__field(__u32, role)		\
+	__field(__u32, root_count)	\
 	__field(bool, unsync)
 
-#define KVM_MMU_PAGE_ASSIGN(sp)				\
-	__entry->mmu_valid_gen = sp->mmu_valid_gen;	\
-	__entry->gfn = sp->gfn;				\
-	__entry->role = sp->role.word;			\
-	__entry->root_count = sp->root_count;		\
+#define KVM_MMU_PAGE_ASSIGN(sp)			\
+	__entry->gfn = sp->gfn;			\
+	__entry->role = sp->role.word;		\
+	__entry->root_count = sp->root_count;	\
 	__entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
@@ -31,9 +29,8 @@
 									\
 	role.word = __entry->role;					\
 									\
-	trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s"	\
+	trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s"		\
 			 " %snxe %sad root %u %s%c",			\
-			 __entry->mmu_valid_gen,			\
 			 __entry->gfn, role.level,			\
 			 role.cr4_pae ? " pae" : "",			\
 			 role.quadrant,					\
@@ -282,27 +279,6 @@ TRACE_EVENT(
 	)
 );
 
-TRACE_EVENT(
-	kvm_mmu_invalidate_zap_all_pages,
-	TP_PROTO(struct kvm *kvm),
-	TP_ARGS(kvm),
-
-	TP_STRUCT__entry(
-		__field(unsigned long, mmu_valid_gen)
-		__field(unsigned int, mmu_used_pages)
-	),
-
-	TP_fast_assign(
-		__entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
-		__entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
-	),
-
-	TP_printk("kvm-mmu-valid-gen %lx used_pages %x",
-		  __entry->mmu_valid_gen, __entry->mmu_used_pages
-	)
-);
-
-
 TRACE_EVENT(
 	check_mmio_spte,
 	TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
@@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
 	for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
 		slot->arch.gfn_track[i] =
 			kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-				 GFP_KERNEL);
+				 GFP_KERNEL_ACCOUNT);
 		if (!slot->arch.gfn_track[i])
 			goto track_free;
 	}
@@ -145,7 +145,6 @@ struct kvm_svm {
 
 	/* Struct members for AVIC */
 	u32 avic_vm_id;
-	u32 ldr_mode;
 	struct page *avic_logical_id_table_page;
 	struct page *avic_physical_id_table_page;
 	struct hlist_node hnode;
@@ -236,6 +235,7 @@ struct vcpu_svm {
 	bool nrips_enabled	: 1;
 
 	u32 ldr_reg;
+	u32 dfr_reg;
 	struct page *avic_backing_page;
 	u64 *avic_physical_id_cache;
 	bool avic_is_running;
@@ -1795,9 +1795,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 	/* Avoid using vmalloc for smaller buffers. */
 	size = npages * sizeof(struct page *);
 	if (size > PAGE_SIZE)
-		pages = vmalloc(size);
+		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+				  PAGE_KERNEL);
 	else
-		pages = kmalloc(size, GFP_KERNEL);
+		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
 
 	if (!pages)
 		return NULL;
@@ -1865,7 +1866,9 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
 
 static struct kvm *svm_vm_alloc(void)
 {
-	struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
+	struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
+					    GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+					    PAGE_KERNEL);
 	return &kvm_svm->kvm;
 }
 
@@ -1940,7 +1943,7 @@ static int avic_vm_init(struct kvm *kvm)
 		return 0;
 
 	/* Allocating physical APIC ID table (4KB) */
-	p_page = alloc_page(GFP_KERNEL);
+	p_page = alloc_page(GFP_KERNEL_ACCOUNT);
 	if (!p_page)
 		goto free_avic;
 
@@ -1948,7 +1951,7 @@ static int avic_vm_init(struct kvm *kvm)
 	clear_page(page_address(p_page));
 
 	/* Allocating logical APIC ID table (4KB) */
-	l_page = alloc_page(GFP_KERNEL);
+	l_page = alloc_page(GFP_KERNEL_ACCOUNT);
 	if (!l_page)
 		goto free_avic;
 
@@ -2106,6 +2109,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
 
 	INIT_LIST_HEAD(&svm->ir_list);
 	spin_lock_init(&svm->ir_list_lock);
+	svm->dfr_reg = APIC_DFR_FLAT;
 
 	return ret;
 }
@@ -2119,13 +2123,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	struct page *nested_msrpm_pages;
 	int err;
 
-	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
 	if (!svm) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
+	svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
+						     GFP_KERNEL_ACCOUNT);
 	if (!svm->vcpu.arch.guest_fpu) {
 		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
 		err = -ENOMEM;
@@ -2137,19 +2142,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_svm;
 
 	err = -ENOMEM;
-	page = alloc_page(GFP_KERNEL);
+	page = alloc_page(GFP_KERNEL_ACCOUNT);
 	if (!page)
 		goto uninit;
 
-	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
 	if (!msrpm_pages)
 		goto free_page1;
 
-	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
 	if (!nested_msrpm_pages)
 		goto free_page2;
 
-	hsave_page = alloc_page(GFP_KERNEL);
+	hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
 	if (!hsave_page)
 		goto free_page3;
 
@@ -4565,8 +4570,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 	return &logical_apic_id_table[index];
 }
 
-static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
-			  bool valid)
+static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 {
 	bool flat;
 	u32 *entry, new_entry;
@@ -4579,31 +4583,39 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
 	new_entry = READ_ONCE(*entry);
 	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
 	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
-	if (valid)
-		new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
-	else
-		new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
 	WRITE_ONCE(*entry, new_entry);
 
 	return 0;
 }
 
+static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
+	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
+
+	if (entry)
+		WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+}
+
 static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 {
-	int ret;
+	int ret = 0;
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 
-	if (!ldr)
-		return 1;
+	if (ldr == svm->ldr_reg)
+		return 0;
 
-	ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
-	if (ret && svm->ldr_reg) {
-		avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
-		svm->ldr_reg = 0;
-	} else {
+	avic_invalidate_logical_id_entry(vcpu);
+
+	if (ldr)
+		ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr);
+
+	if (!ret)
 		svm->ldr_reg = ldr;
-	}
 
 	return ret;
 }
@@ -4637,27 +4649,16 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
+static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
-	u32 mod = (dfr >> 28) & 0xf;
 
-	/*
-	 * We assume that all local APICs are using the same type.
-	 * If this changes, we need to flush the AVIC logical
-	 * APID id table.
-	 */
-	if (kvm_svm->ldr_mode == mod)
-		return 0;
+	if (svm->dfr_reg == dfr)
+		return;
 
-	clear_page(page_address(kvm_svm->avic_logical_id_table_page));
-	kvm_svm->ldr_mode = mod;
-
-	if (svm->ldr_reg)
-		avic_handle_ldr_update(vcpu);
-	return 0;
+	avic_invalidate_logical_id_entry(vcpu);
+	svm->dfr_reg = dfr;
 }
 
 static int avic_unaccel_trap_write(struct vcpu_svm *svm)
@@ -5125,11 +5126,11 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
 
-	if (!kvm_vcpu_apicv_active(&svm->vcpu))
-		return;
-
-	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
-	mark_dirty(vmcb, VMCB_INTR);
+	if (kvm_vcpu_apicv_active(vcpu))
+		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
+	else
+		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
+	mark_dirty(vmcb, VMCB_AVIC);
 }
 
 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -5195,7 +5196,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 	 * Allocating new amd_iommu_pi_data, which will get
 	 * add to the per-vcpu ir_list.
 	 */
-	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
+	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
 	if (!ir) {
 		ret = -ENOMEM;
 		goto out;
@@ -6163,8 +6164,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
 {
 	if (avic_handle_apic_id_update(vcpu) != 0)
 		return;
-	if (avic_handle_dfr_update(vcpu) != 0)
-		return;
+	avic_handle_dfr_update(vcpu);
 	avic_handle_ldr_update(vcpu);
 }
 
@@ -6311,7 +6311,7 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 	if (ret)
 		return ret;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6361,7 +6361,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
 		return -EFAULT;
 
-	start = kzalloc(sizeof(*start), GFP_KERNEL);
+	start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
 	if (!start)
 		return -ENOMEM;
 
@@ -6458,7 +6458,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
 		return -EFAULT;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6535,7 +6535,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (copy_from_user(&params, measure, sizeof(params)))
 		return -EFAULT;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6597,7 +6597,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!sev_guest(kvm))
 		return -ENOTTY;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6618,7 +6618,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!sev_guest(kvm))
 		return -ENOTTY;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6646,7 +6646,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
 	struct sev_data_dbg *data;
 	int ret;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		return -ENOMEM;
 
@@ -6901,7 +6901,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	}
 
 	ret = -ENOMEM;
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
 	if (!data)
 		goto e_unpin_memory;
 
@@ -7007,7 +7007,7 @@ static int svm_register_enc_region(struct kvm *kvm,
 	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
 		return -EINVAL;
 
-	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
 	if (!region)
 		return -ENOMEM;
 
@@ -211,7 +211,6 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
-	hrtimer_cancel(&vmx->nested.preemption_timer);
 	vmx->nested.vmxon = false;
 	vmx->nested.smm.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
@@ -274,6 +273,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	vcpu_load(vcpu);
+	vmx_leave_nested(vcpu);
 	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
 	free_nested(vcpu);
 	vcpu_put(vcpu);
@@ -1979,17 +1979,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
 		prepare_vmcs02_early_full(vmx, vmcs12);
 
-	/*
-	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
-	 * entry, but only if the current (host) sp changed from the value
-	 * we wrote last (vmx->host_rsp).  This cache is no longer relevant
-	 * if we switch vmcs, and rather than hold a separate cache per vmcs,
-	 * here we just force the write to happen on entry.  host_rsp will
-	 * also be written unconditionally by nested_vmx_check_vmentry_hw()
-	 * if we are doing early consistency checks via hardware.
-	 */
-	vmx->host_rsp = 0;
-
 	/*
 	 * PIN CONTROLS
 	 */
@@ -2289,10 +2278,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	}
 	vmx_set_rflags(vcpu, vmcs12->guest_rflags);
 
-	vmx->nested.preemption_timer_expired = false;
-	if (nested_cpu_has_preemption_timer(vmcs12))
-		vmx_start_preemption_timer(vcpu);
-
 	/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
 	 * bitwise-or of what L1 wants to trap for L2, and what we want to
 	 * trap. Note that CR0.TS also needs updating - we do this later.
@@ -2722,6 +2707,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
+	bool vm_fail;
 
 	if (!nested_early_check)
 		return 0;
@@ -2755,29 +2741,34 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 		vmx->loaded_vmcs->host_state.cr4 = cr4;
 	}
 
-	vmx->__launched = vmx->loaded_vmcs->launched;
-
 	asm(
-		/* Set HOST_RSP */
 		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
-		__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
-		"mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t"
+		"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+		"je 1f \n\t"
+		__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
+		"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+		"1: \n\t"
 		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
 
 		/* Check if vmlaunch or vmresume is needed */
-		"cmpl $0, %c[launched](%% " _ASM_CX")\n\t"
+		"cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
 
+		/*
+		 * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
+		 * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
+		 * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
		 * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
+		 */
 		"call vmx_vmenter\n\t"
 
-		/* Set vmx->fail accordingly */
-		"setbe %c[fail](%% " _ASM_CX")\n\t"
-	      : ASM_CALL_CONSTRAINT
-	      : "c"(vmx), "d"((unsigned long)HOST_RSP),
-		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
-		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
-		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
+		CC_SET(be)
+	      : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
+	      : [HOST_RSP]"r"((unsigned long)HOST_RSP),
+		[loaded_vmcs]"r"(vmx->loaded_vmcs),
+		[launched]"i"(offsetof(struct loaded_vmcs, launched)),
+		[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
 		[wordsize]"i"(sizeof(ulong))
-	      : "rax", "cc", "memory"
+	      : "cc", "memory"
 	);
 
 	preempt_enable();
@@ -2787,10 +2778,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	if (vmx->msr_autoload.guest.nr)
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 
-	if (vmx->fail) {
+	if (vm_fail) {
 		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
 			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-		vmx->fail = 0;
 		return 1;
 	}
 
@@ -2813,8 +2803,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 
 	return 0;
 }
-STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
-
 
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 						 struct vmcs12 *vmcs12);
@@ -3030,6 +3018,15 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 	if (unlikely(evaluate_pending_interrupts))
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 
+	/*
+	 * Do not start the preemption timer hrtimer until after we know
+	 * we are successful, so that only nested_vmx_vmexit needs to cancel
+	 * the timer.
+	 */
+	vmx->nested.preemption_timer_expired = false;
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		vmx_start_preemption_timer(vcpu);
+
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -3450,13 +3447,10 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	else
 		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
-	if (nested_cpu_has_preemption_timer(vmcs12)) {
-		if (vmcs12->vm_exit_controls &
-		    VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+	if (nested_cpu_has_preemption_timer(vmcs12) &&
+	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
 		vmcs12->vmx_preemption_timer_value =
 			vmx_get_preemption_timer_value(vcpu);
-		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
-	}
 
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -3864,6 +3858,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
 	leave_guest_mode(vcpu);
 
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
 
@@ -3915,9 +3912,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		vmx_flush_tlb(vcpu, true);
 	}
 
-	/* This is needed for same reason as it was needed in prepare_vmcs02 */
-	vmx->host_rsp = 0;
-
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -4035,25 +4029,50 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 	/* Addr = segment_base + offset */
 	/* offset = base + [index * scale] + displacement */
 	off = exit_qualification; /* holds the displacement */
+	if (addr_size == 1)
+		off = (gva_t)sign_extend64(off, 31);
+	else if (addr_size == 0)
+		off = (gva_t)sign_extend64(off, 15);
 	if (base_is_valid)
 		off += kvm_register_read(vcpu, base_reg);
 	if (index_is_valid)
 		off += kvm_register_read(vcpu, index_reg)<<scaling;
 	vmx_get_segment(vcpu, &s, seg_reg);
-	*ret = s.base + off;
 
+	/*
+	 * The effective address, i.e. @off, of a memory operand is truncated
+	 * based on the address size of the instruction.  Note that this is
+	 * the *effective address*, i.e. the address prior to accounting for
+	 * the segment's base.
+	 */
 	if (addr_size == 1) /* 32 bit */
-		*ret &= 0xffffffff;
+		off &= 0xffffffff;
+	else if (addr_size == 0) /* 16 bit */
+		off &= 0xffff;
 
 	/* Checks for #GP/#SS exceptions. */
 	exn = false;
 	if (is_long_mode(vcpu)) {
+		/*
+		 * The virtual/linear address is never truncated in 64-bit
+		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
+		 * address when using FS/GS with a non-zero base.
+		 */
+		*ret = s.base + off;
+
 		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
 		 * non-canonical form. This is the only check on the memory
 		 * destination for long mode!
 		 */
 		exn = is_noncanonical_address(*ret, vcpu);
-	} else if (is_protmode(vcpu)) {
+	} else {
+		/*
+		 * When not in long mode, the virtual/linear address is
+		 * unconditionally truncated to 32 bits regardless of the
+		 * address size.
+		 */
+		*ret = (s.base + off) & 0xffffffff;
+
 		/* Protected mode: apply checks for segment validity in the
 		 * following order:
 		 *  - segment type check (#GP(0) may be thrown)
@@ -4077,10 +4096,16 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
 		 */
 		exn = (s.unusable != 0);
-		/* Protected mode: #GP(0)/#SS(0) if the memory
-		 * operand is outside the segment limit.
+
+		/*
+		 * Protected mode: #GP(0)/#SS(0) if the memory operand is
+		 * outside the segment limit.  All CPUs that support VMX ignore
+		 * limit checks for flat segments, i.e. segments with base==0,
+		 * limit==0xffffffff and of type expand-up data or code.
 		 */
-		exn = exn || (off + sizeof(u64) > s.limit);
+		if (!(s.base == 0 && s.limit == 0xffffffff &&
+		     ((s.type & 8) || !(s.type & 4))))
+			exn = exn || (off + sizeof(u64) > s.limit);
 	}
 	if (exn) {
 		kvm_queue_exception_e(vcpu,
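The reworked get_vmx_mem_address() logic above boils down to two separate truncations, which the following standalone sketch (not the KVM helper itself; names are illustrative) makes explicit: the effective address is truncated to the instruction's address size, while the linear address is only truncated to 32 bits outside 64-bit mode:

#include <stdbool.h>
#include <stdint.h>

/* Illustration of the addressing rules spelled out in the comments above. */
static uint64_t linear_address(uint64_t seg_base, uint64_t off,
			       int addr_size_bits, bool long_mode)
{
	/* Truncate the *effective* address to the instruction's address size. */
	if (addr_size_bits == 32)
		off &= 0xffffffffull;
	else if (addr_size_bits == 16)
		off &= 0xffffull;

	/* The linear address is never truncated in 64-bit mode... */
	if (long_mode)
		return seg_base + off;

	/* ...but is unconditionally truncated to 32 bits otherwise. */
	return (seg_base + off) & 0xffffffffull;
}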
@@ -4145,11 +4170,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		goto out_vmcs02;
 
-	vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
+	vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
 	if (!vmx->nested.cached_vmcs12)
 		goto out_cached_vmcs12;
 
-	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL);
+	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
 	if (!vmx->nested.cached_shadow_vmcs12)
 		goto out_cached_shadow_vmcs12;
 
@@ -5696,6 +5721,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
 		enable_shadow_vmcs = 0;
 	if (enable_shadow_vmcs) {
 		for (i = 0; i < VMX_BITMAP_NR; i++) {
+			/*
+			 * The vmx_bitmap is not tied to a VM and so should
+			 * not be charged to a memcg.
+			 */
 			vmx_bitmap[i] = (unsigned long *)
 				__get_free_page(GFP_KERNEL);
 			if (!vmx_bitmap[i]) {
@@ -34,6 +34,7 @@ struct vmcs_host_state {
 	unsigned long cr4;	/* May not match real cr4 */
 	unsigned long gs_base;
 	unsigned long fs_base;
+	unsigned long rsp;
 
 	u16           fs_sel, gs_sel, ldt_sel;
 #ifdef CONFIG_X86_64
@@ -1,6 +1,30 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/bitsperlong.h>
+#include <asm/kvm_vcpu_regs.h>
+
+#define WORD_SIZE (BITS_PER_LONG / 8)
+
+#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
+#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
+#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
+#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
+/* Intentionally omit RSP as it's context switched by hardware */
+#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
+#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
+#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE
+
+#ifdef CONFIG_X86_64
+#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
+#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
+#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
+#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
+#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
+#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
+#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
+#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
+#endif
 
 .text
@@ -55,3 +79,146 @@ ENDPROC(vmx_vmenter)
ENTRY(vmx_vmexit)
ret
ENDPROC(vmx_vmexit)

/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
* @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers)
* @launched: %true if the VMCS has been launched
*
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
*/
ENTRY(__vmx_vcpu_run)
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
push %r13
push %r12
#else
push %edi
push %esi
#endif
push %_ASM_BX

/*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values.
*/
push %_ASM_ARG2

/* Copy @launched to BL, _ASM_ARG3 is volatile. */
mov %_ASM_ARG3B, %bl

/* Adjust RSP to account for the CALL to vmx_vmenter(). */
lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp

/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX

/* Check if vmlaunch or vmresume is needed */
cmpb $0, %bl

/* Load guest registers. Don't clobber flags. */
mov VCPU_RBX(%_ASM_AX), %_ASM_BX
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
mov VCPU_RDX(%_ASM_AX), %_ASM_DX
mov VCPU_RSI(%_ASM_AX), %_ASM_SI
mov VCPU_RDI(%_ASM_AX), %_ASM_DI
mov VCPU_RBP(%_ASM_AX), %_ASM_BP
#ifdef CONFIG_X86_64
mov VCPU_R8 (%_ASM_AX), %r8
mov VCPU_R9 (%_ASM_AX), %r9
mov VCPU_R10(%_ASM_AX), %r10
mov VCPU_R11(%_ASM_AX), %r11
mov VCPU_R12(%_ASM_AX), %r12
mov VCPU_R13(%_ASM_AX), %r13
mov VCPU_R14(%_ASM_AX), %r14
mov VCPU_R15(%_ASM_AX), %r15
#endif
/* Load guest RAX. This kills the vmx_vcpu pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX

/* Enter guest mode */
call vmx_vmenter

/* Jump on VM-Fail. */
jbe 2f

/* Temporarily save guest's RAX. */
push %_ASM_AX

/* Reload @regs to RAX. */
mov WORD_SIZE(%_ASM_SP), %_ASM_AX

/* Save all guest registers, including RAX from the stack */
__ASM_SIZE(pop) VCPU_RAX(%_ASM_AX)
mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
#ifdef CONFIG_X86_64
mov %r8, VCPU_R8 (%_ASM_AX)
mov %r9, VCPU_R9 (%_ASM_AX)
mov %r10, VCPU_R10(%_ASM_AX)
mov %r11, VCPU_R11(%_ASM_AX)
mov %r12, VCPU_R12(%_ASM_AX)
mov %r13, VCPU_R13(%_ASM_AX)
mov %r14, VCPU_R14(%_ASM_AX)
mov %r15, VCPU_R15(%_ASM_AX)
#endif

/* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
xor %eax, %eax

/*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during
* VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
*/
1: xor %ebx, %ebx
xor %ecx, %ecx
xor %edx, %edx
xor %esi, %esi
xor %edi, %edi
xor %ebp, %ebp
#ifdef CONFIG_X86_64
xor %r8d, %r8d
xor %r9d, %r9d
xor %r10d, %r10d
xor %r11d, %r11d
xor %r12d, %r12d
xor %r13d, %r13d
xor %r14d, %r14d
xor %r15d, %r15d
#endif

/* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP
pop %_ASM_BX

#ifdef CONFIG_X86_64
pop %r12
pop %r13
pop %r14
pop %r15
#else
pop %esi
pop %edi
#endif
pop %_ASM_BP
ret

/* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
2: mov $1, %eax
jmp 1b
ENDPROC(__vmx_vcpu_run)
@@ -246,6 +246,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)

if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
!boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
/*
* This allocation for vmx_l1d_flush_pages is not tied to a VM
* lifetime and so should not be charged to a memcg.
*/
page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
if (!page)
return -ENOMEM;
@@ -2387,13 +2391,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return 0;
}

struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
{
int node = cpu_to_node(cpu);
struct page *pages;
struct vmcs *vmcs;

pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
pages = __alloc_pages_node(node, flags, vmcs_config.order);
if (!pages)
return NULL;
vmcs = page_address(pages);
@@ -2440,7 +2444,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
loaded_vmcs_init(loaded_vmcs);

if (cpu_has_vmx_msr_bitmap()) {
loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
loaded_vmcs->msr_bitmap = (unsigned long *)
__get_free_page(GFP_KERNEL_ACCOUNT);
if (!loaded_vmcs->msr_bitmap)
goto out_vmcs;
memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
@@ -2481,7 +2486,7 @@ static __init int alloc_kvm_area(void)
for_each_possible_cpu(cpu) {
struct vmcs *vmcs;

vmcs = alloc_vmcs_cpu(false, cpu);
vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
if (!vmcs) {
free_kvm_area();
return -ENOMEM;
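With the extra gfp_t parameter, alloc_vmcs_cpu() lets each caller decide whether the VMCS pages are charged to a memcg: the per-VM path (alloc_vmcs() in vmx.h, shown later in this diff) passes GFP_KERNEL_ACCOUNT, while the boot-time alloc_kvm_area() above keeps GFP_KERNEL for the per-CPU regions that outlive any VM. A hedged sketch of the same pattern with a hypothetical allocator:

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/topology.h>

    /* Hypothetical analogue of alloc_vmcs_cpu(): the caller picks the gfp mask. */
    static void *demo_alloc_region(int cpu, gfp_t flags, unsigned int order)
    {
    	struct page *pages = __alloc_pages_node(cpu_to_node(cpu), flags, order);

    	return pages ? page_address(pages) : NULL;
    }

    /* Per-VM user: charged to the VM owner's memcg. */
    static void *demo_alloc_for_vm(int cpu)
    {
    	return demo_alloc_region(cpu, GFP_KERNEL_ACCOUNT, 0);
    }

    /* Global, boot-time user: intentionally not charged to any memcg. */
    static void *demo_alloc_global(int cpu)
    {
    	return demo_alloc_region(cpu, GFP_KERNEL, 0);
    }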
@@ -6360,150 +6365,15 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->hv_timer_armed = false;
}

static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
{
unsigned long evmcs_rsp;

vmx->__launched = vmx->loaded_vmcs->launched;

evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
(unsigned long)&current_evmcs->host_rsp : 0;

if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);

asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
"push %%" _ASM_CX " \n\t"
"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
"cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
"je 1f \n\t"
"mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
/* Avoid VMWRITE when Enlightened VMCS is in use */
"test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
"jz 2f \n\t"
"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
"jmp 1f \n\t"
"2: \n\t"
__ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
"1: \n\t"
"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */

/* Reload cr2 if changed */
"mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t"
"mov %%cr2, %%" _ASM_DX " \n\t"
"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
"je 3f \n\t"
"mov %%" _ASM_AX", %%cr2 \n\t"
"3: \n\t"
/* Check if vmlaunch or vmresume is needed */
"cmpl $0, %c[launched](%%" _ASM_CX ") \n\t"
/* Load guest registers. Don't clobber flags. */
"mov %c[rax](%%" _ASM_CX "), %%" _ASM_AX " \n\t"
"mov %c[rbx](%%" _ASM_CX "), %%" _ASM_BX " \n\t"
"mov %c[rdx](%%" _ASM_CX "), %%" _ASM_DX " \n\t"
"mov %c[rsi](%%" _ASM_CX "), %%" _ASM_SI " \n\t"
"mov %c[rdi](%%" _ASM_CX "), %%" _ASM_DI " \n\t"
"mov %c[rbp](%%" _ASM_CX "), %%" _ASM_BP " \n\t"
#ifdef CONFIG_X86_64
"mov %c[r8](%%" _ASM_CX "), %%r8 \n\t"
"mov %c[r9](%%" _ASM_CX "), %%r9 \n\t"
"mov %c[r10](%%" _ASM_CX "), %%r10 \n\t"
"mov %c[r11](%%" _ASM_CX "), %%r11 \n\t"
"mov %c[r12](%%" _ASM_CX "), %%r12 \n\t"
"mov %c[r13](%%" _ASM_CX "), %%r13 \n\t"
"mov %c[r14](%%" _ASM_CX "), %%r14 \n\t"
"mov %c[r15](%%" _ASM_CX "), %%r15 \n\t"
#endif
/* Load guest RCX. This kills the vmx_vcpu pointer! */
"mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t"

/* Enter guest mode */
"call vmx_vmenter\n\t"

/* Save guest's RCX to the stack placeholder (see above) */
"mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t"

/* Load host's RCX, i.e. the vmx_vcpu pointer */
"pop %%" _ASM_CX " \n\t"

/* Set vmx->fail based on EFLAGS.{CF,ZF} */
"setbe %c[fail](%%" _ASM_CX ")\n\t"

/* Save all guest registers, including RCX from the stack */
"mov %%" _ASM_AX ", %c[rax](%%" _ASM_CX ") \n\t"
"mov %%" _ASM_BX ", %c[rbx](%%" _ASM_CX ") \n\t"
__ASM_SIZE(pop) " %c[rcx](%%" _ASM_CX ") \n\t"
"mov %%" _ASM_DX ", %c[rdx](%%" _ASM_CX ") \n\t"
"mov %%" _ASM_SI ", %c[rsi](%%" _ASM_CX ") \n\t"
"mov %%" _ASM_DI ", %c[rdi](%%" _ASM_CX ") \n\t"
"mov %%" _ASM_BP ", %c[rbp](%%" _ASM_CX ") \n\t"
#ifdef CONFIG_X86_64
"mov %%r8, %c[r8](%%" _ASM_CX ") \n\t"
"mov %%r9, %c[r9](%%" _ASM_CX ") \n\t"
"mov %%r10, %c[r10](%%" _ASM_CX ") \n\t"
"mov %%r11, %c[r11](%%" _ASM_CX ") \n\t"
"mov %%r12, %c[r12](%%" _ASM_CX ") \n\t"
"mov %%r13, %c[r13](%%" _ASM_CX ") \n\t"
"mov %%r14, %c[r14](%%" _ASM_CX ") \n\t"
"mov %%r15, %c[r15](%%" _ASM_CX ") \n\t"
/*
* Clear host registers marked as clobbered to prevent
* speculative use.
*/
"xor %%r8d, %%r8d \n\t"
"xor %%r9d, %%r9d \n\t"
"xor %%r10d, %%r10d \n\t"
"xor %%r11d, %%r11d \n\t"
"xor %%r12d, %%r12d \n\t"
"xor %%r13d, %%r13d \n\t"
"xor %%r14d, %%r14d \n\t"
"xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%%" _ASM_CX ") \n\t"

"xor %%eax, %%eax \n\t"
"xor %%ebx, %%ebx \n\t"
"xor %%esi, %%esi \n\t"
"xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
: ASM_CALL_CONSTRAINT
: "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
[fail]"i"(offsetof(struct vcpu_vmx, fail)),
[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
[rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
[rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
[rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
[rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
[r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
[r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
[r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
[r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
[r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
[r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
[wordsize]"i"(sizeof(ulong))
: "cc", "memory"
#ifdef CONFIG_X86_64
, "rax", "rbx", "rdi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
, "eax", "ebx", "edi"
#endif
);
if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
vmx->loaded_vmcs->host_state.rsp = host_rsp;
vmcs_writel(HOST_RSP, host_rsp);
}
}
STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);

bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);

static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -6572,7 +6442,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);

__vmx_vcpu_run(vcpu, vmx);
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);

if (vcpu->arch.cr2 != read_cr2())
write_cr2(vcpu->arch.cr2);

vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
vmx->loaded_vmcs->launched);

vcpu->arch.cr2 = read_cr2();

/*
* We do not use IBRS in the kernel. If this vCPU has used the
@@ -6657,7 +6536,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)

static struct kvm *vmx_vm_alloc(void)
{
struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
GFP_KERNEL_ACCOUNT | __GFP_ZERO,
PAGE_KERNEL);
return &kvm_vmx->kvm;
}
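The vmx_vm_alloc() change above is, as far as I can tell, the accounted equivalent of the old vzalloc() call: vzalloc(size) is a zeroing GFP_KERNEL vmalloc, so spelling it as __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL) keeps the zeroed-vmalloc behaviour while adding memcg accounting. A minimal side-by-side sketch with hypothetical demo_* names:

    #include <linux/gfp.h>
    #include <linux/vmalloc.h>

    struct demo_vm {
    	unsigned long dummy[64];
    };

    /* Old style: zeroed vmalloc, not charged to a memcg. */
    static struct demo_vm *demo_vm_alloc_unaccounted(void)
    {
    	return vzalloc(sizeof(struct demo_vm));
    }

    /* New style from the hunk above: same zeroed vmalloc, but accounted. */
    static struct demo_vm *demo_vm_alloc_accounted(void)
    {
    	return __vmalloc(sizeof(struct demo_vm),
    			 GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL);
    }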
@@ -6673,7 +6554,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
if (enable_pml)
vmx_destroy_pml_buffer(vmx);
free_vpid(vmx->vpid);
leave_guest_mode(vcpu);
nested_vmx_free_vcpu(vcpu);
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
@@ -6685,14 +6565,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
struct vcpu_vmx *vmx;
unsigned long *msr_bitmap;
int cpu;

vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
if (!vmx)
return ERR_PTR(-ENOMEM);

vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vmx->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
err = -ENOMEM;
@@ -6714,12 +6596,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
* for the guest, etc.
*/
if (enable_pml) {
vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!vmx->pml_pg)
goto uninit_vcpu;
}

vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
> PAGE_SIZE);

@@ -175,7 +175,6 @@ struct nested_vmx {

struct vcpu_vmx {
struct kvm_vcpu vcpu;
unsigned long host_rsp;
u8 fail;
u8 msr_bitmap_mode;
u32 exit_intr_info;
@@ -209,7 +208,7 @@ struct vcpu_vmx {
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
struct loaded_vmcs *loaded_cpu_state;
bool __launched; /* temporary, used in vmx_vcpu_run */

struct msr_autoload {
struct vmx_msrs guest;
struct vmx_msrs host;
@@ -339,8 +338,8 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)

static inline void pi_set_sn(struct pi_desc *pi_desc)
{
return set_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
set_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
}

static inline void pi_set_on(struct pi_desc *pi_desc)
@@ -445,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void)
{
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
if (pt_mode == PT_MODE_SYSTEM)
vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL);
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmentry_ctrl &
~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
@@ -455,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
if (pt_mode == PT_MODE_SYSTEM)
vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL);
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
VM_EXIT_CLEAR_IA32_RTIT_CTL);
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmcs_config.vmexit_ctrl &
return vmexit_ctrl &
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
}

@@ -478,7 +479,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
}

struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu);
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags);
void free_vmcs(struct vmcs *vmcs);
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
@@ -487,7 +488,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);

static inline struct vmcs *alloc_vmcs(bool shadow)
{
return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
return alloc_vmcs_cpu(shadow, raw_smp_processor_id(),
GFP_KERNEL_ACCOUNT);
}

u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,

r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
GFP_KERNEL_ACCOUNT);

r = -ENOMEM;
if (!u.lapic)
@@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xsave)
break;
@@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XCRS: {
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xcrs)
break;
@@ -7055,6 +7056,13 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)

void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu)) {
WARN_ON_ONCE(vcpu->arch.apicv_active);
return;
}
if (!vcpu->arch.apicv_active)
return;

vcpu->arch.apicv_active = false;
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
@@ -9005,7 +9013,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
struct page *page;
int r;

vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -9026,6 +9033,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_pio_data;

if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
@@ -9033,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
static_key_slow_inc(&kvm_no_apic_vcpu);

vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.mce_banks) {
r = -ENOMEM;
goto fail_free_lapic;
}
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
GFP_KERNEL_ACCOUNT)) {
r = -ENOMEM;
goto fail_free_mce_banks;
}
@@ -9104,7 +9113,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);

@@ -9299,13 +9307,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,

slot->arch.rmap[i] =
kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
GFP_KERNEL);
GFP_KERNEL_ACCOUNT);
if (!slot->arch.rmap[i])
goto out_free;
if (i == 0)
continue;

linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
if (!linfo)
goto out_free;

@@ -9348,13 +9356,13 @@ out_free:
return -ENOMEM;
}

void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
/*
* memslots->generation has been incremented.
* mmio generation may have reached its maximum value.
*/
kvm_mmu_invalidate_mmio_sptes(kvm, slots);
kvm_mmu_invalidate_mmio_sptes(kvm, gen);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -9462,7 +9470,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_mmu_invalidate_zap_all_pages(kvm);
kvm_mmu_zap_all(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -181,6 +181,11 @@ static inline bool emul_is_noncanonical_address(u64 la,

static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
gva_t gva, gfn_t gfn, unsigned access)
{
u64 gen = kvm_memslots(vcpu->kvm)->generation;

if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
return;

/*
* If this is a shadow nested page table, the "GVA" is
* actually a nGPA.
@@ -188,7 +193,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_gfn = gfn;
vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
vcpu->arch.mmio_gen = gen;
}

static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
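The x86.h hunk above snapshots the memslot generation once, bails out if the update-in-progress flag is set, and stores that same snapshot in mmio_gen, so a cached MMIO translation can never be tagged with a generation observed mid-update. A compact user-space sketch of that idea, in the spirit of vcpu_match_mmio_gen(); the flag bit, field names, and helpers are assumptions for illustration, not the kernel's definitions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed flag bit standing in for KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS. */
    #define DEMO_GEN_UPDATE_IN_PROGRESS (1ULL << 63)

    struct demo_vcpu {
    	uint64_t mmio_gen;	/* generation the cached MMIO info belongs to */
    };

    /* Mirrors the shape of vcpu_cache_mmio_info(): take one snapshot, reuse it. */
    static void demo_cache_mmio(struct demo_vcpu *vcpu, uint64_t current_gen)
    {
    	uint64_t gen = current_gen;

    	if (gen & DEMO_GEN_UPDATE_IN_PROGRESS)
    		return;		/* don't cache against an in-flight update */

    	vcpu->mmio_gen = gen;	/* same snapshot that passed the check */
    }

    /* The cache is only valid for the generation it was created under. */
    static bool demo_mmio_cache_valid(const struct demo_vcpu *vcpu, uint64_t current_gen)
    {
    	return vcpu->mmio_gen == current_gen;
    }

    int main(void)
    {
    	struct demo_vcpu vcpu = { .mmio_gen = 0 };

    	demo_cache_mmio(&vcpu, 7);
    	printf("valid at gen 7: %d\n", demo_mmio_cache_valid(&vcpu, 7)); /* 1 */
    	printf("valid at gen 8: %d\n", demo_mmio_cache_valid(&vcpu, 8)); /* 0 */

    	demo_cache_mmio(&vcpu, 9 | DEMO_GEN_UPDATE_IN_PROGRESS);	 /* skipped */
    	printf("still at gen 7: %d\n", demo_mmio_cache_valid(&vcpu, 7)); /* 1 */
    	return 0;
    }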