Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
 "Considerable KVM/PPC work, x86 kvmclock vsyscall support,
  IA32_TSC_ADJUST MSR emulation, amongst others."

Fix up trivial conflict in kernel/sched/core.c due to cross-cpu migration
notifier added next to rq migration call-back.

* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
  KVM: emulator: fix real mode segment checks in address linearization
  VMX: remove unneeded enable_unrestricted_guest check
  KVM: VMX: fix DPL during entry to protected mode
  x86/kexec: crash_vmclear_local_vmcss needs __rcu
  kvm: Fix irqfd resampler list walk
  KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
  x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
  KVM: MMU: optimize for set_spte
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  ...
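For orientation before the diff: the x86 MSR write path in this merge stops passing bare (index, data) pairs and instead passes a struct msr_data, so TSC writes can tell host-initiated updates apart from guest wrmsr (needed for IA32_TSC_ADJUST emulation). The sketch below is illustrative only; the field layout and the example_wrmsr_exit() helper are assumptions drawn from the hunks that follow, where the real handlers are wrmsr_interception() and handle_wrmsr().

	struct msr_data {
		bool host_initiated;	/* true when the write comes from the KVM_SET_MSRS ioctl path */
		u32 index;		/* MSR number, e.g. MSR_IA32_TSC or MSR_IA32_TSC_ADJUST */
		u64 data;		/* value being written */
	};

	/* Hypothetical guest-initiated wrmsr exit, mirroring wrmsr_interception() below. */
	static int example_wrmsr_exit(struct kvm_vcpu *vcpu)
	{
		struct msr_data msr;
		u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
		u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
			| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);

		msr.data = data;
		msr.index = ecx;
		msr.host_initiated = false;	/* guest wrmsr, so TSC_ADJUST bookkeeping applies */
		return kvm_set_msr(vcpu, &msr);
	}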
@@ -320,6 +320,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (index == 0) {
entry->ebx &= kvm_supported_word9_x86_features;
cpuid_mask(&entry->ebx, 9);
// TSC_ADJUST is emulated
entry->ebx |= F(TSC_ADJUST);
} else
entry->ebx = 0;
entry->eax = 0;
@@ -659,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
} else
*eax = *ebx = *ecx = *edx = 0;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);

void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
@@ -31,6 +31,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}

static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;

best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
}

static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -676,8 +676,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
addr.seg);
if (!usable)
goto bad;
/* code segment or read-only data segment */
if (((desc.type & 8) || !(desc.type & 2)) && write)
/* code segment in protected mode or read-only data segment */
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
|| !(desc.type & 2)) && write)
goto bad;
/* unreadable code segment */
if (!fetch && (desc.type & 8) && !(desc.type & 2))
@@ -1011,7 +1011,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
local_irq_save(flags);

now = apic->lapic_timer.timer.base->get_time();
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|| (!vcpu->arch.mmu.direct_map && write_fault
&& !is_write_protection(vcpu) && !user_fault)) {

/*
* There are two cases:
* - the one is other vcpu creates new sp in the window
* between mapping_level() and acquiring mmu-lock.
* - the another case is the new sp is created by itself
* (page-fault path) when guest uses the target gfn as
* its page table.
* Both of these cases can be fixed by allowing guest to
* retry the access, it will refault, then we can establish
* the mapping by using small page.
*/
if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level)) {
ret = 1;
drop_spte(vcpu->kvm, sptep);
has_wrprotected_page(vcpu->kvm, gfn, level))
goto done;
}

spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2505,6 +2513,14 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
mmu_free_roots(vcpu);
}

static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
{
int bit7;

bit7 = (gpte >> 7) & 1;
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
}

static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -2517,6 +2533,26 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
return gfn_to_pfn_memslot_atomic(slot, gfn);
}

static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
goto no_present;

if (!is_present_gpte(gpte))
goto no_present;

if (!(gpte & PT_ACCESSED_MASK))
goto no_present;

return false;

no_present:
drop_spte(vcpu->kvm, spte);
return true;
}

static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *start, u64 *end)
@@ -2671,7 +2707,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* PT_PAGE_TABLE_LEVEL and there would be no adjustment done
* here.
*/
if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
level == PT_PAGE_TABLE_LEVEL &&
PageTransCompound(pfn_to_page(pfn)) &&
!has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
@@ -2699,18 +2735,13 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
}
}

static bool mmu_invalid_pfn(pfn_t pfn)
{
return unlikely(is_invalid_pfn(pfn));
}

static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
pfn_t pfn, unsigned access, int *ret_val)
{
bool ret = true;

/* The pfn is invalid, report the error! */
if (unlikely(is_invalid_pfn(pfn))) {
if (unlikely(is_error_pfn(pfn))) {
*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
goto exit;
}
@@ -2862,7 +2893,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
return r;

spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3331,7 +3362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
return r;

spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3399,14 +3430,6 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
}

static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
{
int bit7;

bit7 = (gpte >> 7) & 1;
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
}

static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
{
unsigned mask;
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
|
||||
addr, access);
|
||||
}
|
||||
|
||||
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp, u64 *spte,
|
||||
pt_element_t gpte)
|
||||
static bool
|
||||
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, pt_element_t gpte, bool no_dirty_log)
|
||||
{
|
||||
if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
|
||||
goto no_present;
|
||||
unsigned pte_access;
|
||||
gfn_t gfn;
|
||||
pfn_t pfn;
|
||||
|
||||
if (!is_present_gpte(gpte))
|
||||
goto no_present;
|
||||
if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
|
||||
return false;
|
||||
|
||||
if (!(gpte & PT_ACCESSED_MASK))
|
||||
goto no_present;
|
||||
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
|
||||
|
||||
return false;
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
pte_access = sp->role.access & gpte_access(vcpu, gpte);
|
||||
protect_clean_gpte(&pte_access, gpte);
|
||||
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
||||
no_dirty_log && (pte_access & ACC_WRITE_MASK));
|
||||
if (is_error_pfn(pfn))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* we call mmu_set_spte() with host_writable = true because
|
||||
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
|
||||
*/
|
||||
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
|
||||
NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
|
||||
|
||||
no_present:
|
||||
drop_spte(vcpu->kvm, spte);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, const void *pte)
|
||||
{
|
||||
pt_element_t gpte;
|
||||
unsigned pte_access;
|
||||
pfn_t pfn;
|
||||
pt_element_t gpte = *(const pt_element_t *)pte;
|
||||
|
||||
gpte = *(const pt_element_t *)pte;
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
|
||||
return;
|
||||
|
||||
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
|
||||
pte_access = sp->role.access & gpte_access(vcpu, gpte);
|
||||
protect_clean_gpte(&pte_access, gpte);
|
||||
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
|
||||
if (mmu_invalid_pfn(pfn))
|
||||
return;
|
||||
|
||||
/*
|
||||
* we call mmu_set_spte() with host_writable = true because that
|
||||
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
|
||||
*/
|
||||
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
|
||||
NULL, PT_PAGE_TABLE_LEVEL,
|
||||
gpte_to_gfn(gpte), pfn, true, true);
|
||||
FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
|
||||
}
|
||||
|
||||
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
|
||||
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
|
||||
spte = sp->spt + i;
|
||||
|
||||
for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
|
||||
pt_element_t gpte;
|
||||
unsigned pte_access;
|
||||
gfn_t gfn;
|
||||
pfn_t pfn;
|
||||
|
||||
if (spte == sptep)
|
||||
continue;
|
||||
|
||||
if (is_shadow_present_pte(*spte))
|
||||
continue;
|
||||
|
||||
gpte = gptep[i];
|
||||
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
|
||||
continue;
|
||||
|
||||
pte_access = sp->role.access & gpte_access(vcpu, gpte);
|
||||
protect_clean_gpte(&pte_access, gpte);
|
||||
gfn = gpte_to_gfn(gpte);
|
||||
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
|
||||
pte_access & ACC_WRITE_MASK);
|
||||
if (mmu_invalid_pfn(pfn))
|
||||
if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
|
||||
break;
|
||||
|
||||
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
|
||||
NULL, PT_PAGE_TABLE_LEVEL, gfn,
|
||||
pfn, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch a shadow pte for a specific level in the paging hierarchy.
|
||||
* If the guest tries to write a write-protected page, we need to
|
||||
* emulate this operation, return 1 to indicate this case.
|
||||
*/
|
||||
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
struct guest_walker *gw,
|
||||
int user_fault, int write_fault, int hlevel,
|
||||
int *emulate, pfn_t pfn, bool map_writable,
|
||||
bool prefault)
|
||||
pfn_t pfn, bool map_writable, bool prefault)
|
||||
{
|
||||
unsigned access = gw->pt_access;
|
||||
struct kvm_mmu_page *sp = NULL;
|
||||
int top_level;
|
||||
unsigned direct_access;
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
unsigned direct_access, access = gw->pt_access;
|
||||
int top_level, emulate = 0;
|
||||
|
||||
if (!is_present_gpte(gw->ptes[gw->level - 1]))
|
||||
return NULL;
|
||||
return 0;
|
||||
|
||||
direct_access = gw->pte_access;
|
||||
|
||||
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
|
||||
clear_sp_write_flooding_count(it.sptep);
|
||||
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
|
||||
user_fault, write_fault, emulate, it.level,
|
||||
user_fault, write_fault, &emulate, it.level,
|
||||
gw->gfn, pfn, prefault, map_writable);
|
||||
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
|
||||
|
||||
return it.sptep;
|
||||
return emulate;
|
||||
|
||||
out_gpte_changed:
|
||||
if (sp)
|
||||
kvm_mmu_put_page(sp, it.sptep);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||
int write_fault = error_code & PFERR_WRITE_MASK;
|
||||
int user_fault = error_code & PFERR_USER_MASK;
|
||||
struct guest_walker walker;
|
||||
u64 *sptep;
|
||||
int emulate = 0;
|
||||
int r;
|
||||
pfn_t pfn;
|
||||
int level = PT_PAGE_TABLE_LEVEL;
|
||||
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||
return r;
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu, mmu_seq))
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
|
||||
kvm_mmu_free_some_pages(vcpu);
|
||||
if (!force_pt_level)
|
||||
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
|
||||
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
|
||||
level, &emulate, pfn, map_writable, prefault);
|
||||
(void)sptep;
|
||||
pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
|
||||
sptep, *sptep, emulate);
|
||||
|
||||
r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
|
||||
level, pfn, map_writable, prefault);
|
||||
++vcpu->stat.pf_fixed;
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
return emulate;
|
||||
return r;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
sizeof(pt_element_t)))
|
||||
return -EINVAL;
|
||||
|
||||
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
|
||||
if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
|
||||
vcpu->kvm->tlbs_dirty++;
|
||||
continue;
|
||||
}
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include "mmu.h"
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "x86.h"
|
||||
#include "cpuid.h"
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/mod_devicetable.h>
|
||||
@@ -630,15 +631,12 @@ static int svm_hardware_enable(void *garbage)
|
||||
return -EBUSY;
|
||||
|
||||
if (!has_svm()) {
|
||||
printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
|
||||
me);
|
||||
pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
|
||||
return -EINVAL;
|
||||
}
|
||||
sd = per_cpu(svm_data, me);
|
||||
|
||||
if (!sd) {
|
||||
printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
|
||||
me);
|
||||
pr_err("%s: svm_data is NULL on %d\n", __func__, me);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -1012,6 +1010,13 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
svm->tsc_ratio = ratio;
|
||||
}
|
||||
|
||||
static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
return svm->vmcb->control.tsc_offset;
|
||||
}
|
||||
|
||||
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@@ -1189,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
u32 dummy;
|
||||
u32 eax = 1;
|
||||
|
||||
init_vmcb(svm);
|
||||
|
||||
@@ -1197,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
|
||||
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
|
||||
}
|
||||
vcpu->arch.regs_avail = ~0;
|
||||
vcpu->arch.regs_dirty = ~0;
|
||||
|
||||
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1254,11 +1262,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
|
||||
svm->asid_generation = 0;
|
||||
init_vmcb(svm);
|
||||
kvm_write_tsc(&svm->vcpu, 0);
|
||||
|
||||
err = fx_init(&svm->vcpu);
|
||||
if (err)
|
||||
goto free_page4;
|
||||
|
||||
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
|
||||
if (kvm_vcpu_is_bsp(&svm->vcpu))
|
||||
@@ -1268,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
|
||||
return &svm->vcpu;
|
||||
|
||||
free_page4:
|
||||
__free_page(hsave_page);
|
||||
free_page3:
|
||||
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
|
||||
free_page2:
|
||||
@@ -3008,11 +3009,11 @@ static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
return 0;
|
||||
}
|
||||
|
||||
u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
|
||||
u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
|
||||
{
|
||||
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
|
||||
return vmcb->control.tsc_offset +
|
||||
svm_scale_tsc(vcpu, native_read_tsc());
|
||||
svm_scale_tsc(vcpu, host_tsc);
|
||||
}
|
||||
|
||||
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
|
||||
@@ -3131,13 +3132,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
|
||||
static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
u32 ecx = msr->index;
|
||||
u64 data = msr->data;
|
||||
switch (ecx) {
|
||||
case MSR_IA32_TSC:
|
||||
kvm_write_tsc(vcpu, data);
|
||||
kvm_write_tsc(vcpu, msr);
|
||||
break;
|
||||
case MSR_STAR:
|
||||
svm->vmcb->save.star = data;
|
||||
@@ -3192,20 +3195,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
|
||||
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
|
||||
break;
|
||||
default:
|
||||
return kvm_set_msr_common(vcpu, ecx, data);
|
||||
return kvm_set_msr_common(vcpu, msr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wrmsr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct msr_data msr;
|
||||
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
|
||||
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
|
||||
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
|
||||
|
||||
msr.data = data;
|
||||
msr.index = ecx;
|
||||
msr.host_initiated = false;
|
||||
|
||||
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
|
||||
if (svm_set_msr(&svm->vcpu, ecx, data)) {
|
||||
if (svm_set_msr(&svm->vcpu, &msr)) {
|
||||
trace_kvm_msr_write_ex(ecx, data);
|
||||
kvm_inject_gp(&svm->vcpu, 0);
|
||||
} else {
|
||||
@@ -4302,6 +4309,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
||||
.has_wbinvd_exit = svm_has_wbinvd_exit,
|
||||
|
||||
.set_tsc_khz = svm_set_tsc_khz,
|
||||
.read_tsc_offset = svm_read_tsc_offset,
|
||||
.write_tsc_offset = svm_write_tsc_offset,
|
||||
.adjust_tsc_offset = svm_adjust_tsc_offset,
|
||||
.compute_tsc_offset = svm_compute_tsc_offset,
|
||||
|
@@ -4,6 +4,7 @@
|
||||
#include <linux/tracepoint.h>
|
||||
#include <asm/vmx.h>
|
||||
#include <asm/svm.h>
|
||||
#include <asm/clocksource.h>
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
@@ -754,6 +755,68 @@ TRACE_EVENT(
|
||||
__entry->write ? "Write" : "Read",
|
||||
__entry->gpa_match ? "GPA" : "GVA")
|
||||
);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define host_clocks \
|
||||
{VCLOCK_NONE, "none"}, \
|
||||
{VCLOCK_TSC, "tsc"}, \
|
||||
{VCLOCK_HPET, "hpet"} \
|
||||
|
||||
TRACE_EVENT(kvm_update_master_clock,
|
||||
TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
|
||||
TP_ARGS(use_master_clock, host_clock, offset_matched),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( bool, use_master_clock )
|
||||
__field( unsigned int, host_clock )
|
||||
__field( bool, offset_matched )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->use_master_clock = use_master_clock;
|
||||
__entry->host_clock = host_clock;
|
||||
__entry->offset_matched = offset_matched;
|
||||
),
|
||||
|
||||
TP_printk("masterclock %d hostclock %s offsetmatched %u",
|
||||
__entry->use_master_clock,
|
||||
__print_symbolic(__entry->host_clock, host_clocks),
|
||||
__entry->offset_matched)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_track_tsc,
|
||||
TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
|
||||
unsigned int online_vcpus, bool use_master_clock,
|
||||
unsigned int host_clock),
|
||||
TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
|
||||
host_clock),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned int, vcpu_id )
|
||||
__field( unsigned int, nr_vcpus_matched_tsc )
|
||||
__field( unsigned int, online_vcpus )
|
||||
__field( bool, use_master_clock )
|
||||
__field( unsigned int, host_clock )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_id = vcpu_id;
|
||||
__entry->nr_vcpus_matched_tsc = nr_matched;
|
||||
__entry->online_vcpus = online_vcpus;
|
||||
__entry->use_master_clock = use_master_clock;
|
||||
__entry->host_clock = host_clock;
|
||||
),
|
||||
|
||||
TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
|
||||
" hostclock %s",
|
||||
__entry->vcpu_id, __entry->use_master_clock,
|
||||
__entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
|
||||
__print_symbolic(__entry->host_clock, host_clocks))
|
||||
);
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#endif /* _TRACE_KVM_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
@@ -42,6 +42,7 @@
|
||||
#include <asm/i387.h>
|
||||
#include <asm/xcr.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/kexec.h>
|
||||
|
||||
#include "trace.h"
|
||||
|
||||
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
|
||||
return vmx_capability.ept & VMX_EPT_AD_BIT;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_invept_individual_addr(void)
|
||||
{
|
||||
return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_invept_context(void)
|
||||
{
|
||||
return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
|
||||
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
|
||||
vmcs, phys_addr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
/*
|
||||
* This bitmap is used to indicate whether the vmclear
|
||||
* operation is enabled on all cpus. All disabled by
|
||||
* default.
|
||||
*/
|
||||
static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
|
||||
|
||||
static inline void crash_enable_local_vmclear(int cpu)
|
||||
{
|
||||
cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
|
||||
}
|
||||
|
||||
static inline void crash_disable_local_vmclear(int cpu)
|
||||
{
|
||||
cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
|
||||
}
|
||||
|
||||
static inline int crash_local_vmclear_enabled(int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
|
||||
}
|
||||
|
||||
static void crash_vmclear_local_loaded_vmcss(void)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
struct loaded_vmcs *v;
|
||||
|
||||
if (!crash_local_vmclear_enabled(cpu))
|
||||
return;
|
||||
|
||||
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
|
||||
loaded_vmcss_on_cpu_link)
|
||||
vmcs_clear(v->vmcs);
|
||||
}
|
||||
#else
|
||||
static inline void crash_enable_local_vmclear(int cpu) { }
|
||||
static inline void crash_disable_local_vmclear(int cpu) { }
|
||||
#endif /* CONFIG_KEXEC */
|
||||
|
||||
static void __loaded_vmcs_clear(void *arg)
|
||||
{
|
||||
struct loaded_vmcs *loaded_vmcs = arg;
|
||||
@@ -1001,15 +1037,28 @@ static void __loaded_vmcs_clear(void *arg)
|
||||
return; /* vcpu migration can race with cpu offline */
|
||||
if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
|
||||
per_cpu(current_vmcs, cpu) = NULL;
|
||||
crash_disable_local_vmclear(cpu);
|
||||
list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
|
||||
|
||||
/*
|
||||
* we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
|
||||
* is before setting loaded_vmcs->vcpu to -1 which is done in
|
||||
* loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
|
||||
* then adds the vmcs into percpu list before it is deleted.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
loaded_vmcs_init(loaded_vmcs);
|
||||
crash_enable_local_vmclear(cpu);
|
||||
}
|
||||
|
||||
static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
|
||||
{
|
||||
if (loaded_vmcs->cpu != -1)
|
||||
smp_call_function_single(
|
||||
loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
|
||||
int cpu = loaded_vmcs->cpu;
|
||||
|
||||
if (cpu != -1)
|
||||
smp_call_function_single(cpu,
|
||||
__loaded_vmcs_clear, loaded_vmcs, 1);
|
||||
}
|
||||
|
||||
static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
|
||||
@@ -1051,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
|
||||
{
|
||||
if (enable_ept) {
|
||||
if (cpu_has_vmx_invept_individual_addr())
|
||||
__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
|
||||
eptp, gpa);
|
||||
else
|
||||
ept_sync_context(eptp);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline unsigned long vmcs_readl(unsigned long field)
|
||||
{
|
||||
unsigned long value;
|
||||
@@ -1535,8 +1573,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
|
||||
local_irq_disable();
|
||||
crash_disable_local_vmclear(cpu);
|
||||
|
||||
/*
|
||||
* Read loaded_vmcs->cpu should be before fetching
|
||||
* loaded_vmcs->loaded_vmcss_on_cpu_link.
|
||||
* See the comments in __loaded_vmcs_clear().
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
|
||||
&per_cpu(loaded_vmcss_on_cpu, cpu));
|
||||
crash_enable_local_vmclear(cpu);
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
@@ -1839,11 +1887,10 @@ static u64 guest_read_tsc(void)
|
||||
* Like guest_read_tsc, but always returns L1's notion of the timestamp
|
||||
* counter, even if a nested guest (L2) is currently running.
|
||||
*/
|
||||
u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
|
||||
u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
|
||||
{
|
||||
u64 host_tsc, tsc_offset;
|
||||
u64 tsc_offset;
|
||||
|
||||
rdtscll(host_tsc);
|
||||
tsc_offset = is_guest_mode(vcpu) ?
|
||||
to_vmx(vcpu)->nested.vmcs01_tsc_offset :
|
||||
vmcs_read64(TSC_OFFSET);
|
||||
@@ -1866,6 +1913,11 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
|
||||
WARN(1, "user requested TSC rate below hardware speed\n");
|
||||
}
|
||||
|
||||
static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vmcs_read64(TSC_OFFSET);
|
||||
}
|
||||
|
||||
/*
|
||||
* writes 'offset' into guest's timestamp counter offset register
|
||||
*/
|
||||
@@ -2202,15 +2254,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
||||
* Returns 0 on success, non-0 otherwise.
|
||||
* Assumes vcpu_load() was already called.
|
||||
*/
|
||||
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct shared_msr_entry *msr;
|
||||
int ret = 0;
|
||||
u32 msr_index = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
switch (msr_index) {
|
||||
case MSR_EFER:
|
||||
ret = kvm_set_msr_common(vcpu, msr_index, data);
|
||||
ret = kvm_set_msr_common(vcpu, msr_info);
|
||||
break;
|
||||
#ifdef CONFIG_X86_64
|
||||
case MSR_FS_BASE:
|
||||
@@ -2236,7 +2290,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
vmcs_writel(GUEST_SYSENTER_ESP, data);
|
||||
break;
|
||||
case MSR_IA32_TSC:
|
||||
kvm_write_tsc(vcpu, data);
|
||||
kvm_write_tsc(vcpu, msr_info);
|
||||
break;
|
||||
case MSR_IA32_CR_PAT:
|
||||
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
|
||||
@@ -2244,7 +2298,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
vcpu->arch.pat = data;
|
||||
break;
|
||||
}
|
||||
ret = kvm_set_msr_common(vcpu, msr_index, data);
|
||||
ret = kvm_set_msr_common(vcpu, msr_info);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
ret = kvm_set_msr_common(vcpu, msr_info);
|
||||
break;
|
||||
case MSR_TSC_AUX:
|
||||
if (!vmx->rdtscp_enabled)
|
||||
@@ -2267,7 +2324,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
}
|
||||
break;
|
||||
}
|
||||
ret = kvm_set_msr_common(vcpu, msr_index, data);
|
||||
ret = kvm_set_msr_common(vcpu, msr_info);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -2341,6 +2398,18 @@ static int hardware_enable(void *garbage)
|
||||
return -EBUSY;
|
||||
|
||||
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
|
||||
|
||||
/*
|
||||
* Now we can enable the vmclear operation in kdump
|
||||
* since the loaded_vmcss_on_cpu list on this cpu
|
||||
* has been initialized.
|
||||
*
|
||||
* Though the cpu is not in VMX operation now, there
|
||||
* is no problem to enable the vmclear operation
|
||||
* for the loaded_vmcss_on_cpu list is empty!
|
||||
*/
|
||||
crash_enable_local_vmclear(cpu);
|
||||
|
||||
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
|
||||
|
||||
test_bits = FEATURE_CONTROL_LOCKED;
|
||||
@@ -2697,6 +2766,7 @@ static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment
|
||||
if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
|
||||
tmp.base = vmcs_readl(sf->base);
|
||||
tmp.selector = vmcs_read16(sf->selector);
|
||||
tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
|
||||
tmp.s = 1;
|
||||
}
|
||||
vmx_set_segment(vcpu, &tmp, seg);
|
||||
@@ -3246,7 +3316,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
|
||||
* unrestricted guest like Westmere to older host that don't have
|
||||
* unrestricted guest like Nehelem.
|
||||
*/
|
||||
if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
|
||||
if (vmx->rmode.vm86_active) {
|
||||
switch (seg) {
|
||||
case VCPU_SREG_CS:
|
||||
vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
|
||||
@@ -3897,8 +3967,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
||||
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
|
||||
set_cr4_guest_host_mask(vmx);
|
||||
|
||||
kvm_write_tsc(&vmx->vcpu, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3908,8 +3976,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
u64 msr;
|
||||
int ret;
|
||||
|
||||
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
|
||||
|
||||
vmx->rmode.vm86_active = 0;
|
||||
|
||||
vmx->soft_vnmi_blocked = 0;
|
||||
@@ -3921,10 +3987,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
msr |= MSR_IA32_APICBASE_BSP;
|
||||
kvm_set_apic_base(&vmx->vcpu, msr);
|
||||
|
||||
ret = fx_init(&vmx->vcpu);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
vmx_segment_cache_clear(vmx);
|
||||
|
||||
seg_setup(VCPU_SREG_CS);
|
||||
@@ -3965,7 +4027,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
kvm_rip_write(vcpu, 0xfff0);
|
||||
else
|
||||
kvm_rip_write(vcpu, 0);
|
||||
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
|
||||
|
||||
vmcs_writel(GUEST_GDTR_BASE, 0);
|
||||
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
|
||||
@@ -4015,7 +4076,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
/* HACK: Don't enable emulation on guest boot/reset */
|
||||
vmx->emulation_required = 0;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -4287,16 +4347,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
|
||||
if (is_machine_check(intr_info))
|
||||
return handle_machine_check(vcpu);
|
||||
|
||||
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
|
||||
!is_page_fault(intr_info)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
|
||||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.data[0] = vect_info;
|
||||
vcpu->run->internal.data[1] = intr_info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
|
||||
return 1; /* already handled by vmx_vcpu_run() */
|
||||
|
||||
@@ -4315,6 +4365,22 @@ static int handle_exception(struct kvm_vcpu *vcpu)
|
||||
error_code = 0;
|
||||
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
|
||||
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
|
||||
/*
|
||||
* The #PF with PFEC.RSVD = 1 indicates the guest is accessing
|
||||
* MMIO, it is better to report an internal error.
|
||||
* See the comments in vmx_handle_exit.
|
||||
*/
|
||||
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
|
||||
!(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
|
||||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.data[0] = vect_info;
|
||||
vcpu->run->internal.data[1] = intr_info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_page_fault(intr_info)) {
|
||||
/* EPT won't cause page fault directly */
|
||||
BUG_ON(enable_ept);
|
||||
@@ -4626,11 +4692,15 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_wrmsr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct msr_data msr;
|
||||
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
|
||||
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
|
||||
| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
|
||||
|
||||
if (vmx_set_msr(vcpu, ecx, data) != 0) {
|
||||
msr.data = data;
|
||||
msr.index = ecx;
|
||||
msr.host_initiated = false;
|
||||
if (vmx_set_msr(vcpu, &msr) != 0) {
|
||||
trace_kvm_msr_write_ex(ecx, data);
|
||||
kvm_inject_gp(vcpu, 0);
|
||||
return 1;
|
||||
@@ -4827,11 +4897,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
||||
|
||||
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
||||
|
||||
if (exit_qualification & (1 << 6)) {
|
||||
printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
gla_validity = (exit_qualification >> 7) & 0x3;
|
||||
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
|
||||
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
|
||||
@@ -5979,13 +6044,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
|
||||
* delivery event since it indicates guest is accessing MMIO.
|
||||
* The vm-exit can be triggered again after return to guest that
|
||||
* will cause infinite loop.
|
||||
*/
|
||||
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
|
||||
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
|
||||
exit_reason != EXIT_REASON_EPT_VIOLATION &&
|
||||
exit_reason != EXIT_REASON_TASK_SWITCH))
|
||||
printk(KERN_WARNING "%s: unexpected, valid vectoring info "
|
||||
"(0x%x) and exit reason is 0x%x\n",
|
||||
__func__, vectoring_info, exit_reason);
|
||||
exit_reason != EXIT_REASON_TASK_SWITCH)) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
|
||||
vcpu->run->internal.ndata = 2;
|
||||
vcpu->run->internal.data[0] = vectoring_info;
|
||||
vcpu->run->internal.data[1] = exit_reason;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
|
||||
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
|
||||
@@ -7309,6 +7385,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
||||
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
|
||||
|
||||
.set_tsc_khz = vmx_set_tsc_khz,
|
||||
.read_tsc_offset = vmx_read_tsc_offset,
|
||||
.write_tsc_offset = vmx_write_tsc_offset,
|
||||
.adjust_tsc_offset = vmx_adjust_tsc_offset,
|
||||
.compute_tsc_offset = vmx_compute_tsc_offset,
|
||||
@@ -7367,6 +7444,11 @@ static int __init vmx_init(void)
|
||||
if (r)
|
||||
goto out3;
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
|
||||
crash_vmclear_local_loaded_vmcss);
|
||||
#endif
|
||||
|
||||
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
|
||||
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
|
||||
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
|
||||
@@ -7404,6 +7486,11 @@ static void __exit vmx_exit(void)
|
||||
free_page((unsigned long)vmx_io_bitmap_b);
|
||||
free_page((unsigned long)vmx_io_bitmap_a);
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
|
||||
synchronize_rcu();
|
||||
#endif
|
||||
|
||||
kvm_exit();
|
||||
}
|
||||
|
||||
|
@@ -46,6 +46,8 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/timekeeper_internal.h>
|
||||
#include <linux/pvclock_gtod.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
|
||||
u64 __read_mostly host_xcr0;
|
||||
|
||||
int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
|
||||
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
|
||||
|
||||
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
}
|
||||
|
||||
if (is_long_mode(vcpu)) {
|
||||
if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
|
||||
if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
|
||||
if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
|
||||
return 1;
|
||||
} else
|
||||
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
|
||||
static unsigned num_msrs_to_save;
|
||||
|
||||
static const u32 emulated_msrs[] = {
|
||||
MSR_IA32_TSC_ADJUST,
|
||||
MSR_IA32_TSCDEADLINE,
|
||||
MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MCG_STATUS,
|
||||
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
|
||||
* Returns 0 on success, non-0 otherwise.
|
||||
* Assumes vcpu_load() was already called.
|
||||
*/
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
return kvm_x86_ops->set_msr(vcpu, msr_index, data);
|
||||
return kvm_x86_ops->set_msr(vcpu, msr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
*/
|
||||
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
|
||||
{
|
||||
return kvm_set_msr(vcpu, index, *data);
|
||||
struct msr_data msr;
|
||||
|
||||
msr.data = *data;
|
||||
msr.index = index;
|
||||
msr.host_initiated = true;
|
||||
return kvm_set_msr(vcpu, &msr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
struct pvclock_gtod_data {
|
||||
seqcount_t seq;
|
||||
|
||||
struct { /* extract of a clocksource struct */
|
||||
int vclock_mode;
|
||||
cycle_t cycle_last;
|
||||
cycle_t mask;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
} clock;
|
||||
|
||||
/* open coded 'struct timespec' */
|
||||
u64 monotonic_time_snsec;
|
||||
time_t monotonic_time_sec;
|
||||
};
|
||||
|
||||
static struct pvclock_gtod_data pvclock_gtod_data;
|
||||
|
||||
static void update_pvclock_gtod(struct timekeeper *tk)
|
||||
{
|
||||
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
|
||||
|
||||
write_seqcount_begin(&vdata->seq);
|
||||
|
||||
/* copy pvclock gtod data */
|
||||
vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
|
||||
vdata->clock.cycle_last = tk->clock->cycle_last;
|
||||
vdata->clock.mask = tk->clock->mask;
|
||||
vdata->clock.mult = tk->mult;
|
||||
vdata->clock.shift = tk->shift;
|
||||
|
||||
vdata->monotonic_time_sec = tk->xtime_sec
|
||||
+ tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_snsec = tk->xtime_nsec
|
||||
+ (tk->wall_to_monotonic.tv_nsec
|
||||
<< tk->shift);
|
||||
while (vdata->monotonic_time_snsec >=
|
||||
(((u64)NSEC_PER_SEC) << tk->shift)) {
|
||||
vdata->monotonic_time_snsec -=
|
||||
((u64)NSEC_PER_SEC) << tk->shift;
|
||||
vdata->monotonic_time_sec++;
|
||||
}
|
||||
|
||||
write_seqcount_end(&vdata->seq);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
|
||||
{
|
||||
int version;
|
||||
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
|
||||
return timespec_to_ns(&ts);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
|
||||
unsigned long max_tsc_khz;
|
||||
|
||||
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
|
||||
return tsc;
|
||||
}
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
bool vcpus_matched;
|
||||
bool do_request = false;
|
||||
struct kvm_arch *ka = &vcpu->kvm->arch;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
|
||||
atomic_read(&vcpu->kvm->online_vcpus));
|
||||
|
||||
if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
|
||||
if (!ka->use_master_clock)
|
||||
do_request = 1;
|
||||
|
||||
if (!vcpus_matched && ka->use_master_clock)
|
||||
do_request = 1;
|
||||
|
||||
if (do_request)
|
||||
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
|
||||
|
||||
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
|
||||
atomic_read(&vcpu->kvm->online_vcpus),
|
||||
ka->use_master_clock, gtod->clock.vclock_mode);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
|
||||
{
|
||||
u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
|
||||
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
|
||||
}
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
u64 offset, ns, elapsed;
|
||||
unsigned long flags;
|
||||
s64 usdiff;
|
||||
bool matched;
|
||||
u64 data = msr->data;
|
||||
|
||||
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
|
||||
}
|
||||
matched = true;
|
||||
} else {
|
||||
/*
|
||||
* We split periods of matched TSC writes into generations.
|
||||
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
kvm->arch.cur_tsc_nsec = ns;
|
||||
kvm->arch.cur_tsc_write = data;
|
||||
kvm->arch.cur_tsc_offset = offset;
|
||||
matched = false;
|
||||
pr_debug("kvm: new tsc generation %u, clock %llu\n",
|
||||
kvm->arch.cur_tsc_generation, data);
|
||||
}
|
||||
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
|
||||
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
|
||||
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
|
||||
update_ia32_tsc_adjust_msr(vcpu, offset);
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
|
||||
|
||||
spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
|
||||
if (matched)
|
||||
kvm->arch.nr_vcpus_matched_tsc++;
|
||||
else
|
||||
kvm->arch.nr_vcpus_matched_tsc = 0;
|
||||
|
||||
kvm_track_tsc_matching(vcpu);
|
||||
spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(kvm_write_tsc);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
static cycle_t read_tsc(void)
|
||||
{
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
|
||||
/*
|
||||
* Empirically, a fence (of type that depends on the CPU)
|
||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||
* with respect to loads. The various CPU manuals are unclear
|
||||
* as to whether rdtsc can be reordered with later loads,
|
||||
* but no one has ever seen it happen.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
ret = (cycle_t)vget_cycles();
|
||||
|
||||
last = pvclock_gtod_data.clock.cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* GCC likes to generate cmov here, but this branch is extremely
|
||||
* predictable (it's just a funciton of time and the likely is
|
||||
* very likely) and there's a data dependence, so force GCC
|
||||
* to generate a branch instead. I don't barrier() because
|
||||
* we don't actually need a barrier, and if this function
|
||||
* ever gets inlined it will generate worse code.
|
||||
*/
|
||||
asm volatile ("");
|
||||
return last;
|
||||
}
|
||||
|
||||
static inline u64 vgettsc(cycle_t *cycle_now)
|
||||
{
|
||||
long v;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
*cycle_now = read_tsc();
|
||||
|
||||
v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
|
||||
return v * gtod->clock.mult;
|
||||
}
|
||||
|
||||
static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
int mode;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
ts->tv_nsec = 0;
|
||||
do {
|
||||
seq = read_seqcount_begin(>od->seq);
|
||||
mode = gtod->clock.vclock_mode;
|
||||
ts->tv_sec = gtod->monotonic_time_sec;
|
||||
ns = gtod->monotonic_time_snsec;
|
||||
ns += vgettsc(cycle_now);
|
||||
ns >>= gtod->clock.shift;
|
||||
} while (unlikely(read_seqcount_retry(>od->seq, seq)));
|
||||
timespec_add_ns(ts, ns);
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
/* returns true if host is using tsc clocksource */
|
||||
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
/* checked again under seqlock below */
|
||||
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
|
||||
return false;
|
||||
|
||||
if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
|
||||
return false;
|
||||
|
||||
monotonic_to_bootbased(&ts);
|
||||
*kernel_ns = timespec_to_ns(&ts);
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
*
|
||||
* Assuming a stable TSC across physical CPUS, and a stable TSC
|
||||
* across virtual CPUs, the following condition is possible.
|
||||
* Each numbered line represents an event visible to both
|
||||
* CPUs at the next numbered event.
|
||||
*
|
||||
* "timespecX" represents host monotonic time. "tscX" represents
|
||||
* RDTSC value.
|
||||
*
|
||||
* VCPU0 on CPU0 | VCPU1 on CPU1
|
||||
*
|
||||
* 1. read timespec0,tsc0
|
||||
* 2. | timespec1 = timespec0 + N
|
||||
* | tsc1 = tsc0 + M
|
||||
* 3. transition to guest | transition to guest
|
||||
* 4. ret0 = timespec0 + (rdtsc - tsc0) |
|
||||
* 5. | ret1 = timespec1 + (rdtsc - tsc1)
|
||||
* | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
|
||||
*
|
||||
* Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
|
||||
*
|
||||
* - ret0 < ret1
|
||||
* - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
|
||||
* ...
|
||||
* - 0 < N - M => M < N
|
||||
*
|
||||
* That is, when timespec0 != timespec1, M < N. Unfortunately that is not
|
||||
* always the case (the difference between two distinct xtime instances
|
||||
* might be smaller then the difference between corresponding TSC reads,
|
||||
* when updating guest vcpus pvclock areas).
|
||||
*
|
||||
* To avoid that problem, do not allow visibility of distinct
|
||||
* system_timestamp/tsc_timestamp values simultaneously: use a master
|
||||
* copy of host monotonic time values. Update that master copy
|
||||
* in lockstep.
|
||||
*
|
||||
* Rely on synchronization of host TSCs and guest TSCs for monotonicity.
|
||||
*
|
||||
*/
|
||||
|
||||
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
struct kvm_arch *ka = &kvm->arch;
|
||||
int vclock_mode;
|
||||
bool host_tsc_clocksource, vcpus_matched;
|
||||
|
||||
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
|
||||
atomic_read(&kvm->online_vcpus));
|
||||
|
||||
/*
|
||||
* If the host uses TSC clock, then passthrough TSC as stable
|
||||
* to the guest.
|
||||
*/
|
||||
host_tsc_clocksource = kvm_get_time_and_clockread(
|
||||
&ka->master_kernel_ns,
|
||||
&ka->master_cycle_now);
|
||||
|
||||
ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
|
||||
|
||||
if (ka->use_master_clock)
|
||||
atomic_set(&kvm_guest_has_master_clock, 1);
|
||||
|
||||
vclock_mode = pvclock_gtod_data.clock.vclock_mode;
|
||||
trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
|
||||
vcpus_matched);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long flags, this_tsc_khz;
|
||||
struct kvm_vcpu_arch *vcpu = &v->arch;
|
||||
struct kvm_arch *ka = &v->kvm->arch;
|
||||
void *shared_kaddr;
|
||||
unsigned long this_tsc_khz;
|
||||
s64 kernel_ns, max_kernel_ns;
|
||||
u64 tsc_timestamp;
|
||||
u64 tsc_timestamp, host_tsc;
|
||||
struct pvclock_vcpu_time_info *guest_hv_clock;
|
||||
u8 pvclock_flags;
|
||||
bool use_master_clock;
|
||||
|
||||
kernel_ns = 0;
|
||||
host_tsc = 0;
|
||||
|
||||
/* Keep irq disabled to prevent changes to the clock */
|
||||
local_irq_save(flags);
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
|
||||
kernel_ns = get_kernel_ns();
|
||||
this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
|
||||
if (unlikely(this_tsc_khz == 0)) {
|
||||
local_irq_restore(flags);
|
||||
@@ -1156,6 +1425,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the host uses TSC clock, then passthrough TSC as stable
|
||||
* to the guest.
|
||||
*/
|
||||
spin_lock(&ka->pvclock_gtod_sync_lock);
|
||||
use_master_clock = ka->use_master_clock;
|
||||
if (use_master_clock) {
|
||||
host_tsc = ka->master_cycle_now;
|
||||
kernel_ns = ka->master_kernel_ns;
|
||||
}
|
||||
spin_unlock(&ka->pvclock_gtod_sync_lock);
|
||||
if (!use_master_clock) {
|
||||
host_tsc = native_read_tsc();
|
||||
kernel_ns = get_kernel_ns();
|
||||
}
|
||||
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
|
||||
|
||||
/*
|
||||
* We may have to catch up the TSC to match elapsed wall clock
|
||||
* time for two reasons, even if kvmclock is used.
|
||||
@@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
vcpu->hw_tsc_khz = this_tsc_khz;
|
||||
}
|
||||
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
|
||||
/* with a master <monotonic time, tsc value> tuple,
|
||||
* pvclock clock reads always increase at the (scaled) rate
|
||||
* of guest TSC - no need to deal with sampling errors.
|
||||
*/
|
||||
if (!use_master_clock) {
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
}
|
||||
/* With all the info we got, fill in the values */
|
||||
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
|
||||
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
|
||||
vcpu->last_kernel_ns = kernel_ns;
|
||||
vcpu->last_guest_tsc = tsc_timestamp;
|
||||
|
||||
pvclock_flags = 0;
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
/*
|
||||
* The interface expects us to write an even number signaling that the
|
||||
* update is finished. Since the guest won't see the intermediate
|
||||
@@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
shared_kaddr = kmap_atomic(vcpu->time_page);
|
||||
|
||||
guest_hv_clock = shared_kaddr + vcpu->time_offset;
|
||||
|
||||
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
|
||||
pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
|
||||
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
/* If the host uses TSC clocksource, then it is stable */
|
||||
if (use_master_clock)
|
||||
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock));
|
||||
|
||||
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||
}
|
||||
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
bool pr = false;
|
||||
u32 msr = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
switch (msr) {
|
||||
case MSR_EFER:
|
||||
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
kvm_set_lapic_tscdeadline_msr(vcpu, data);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu)) {
|
||||
if (!msr_info->host_initiated) {
|
||||
u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
|
||||
kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
|
||||
}
|
||||
vcpu->arch.ia32_tsc_adjust_msr = data;
|
||||
}
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
vcpu->arch.ia32_misc_enable_msr = data;
|
||||
break;
|
||||
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
data = kvm_get_lapic_tscdeadline_msr(vcpu);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
data = vcpu->arch.ia32_misc_enable_msr;
|
||||
break;
|
||||
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
}
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
/*
|
||||
* On a host with synchronized TSC, there is no need to update
|
||||
* kvmclock on vcpu->cpu migration
|
||||
*/
|
||||
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
if (vcpu->cpu != cpu)
|
||||
kvm_migrate_timers(vcpu);
|
||||
vcpu->cpu = cpu;
|
||||
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
if (!vcpu->arch.apic)
|
||||
goto out;
|
||||
u.lapic = memdup_user(argp, sizeof(*u.lapic));
|
||||
if (IS_ERR(u.lapic)) {
|
||||
r = PTR_ERR(u.lapic);
|
||||
goto out;
|
||||
}
|
||||
if (IS_ERR(u.lapic))
|
||||
return PTR_ERR(u.lapic);
|
||||
|
||||
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_INTERRUPT: {
|
||||
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&irq, argp, sizeof irq))
goto out;
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
if (r)
goto out;
r = 0;
break;
}
case KVM_NMI: {
r = kvm_vcpu_ioctl_nmi(vcpu);
if (r)
goto out;
r = 0;
break;
}
case KVM_SET_CPUID: {
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
if (r)
goto out;
break;
}
case KVM_SET_CPUID2: {
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
cpuid_arg->entries);
if (r)
goto out;
break;
}
case KVM_GET_CPUID2: {
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
if (IS_ERR(u.xsave)) {
r = PTR_ERR(u.xsave);
goto out;
}
if (IS_ERR(u.xsave))
return PTR_ERR(u.xsave);
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
if (IS_ERR(u.xcrs)) {
r = PTR_ERR(u.xcrs);
goto out;
}
if (IS_ERR(u.xcrs))
return PTR_ERR(u.xcrs);
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
int ret;
if (addr > (unsigned int)(-3 * PAGE_SIZE))
return -1;
return -EINVAL;
ret = kvm_x86_ops->set_tss_addr(kvm, addr);
return ret;
}
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_SET_TSS_ADDR:
r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
if (r < 0)
goto out;
break;
case KVM_SET_IDENTITY_MAP_ADDR: {
u64 ident_addr;
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
goto out;
r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
if (r < 0)
goto out;
break;
}
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
if (r)
goto out;
break;
case KVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
get_irqchip_out:
kfree(chip);
if (r)
goto out;
break;
}
case KVM_SET_IRQCHIP: {
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
set_irqchip_out:
kfree(chip);
if (r)
goto out;
break;
}
case KVM_GET_PIT: {
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
if (r)
goto out;
r = 0;
break;
}
case KVM_GET_PIT2: {
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
if (r)
goto out;
r = 0;
break;
}
case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&control, argp, sizeof(control)))
goto out;
r = kvm_vm_ioctl_reinject(kvm, &control);
if (r)
goto out;
r = 0;
break;
}
case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 data)
{
return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
struct msr_data msr;
msr.data = data;
msr.index = msr_index;
msr.host_initiated = false;
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
* instruction -> ...
*/
pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
if (!is_error_pfn(pfn)) {
if (!is_error_noslot_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return true;
}
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask);
}
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i;
raw_spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
atomic_set(&kvm_guest_has_master_clock, 0);
raw_spin_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
/*
* Notification about pvclock gtod data update.
*/
static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
void *priv)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
struct timekeeper *tk = priv;
update_pvclock_gtod(tk);
/* disable master clock if host does not trust, or does not
* use, TSC clocksource
*/
if (gtod->clock.vclock_mode != VCLOCK_TSC &&
atomic_read(&kvm_guest_has_master_clock) != 0)
queue_work(system_long_wq, &pvclock_gtod_work);
return 0;
}
static struct notifier_block pvclock_gtod_notifier = {
.notifier_call = pvclock_gtod_notify,
};
#endif
int kvm_arch_init(void *opaque)
{
int r;
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
#endif
return 0;
out:
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
}
@@ -5059,7 +5396,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
static void kvm_gen_update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
int i;
struct kvm_vcpu *vcpu;
struct kvm_arch *ka = &kvm->arch;
spin_lock(&ka->pvclock_gtod_sync_lock);
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
pvclock_update_vm_gtod_copy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
/* guest entries allowed */
kvm_for_each_vcpu(i, vcpu, kvm)
clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
kvm_gen_update_masterclock(vcpu->kvm);
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
r = kvm_guest_time_update(vcpu);
if (unlikely(r))
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active())
hw_breakpoint_restore();
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
native_read_tsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
r = kvm_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
if (r)
return r;
r = kvm_arch_vcpu_reset(vcpu);
r = kvm_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
int r;
struct msr_data msr;
r = vcpu_load(vcpu);
if (r)
return r;
msr.data = 0x0;
msr.index = MSR_IA32_TSC;
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
return r;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int r;
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_free(vcpu);
}
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_pmu_reset(vcpu);
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.tsc_offset_adjustment += delta_cyc;
vcpu->arch.last_host_tsc = local_tsc;
set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
&vcpu->requests);
}
/*
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
goto fail_free_mce_banks;
r = fx_init(vcpu);
if (r)
goto fail_free_wbinvd_dirty_mask;
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
return 0;
fail_free_wbinvd_dirty_mask:
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
fail_free_lapic:
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
pvclock_update_vm_gtod_copy(kvm);
return 0;
}
@@ -112,7 +112,7 @@ void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
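To illustrate what the new kvm_write_tsc() signature asks of callers, here is a hedged sketch with a hypothetical caller, mirroring the kvm_arch_vcpu_postcreate() hunk above: the write is wrapped in struct msr_data so the TSC code can tell a host-side restore apart from a guest WRMSR.

/* Hypothetical host-side caller after the API change; not part of the patch. */
static void example_restore_guest_tsc(struct kvm_vcpu *vcpu, u64 tsc_value)
{
	struct msr_data msr = {
		.index		= MSR_IA32_TSC,
		.data		= tsc_value,
		.host_initiated	= true,	/* userspace/restore path, not a guest WRMSR */
	};

	kvm_write_tsc(vcpu, &msr);
}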