Merge tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
 "Considerable KVM/PPC work, x86 kvmclock vsyscall support,
  IA32_TSC_ADJUST MSR emulation, amongst others."

Fix up trivial conflict in kernel/sched/core.c due to cross-cpu
migration notifier added next to rq migration call-back.

* tag 'kvm-3.8-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (156 commits)
  KVM: emulator: fix real mode segment checks in address linearization
  VMX: remove unneeded enable_unrestricted_guest check
  KVM: VMX: fix DPL during entry to protected mode
  x86/kexec: crash_vmclear_local_vmcss needs __rcu
  kvm: Fix irqfd resampler list walk
  KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump
  x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
  KVM: MMU: optimize for set_spte
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  ...
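For context on the IA32_TSC_ADJUST emulation merged here: architecturally, a WRMSR to IA32_TSC_ADJUST shifts the TSC by the difference between the new and the previous adjust value, which is what the MSR_IA32_TSC_ADJUST case added to kvm_set_msr_common below implements. A minimal model of that rule (illustrative sketch only, not code from this series; tsc and tsc_adjust are hypothetical state variables):

	/* Sketch: architectural IA32_TSC_ADJUST behaviour as emulated below. */
	static u64 tsc;		/* free-running counter returned by RDTSC */
	static u64 tsc_adjust;	/* current IA32_TSC_ADJUST value */

	static void wrmsr_tsc_adjust(u64 new_adjust)
	{
		/* The CPU applies the delta to the TSC, so RDTSC jumps by it too. */
		tsc += new_adjust - tsc_adjust;
		tsc_adjust = new_adjust;
	}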
@@ -46,6 +46,8 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/timekeeper_internal.h>
|
||||
#include <linux/pvclock_gtod.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
|
||||
u64 __read_mostly host_xcr0;
|
||||
|
||||
int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
|
||||
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
|
||||
|
||||
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@@ -633,7 +637,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
||||
}
|
||||
|
||||
if (is_long_mode(vcpu)) {
|
||||
if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
|
||||
if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
|
||||
if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
|
||||
return 1;
|
||||
} else
|
||||
@@ -827,6 +831,7 @@ static u32 msrs_to_save[] = {
|
||||
static unsigned num_msrs_to_save;
|
||||
|
||||
static const u32 emulated_msrs[] = {
|
||||
MSR_IA32_TSC_ADJUST,
|
||||
MSR_IA32_TSCDEADLINE,
|
||||
MSR_IA32_MISC_ENABLE,
|
||||
MSR_IA32_MCG_STATUS,
|
||||
@@ -886,9 +891,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
|
||||
* Returns 0 on success, non-0 otherwise.
|
||||
* Assumes vcpu_load() was already called.
|
||||
*/
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
return kvm_x86_ops->set_msr(vcpu, msr_index, data);
|
||||
return kvm_x86_ops->set_msr(vcpu, msr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -896,9 +901,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
||||
*/
|
||||
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
|
||||
{
|
||||
return kvm_set_msr(vcpu, index, *data);
|
||||
struct msr_data msr;
|
||||
|
||||
msr.data = *data;
|
||||
msr.index = index;
|
||||
msr.host_initiated = true;
|
||||
return kvm_set_msr(vcpu, &msr);
|
||||
}
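do_set_msr() above wraps the new struct msr_data interface that this series threads through the MSR write path; the host_initiated flag is what later lets code such as the IA32_TSC_ADJUST case and kvm_write_tsc() distinguish a guest WRMSR from a write made by userspace or by KVM itself. A sketch of the structure as implied by the callers in this diff (field order not significant):

	struct msr_data {
		bool host_initiated;	/* write came from userspace/KVM, not a guest WRMSR */
		u32 index;		/* MSR number */
		u64 data;		/* value being written */
	};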
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
struct pvclock_gtod_data {
|
||||
seqcount_t seq;
|
||||
|
||||
struct { /* extract of a clocksource struct */
|
||||
int vclock_mode;
|
||||
cycle_t cycle_last;
|
||||
cycle_t mask;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
} clock;
|
||||
|
||||
/* open coded 'struct timespec' */
|
||||
u64 monotonic_time_snsec;
|
||||
time_t monotonic_time_sec;
|
||||
};
|
||||
|
||||
static struct pvclock_gtod_data pvclock_gtod_data;
|
||||
|
||||
static void update_pvclock_gtod(struct timekeeper *tk)
|
||||
{
|
||||
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
|
||||
|
||||
write_seqcount_begin(&vdata->seq);
|
||||
|
||||
/* copy pvclock gtod data */
|
||||
vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
|
||||
vdata->clock.cycle_last = tk->clock->cycle_last;
|
||||
vdata->clock.mask = tk->clock->mask;
|
||||
vdata->clock.mult = tk->mult;
|
||||
vdata->clock.shift = tk->shift;
|
||||
|
||||
vdata->monotonic_time_sec = tk->xtime_sec
|
||||
+ tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_snsec = tk->xtime_nsec
|
||||
+ (tk->wall_to_monotonic.tv_nsec
|
||||
<< tk->shift);
|
||||
while (vdata->monotonic_time_snsec >=
|
||||
(((u64)NSEC_PER_SEC) << tk->shift)) {
|
||||
vdata->monotonic_time_snsec -=
|
||||
((u64)NSEC_PER_SEC) << tk->shift;
|
||||
vdata->monotonic_time_sec++;
|
||||
}
|
||||
|
||||
write_seqcount_end(&vdata->seq);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
|
||||
{
|
||||
int version;
|
||||
@@ -995,6 +1054,10 @@ static inline u64 get_kernel_ns(void)
|
||||
return timespec_to_ns(&ts);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
|
||||
unsigned long max_tsc_khz;
|
||||
|
||||
@@ -1046,12 +1109,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
|
||||
return tsc;
|
||||
}
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
bool vcpus_matched;
|
||||
bool do_request = false;
|
||||
struct kvm_arch *ka = &vcpu->kvm->arch;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
|
||||
atomic_read(&vcpu->kvm->online_vcpus));
|
||||
|
||||
if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
|
||||
if (!ka->use_master_clock)
|
||||
do_request = 1;
|
||||
|
||||
if (!vcpus_matched && ka->use_master_clock)
|
||||
do_request = 1;
|
||||
|
||||
if (do_request)
|
||||
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
|
||||
|
||||
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
|
||||
atomic_read(&vcpu->kvm->online_vcpus),
|
||||
ka->use_master_clock, gtod->clock.vclock_mode);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
|
||||
{
|
||||
u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
|
||||
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
|
||||
}
|
||||
|
||||
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
u64 offset, ns, elapsed;
|
||||
unsigned long flags;
|
||||
s64 usdiff;
|
||||
bool matched;
|
||||
u64 data = msr->data;
|
||||
|
||||
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
@@ -1094,6 +1192,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
|
||||
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
|
||||
}
|
||||
matched = true;
|
||||
} else {
|
||||
/*
|
||||
* We split periods of matched TSC writes into generations.
|
||||
@@ -1108,6 +1207,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
kvm->arch.cur_tsc_nsec = ns;
|
||||
kvm->arch.cur_tsc_write = data;
|
||||
kvm->arch.cur_tsc_offset = offset;
|
||||
matched = false;
|
||||
pr_debug("kvm: new tsc generation %u, clock %llu\n",
|
||||
kvm->arch.cur_tsc_generation, data);
|
||||
}
|
||||
@@ -1129,26 +1229,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
||||
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
|
||||
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
|
||||
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
|
||||
update_ia32_tsc_adjust_msr(vcpu, offset);
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
|
||||
|
||||
spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
|
||||
if (matched)
|
||||
kvm->arch.nr_vcpus_matched_tsc++;
|
||||
else
|
||||
kvm->arch.nr_vcpus_matched_tsc = 0;
|
||||
|
||||
kvm_track_tsc_matching(vcpu);
|
||||
spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(kvm_write_tsc);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
static cycle_t read_tsc(void)
|
||||
{
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
|
||||
/*
|
||||
* Empirically, a fence (of type that depends on the CPU)
|
||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||
* with respect to loads. The various CPU manuals are unclear
|
||||
* as to whether rdtsc can be reordered with later loads,
|
||||
* but no one has ever seen it happen.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
ret = (cycle_t)vget_cycles();
|
||||
|
||||
last = pvclock_gtod_data.clock.cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* GCC likes to generate cmov here, but this branch is extremely
|
||||
* predictable (it's just a function of time and the likely is
|
||||
* very likely) and there's a data dependence, so force GCC
|
||||
* to generate a branch instead. I don't barrier() because
|
||||
* we don't actually need a barrier, and if this function
|
||||
* ever gets inlined it will generate worse code.
|
||||
*/
|
||||
asm volatile ("");
|
||||
return last;
|
||||
}
|
||||
|
||||
static inline u64 vgettsc(cycle_t *cycle_now)
|
||||
{
|
||||
long v;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
*cycle_now = read_tsc();
|
||||
|
||||
v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
|
||||
return v * gtod->clock.mult;
|
||||
}
|
||||
|
||||
static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
int mode;
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
|
||||
ts->tv_nsec = 0;
|
||||
do {
|
||||
seq = read_seqcount_begin(&gtod->seq);
|
||||
mode = gtod->clock.vclock_mode;
|
||||
ts->tv_sec = gtod->monotonic_time_sec;
|
||||
ns = gtod->monotonic_time_snsec;
|
||||
ns += vgettsc(cycle_now);
|
||||
ns >>= gtod->clock.shift;
|
||||
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
|
||||
timespec_add_ns(ts, ns);
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
/* returns true if host is using tsc clocksource */
|
||||
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
/* checked again under seqlock below */
|
||||
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
|
||||
return false;
|
||||
|
||||
if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
|
||||
return false;
|
||||
|
||||
monotonic_to_bootbased(&ts);
|
||||
*kernel_ns = timespec_to_ns(&ts);
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
*
|
||||
* Assuming a stable TSC across physical CPUs, and a stable TSC
|
||||
* across virtual CPUs, the following condition is possible.
|
||||
* Each numbered line represents an event visible to both
|
||||
* CPUs at the next numbered event.
|
||||
*
|
||||
* "timespecX" represents host monotonic time. "tscX" represents
|
||||
* RDTSC value.
|
||||
*
|
||||
* VCPU0 on CPU0 | VCPU1 on CPU1
|
||||
*
|
||||
* 1. read timespec0,tsc0
|
||||
* 2. | timespec1 = timespec0 + N
|
||||
* | tsc1 = tsc0 + M
|
||||
* 3. transition to guest | transition to guest
|
||||
* 4. ret0 = timespec0 + (rdtsc - tsc0) |
|
||||
* 5. | ret1 = timespec1 + (rdtsc - tsc1)
|
||||
* | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
|
||||
*
|
||||
* Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
|
||||
*
|
||||
* - ret0 < ret1
|
||||
* - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
|
||||
* ...
|
||||
* - 0 < N - M => M < N
|
||||
*
|
||||
* That is, when timespec0 != timespec1, M < N. Unfortunately that is not
|
||||
* always the case (the difference between two distinct xtime instances
|
||||
* might be smaller than the difference between corresponding TSC reads,
|
||||
* when updating guest vcpus pvclock areas).
|
||||
*
|
||||
* To avoid that problem, do not allow visibility of distinct
|
||||
* system_timestamp/tsc_timestamp values simultaneously: use a master
|
||||
* copy of host monotonic time values. Update that master copy
|
||||
* in lockstep.
|
||||
*
|
||||
* Rely on synchronization of host TSCs and guest TSCs for monotonicity.
|
||||
*
|
||||
*/
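A worked instance of the condition above, with illustrative numbers (assume 1 TSC cycle per nanosecond): take timespec0 = 100000 ns and tsc0 = 1000, and suppose the guest copies are refreshed with N = 50 ns but M = 80 cycles, i.e. M > N. A guest rdtsc of 1100 then gives

	ret0 = 100000 + (1100 - 1000) = 100100 ns   (computed from the old copy)
	ret1 = 100050 + (1100 - 1080) = 100070 ns   (computed from the new copy)

so the later read returns the smaller value and guest time appears to jump backwards. Publishing one master <system_timestamp, tsc_timestamp> pair to every vcpu removes the M/N mismatch that makes this possible.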
|
||||
|
||||
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
struct kvm_arch *ka = &kvm->arch;
|
||||
int vclock_mode;
|
||||
bool host_tsc_clocksource, vcpus_matched;
|
||||
|
||||
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
|
||||
atomic_read(&kvm->online_vcpus));
|
||||
|
||||
/*
|
||||
* If the host uses TSC clock, then passthrough TSC as stable
|
||||
* to the guest.
|
||||
*/
|
||||
host_tsc_clocksource = kvm_get_time_and_clockread(
|
||||
&ka->master_kernel_ns,
|
||||
&ka->master_cycle_now);
|
||||
|
||||
ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
|
||||
|
||||
if (ka->use_master_clock)
|
||||
atomic_set(&kvm_guest_has_master_clock, 1);
|
||||
|
||||
vclock_mode = pvclock_gtod_data.clock.vclock_mode;
|
||||
trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
|
||||
vcpus_matched);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long flags, this_tsc_khz;
|
||||
struct kvm_vcpu_arch *vcpu = &v->arch;
|
||||
struct kvm_arch *ka = &v->kvm->arch;
|
||||
void *shared_kaddr;
|
||||
unsigned long this_tsc_khz;
|
||||
s64 kernel_ns, max_kernel_ns;
|
||||
u64 tsc_timestamp;
|
||||
u64 tsc_timestamp, host_tsc;
|
||||
struct pvclock_vcpu_time_info *guest_hv_clock;
|
||||
u8 pvclock_flags;
|
||||
bool use_master_clock;
|
||||
|
||||
kernel_ns = 0;
|
||||
host_tsc = 0;
|
||||
|
||||
/* Keep irq disabled to prevent changes to the clock */
|
||||
local_irq_save(flags);
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
|
||||
kernel_ns = get_kernel_ns();
|
||||
this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
|
||||
if (unlikely(this_tsc_khz == 0)) {
|
||||
local_irq_restore(flags);
|
||||
@@ -1156,6 +1425,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the host uses TSC clock, then passthrough TSC as stable
|
||||
* to the guest.
|
||||
*/
|
||||
spin_lock(&ka->pvclock_gtod_sync_lock);
|
||||
use_master_clock = ka->use_master_clock;
|
||||
if (use_master_clock) {
|
||||
host_tsc = ka->master_cycle_now;
|
||||
kernel_ns = ka->master_kernel_ns;
|
||||
}
|
||||
spin_unlock(&ka->pvclock_gtod_sync_lock);
|
||||
if (!use_master_clock) {
|
||||
host_tsc = native_read_tsc();
|
||||
kernel_ns = get_kernel_ns();
|
||||
}
|
||||
|
||||
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
|
||||
|
||||
/*
|
||||
* We may have to catch up the TSC to match elapsed wall clock
|
||||
* time for two reasons, even if kvmclock is used.
|
||||
@@ -1217,23 +1504,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
vcpu->hw_tsc_khz = this_tsc_khz;
|
||||
}
|
||||
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
|
||||
/* with a master <monotonic time, tsc value> tuple,
|
||||
* pvclock clock reads always increase at the (scaled) rate
|
||||
* of guest TSC - no need to deal with sampling errors.
|
||||
*/
|
||||
if (!use_master_clock) {
|
||||
if (max_kernel_ns > kernel_ns)
|
||||
kernel_ns = max_kernel_ns;
|
||||
}
|
||||
/* With all the info we got, fill in the values */
|
||||
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
|
||||
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
|
||||
vcpu->last_kernel_ns = kernel_ns;
|
||||
vcpu->last_guest_tsc = tsc_timestamp;
|
||||
|
||||
pvclock_flags = 0;
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
/*
|
||||
* The interface expects us to write an even number signaling that the
|
||||
* update is finished. Since the guest won't see the intermediate
|
||||
@@ -1243,6 +1527,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
||||
|
||||
shared_kaddr = kmap_atomic(vcpu->time_page);
|
||||
|
||||
guest_hv_clock = shared_kaddr + vcpu->time_offset;
|
||||
|
||||
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
|
||||
pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
|
||||
|
||||
if (vcpu->pvclock_set_guest_stopped_request) {
|
||||
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
||||
vcpu->pvclock_set_guest_stopped_request = false;
|
||||
}
|
||||
|
||||
/* If the host uses TSC clocksource, then it is stable */
|
||||
if (use_master_clock)
|
||||
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
|
||||
|
||||
vcpu->hv_clock.flags = pvclock_flags;
|
||||
|
||||
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
|
||||
sizeof(vcpu->hv_clock));
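For reference, the consumer of the version/flags protocol written above is the guest's pvclock reader. A simplified sketch of that read loop (based on the pvclock ABI, not part of this patch; rdtsc() stands in for the guest TSC read, and the real guest code scales the delta with wider-than-64-bit arithmetic to avoid overflow):

	static u64 pvclock_read(volatile struct pvclock_vcpu_time_info *hv)
	{
		u32 version;
		u64 delta, ns;

		do {
			version = hv->version;	/* odd => host update in progress */
			rmb();
			delta = rdtsc() - hv->tsc_timestamp;
			if (hv->tsc_shift >= 0)
				delta <<= hv->tsc_shift;
			else
				delta >>= -hv->tsc_shift;
			ns = hv->system_time + ((delta * hv->tsc_to_system_mul) >> 32);
			rmb();
		} while ((version & 1) || version != hv->version);

		return ns;
	}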
|
||||
|
||||
@@ -1572,9 +1872,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
||||
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
|
||||
}
|
||||
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
{
|
||||
bool pr = false;
|
||||
u32 msr = msr_info->index;
|
||||
u64 data = msr_info->data;
|
||||
|
||||
switch (msr) {
|
||||
case MSR_EFER:
|
||||
@@ -1625,6 +1927,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
kvm_set_lapic_tscdeadline_msr(vcpu, data);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
if (guest_cpuid_has_tsc_adjust(vcpu)) {
|
||||
if (!msr_info->host_initiated) {
|
||||
u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
|
||||
kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
|
||||
}
|
||||
vcpu->arch.ia32_tsc_adjust_msr = data;
|
||||
}
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
vcpu->arch.ia32_misc_enable_msr = data;
|
||||
break;
|
||||
@@ -1984,6 +2295,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
case MSR_IA32_TSCDEADLINE:
|
||||
data = kvm_get_lapic_tscdeadline_msr(vcpu);
|
||||
break;
|
||||
case MSR_IA32_TSC_ADJUST:
|
||||
data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
|
||||
break;
|
||||
case MSR_IA32_MISC_ENABLE:
|
||||
data = vcpu->arch.ia32_misc_enable_msr;
|
||||
break;
|
||||
@@ -2342,7 +2656,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
kvm_x86_ops->write_tsc_offset(vcpu, offset);
|
||||
vcpu->arch.tsc_catchup = 1;
|
||||
}
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
/*
|
||||
* On a host with synchronized TSC, there is no need to update
|
||||
* kvmclock on vcpu->cpu migration
|
||||
*/
|
||||
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
if (vcpu->cpu != cpu)
|
||||
kvm_migrate_timers(vcpu);
|
||||
vcpu->cpu = cpu;
|
||||
@@ -2691,15 +3010,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
if (!vcpu->arch.apic)
|
||||
goto out;
|
||||
u.lapic = memdup_user(argp, sizeof(*u.lapic));
|
||||
if (IS_ERR(u.lapic)) {
|
||||
r = PTR_ERR(u.lapic);
|
||||
goto out;
|
||||
}
|
||||
if (IS_ERR(u.lapic))
|
||||
return PTR_ERR(u.lapic);
|
||||
|
||||
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_INTERRUPT: {
|
||||
@@ -2709,16 +3023,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
if (copy_from_user(&irq, argp, sizeof irq))
|
||||
goto out;
|
||||
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_NMI: {
|
||||
r = kvm_vcpu_ioctl_nmi(vcpu);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_CPUID: {
|
||||
@@ -2729,8 +3037,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
|
||||
goto out;
|
||||
r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_CPUID2: {
|
||||
@@ -2742,8 +3048,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
goto out;
|
||||
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
|
||||
cpuid_arg->entries);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_GET_CPUID2: {
|
||||
@@ -2875,10 +3179,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
}
|
||||
case KVM_SET_XSAVE: {
|
||||
u.xsave = memdup_user(argp, sizeof(*u.xsave));
|
||||
if (IS_ERR(u.xsave)) {
|
||||
r = PTR_ERR(u.xsave);
|
||||
goto out;
|
||||
}
|
||||
if (IS_ERR(u.xsave))
|
||||
return PTR_ERR(u.xsave);
|
||||
|
||||
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
|
||||
break;
|
||||
@@ -2900,10 +3202,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
}
|
||||
case KVM_SET_XCRS: {
|
||||
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
|
||||
if (IS_ERR(u.xcrs)) {
|
||||
r = PTR_ERR(u.xcrs);
|
||||
goto out;
|
||||
}
|
||||
if (IS_ERR(u.xcrs))
|
||||
return PTR_ERR(u.xcrs);
|
||||
|
||||
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
|
||||
break;
|
||||
@@ -2951,7 +3251,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
|
||||
int ret;
|
||||
|
||||
if (addr > (unsigned int)(-3 * PAGE_SIZE))
|
||||
return -1;
|
||||
return -EINVAL;
|
||||
ret = kvm_x86_ops->set_tss_addr(kvm, addr);
|
||||
return ret;
|
||||
}
|
||||
@@ -3212,8 +3512,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
switch (ioctl) {
|
||||
case KVM_SET_TSS_ADDR:
|
||||
r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
|
||||
if (r < 0)
|
||||
goto out;
|
||||
break;
|
||||
case KVM_SET_IDENTITY_MAP_ADDR: {
|
||||
u64 ident_addr;
|
||||
@@ -3222,14 +3520,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
|
||||
if (r < 0)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_NR_MMU_PAGES:
|
||||
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
case KVM_GET_NR_MMU_PAGES:
|
||||
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
|
||||
@@ -3320,8 +3614,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = 0;
|
||||
get_irqchip_out:
|
||||
kfree(chip);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_SET_IRQCHIP: {
|
||||
@@ -3343,8 +3635,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = 0;
|
||||
set_irqchip_out:
|
||||
kfree(chip);
|
||||
if (r)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
case KVM_GET_PIT: {
|
||||
@@ -3371,9 +3661,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (!kvm->arch.vpit)
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_GET_PIT2: {
|
||||
@@ -3397,9 +3684,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (!kvm->arch.vpit)
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_REINJECT_CONTROL: {
|
||||
@@ -3408,9 +3692,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
if (copy_from_user(&control, argp, sizeof(control)))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_reinject(kvm, &control);
|
||||
if (r)
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_XEN_HVM_CONFIG: {
|
||||
@@ -4273,7 +4554,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
|
||||
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
|
||||
u32 msr_index, u64 data)
|
||||
{
|
||||
return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
|
||||
struct msr_data msr;
|
||||
|
||||
msr.data = data;
|
||||
msr.index = msr_index;
|
||||
msr.host_initiated = false;
|
||||
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
|
||||
}
|
||||
|
||||
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
|
||||
@@ -4495,7 +4781,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
* instruction -> ...
|
||||
*/
|
||||
pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
|
||||
if (!is_error_pfn(pfn)) {
|
||||
if (!is_error_noslot_pfn(pfn)) {
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return true;
|
||||
}
|
||||
@@ -4881,6 +5167,50 @@ static void kvm_set_mmio_spte_mask(void)
|
||||
kvm_mmu_set_mmio_spte_mask(mask);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static void pvclock_gtod_update_fn(struct work_struct *work)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
raw_spin_lock(&kvm_lock);
|
||||
list_for_each_entry(kvm, &vm_list, vm_list)
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
|
||||
atomic_set(&kvm_guest_has_master_clock, 0);
|
||||
raw_spin_unlock(&kvm_lock);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
|
||||
|
||||
/*
|
||||
* Notification about pvclock gtod data update.
|
||||
*/
|
||||
static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
|
||||
void *priv)
|
||||
{
|
||||
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
|
||||
struct timekeeper *tk = priv;
|
||||
|
||||
update_pvclock_gtod(tk);
|
||||
|
||||
/* disable master clock if host does not trust, or does not
|
||||
* use, TSC clocksource
|
||||
*/
|
||||
if (gtod->clock.vclock_mode != VCLOCK_TSC &&
|
||||
atomic_read(&kvm_guest_has_master_clock) != 0)
|
||||
queue_work(system_long_wq, &pvclock_gtod_work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct notifier_block pvclock_gtod_notifier = {
|
||||
.notifier_call = pvclock_gtod_notify,
|
||||
};
|
||||
#endif
|
||||
|
||||
int kvm_arch_init(void *opaque)
|
||||
{
|
||||
int r;
|
||||
@@ -4922,6 +5252,10 @@ int kvm_arch_init(void *opaque)
|
||||
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||
|
||||
kvm_lapic_init();
|
||||
#ifdef CONFIG_X86_64
|
||||
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
@@ -4936,6 +5270,9 @@ void kvm_arch_exit(void)
|
||||
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
|
||||
CPUFREQ_TRANSITION_NOTIFIER);
|
||||
unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
|
||||
#ifdef CONFIG_X86_64
|
||||
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
|
||||
#endif
|
||||
kvm_x86_ops = NULL;
|
||||
kvm_mmu_module_exit();
|
||||
}
|
||||
@@ -5059,7 +5396,7 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
|
||||
|
||||
int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
|
||||
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||
char instruction[3];
|
||||
@@ -5235,6 +5572,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
}
|
||||
|
||||
static void kvm_gen_update_masterclock(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_arch *ka = &kvm->arch;
|
||||
|
||||
spin_lock(&ka->pvclock_gtod_sync_lock);
|
||||
kvm_make_mclock_inprogress_request(kvm);
|
||||
/* no guest entries from this point */
|
||||
pvclock_update_vm_gtod_copy(kvm);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
|
||||
|
||||
/* guest entries allowed */
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
|
||||
|
||||
spin_unlock(&ka->pvclock_gtod_sync_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
@@ -5247,6 +5607,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
kvm_mmu_unload(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
|
||||
__kvm_migrate_timers(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
|
||||
kvm_gen_update_masterclock(vcpu->kvm);
|
||||
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
|
||||
r = kvm_guest_time_update(vcpu);
|
||||
if (unlikely(r))
|
||||
@@ -5362,7 +5724,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (hw_breakpoint_active())
|
||||
hw_breakpoint_restore();
|
||||
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
|
||||
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
|
||||
native_read_tsc());
|
||||
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
smp_wmb();
|
||||
@@ -5419,7 +5782,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
|
||||
pr_debug("vcpu %d received sipi with vector # %x\n",
|
||||
vcpu->vcpu_id, vcpu->arch.sipi_vector);
|
||||
kvm_lapic_reset(vcpu);
|
||||
r = kvm_arch_vcpu_reset(vcpu);
|
||||
r = kvm_vcpu_reset(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
@@ -6047,7 +6410,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
r = vcpu_load(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
r = kvm_arch_vcpu_reset(vcpu);
|
||||
r = kvm_vcpu_reset(vcpu);
|
||||
if (r == 0)
|
||||
r = kvm_mmu_setup(vcpu);
|
||||
vcpu_put(vcpu);
|
||||
@@ -6055,6 +6418,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
struct msr_data msr;
|
||||
|
||||
r = vcpu_load(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
msr.data = 0x0;
|
||||
msr.index = MSR_IA32_TSC;
|
||||
msr.host_initiated = true;
|
||||
kvm_write_tsc(vcpu, &msr);
|
||||
vcpu_put(vcpu);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int r;
|
||||
@@ -6069,7 +6449,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
kvm_x86_ops->vcpu_free(vcpu);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
atomic_set(&vcpu->arch.nmi_queued, 0);
|
||||
vcpu->arch.nmi_pending = 0;
|
||||
@@ -6092,6 +6472,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_pmu_reset(vcpu);
|
||||
|
||||
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
|
||||
vcpu->arch.regs_avail = ~0;
|
||||
vcpu->arch.regs_dirty = ~0;
|
||||
|
||||
return kvm_x86_ops->vcpu_reset(vcpu);
|
||||
}
|
||||
|
||||
@@ -6168,6 +6552,8 @@ int kvm_arch_hardware_enable(void *garbage)
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
vcpu->arch.tsc_offset_adjustment += delta_cyc;
|
||||
vcpu->arch.last_host_tsc = local_tsc;
|
||||
set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
|
||||
&vcpu->requests);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6258,10 +6644,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
|
||||
goto fail_free_mce_banks;
|
||||
|
||||
r = fx_init(vcpu);
|
||||
if (r)
|
||||
goto fail_free_wbinvd_dirty_mask;
|
||||
|
||||
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
|
||||
kvm_async_pf_hash_reset(vcpu);
|
||||
kvm_pmu_init(vcpu);
|
||||
|
||||
return 0;
|
||||
fail_free_wbinvd_dirty_mask:
|
||||
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
|
||||
fail_free_mce_banks:
|
||||
kfree(vcpu->arch.mce_banks);
|
||||
fail_free_lapic:
|
||||
@@ -6305,6 +6698,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
|
||||
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
|
||||
mutex_init(&kvm->arch.apic_map_lock);
|
||||
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
|
||||
|
||||
pvclock_update_vm_gtod_copy(kvm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|