Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.1

  The most interesting bit here is irqfd/ioeventfd support for ARM and
  ARM64.

  Summary:

  ARM/ARM64:
     fixes for live migration, irqfd and ioeventfd support (enabling
     vhost, too), page aging

  s390:
     interrupt handling rework, allowing to inject all local interrupts
     via new ioctl and to get/set the full local irq state for migration
     and introspection.  New ioctls to access memory by virtual address,
     and to get/set the guest storage keys.  SIMD support.

  MIPS:
     FPU and MIPS SIMD Architecture (MSA) support.  Includes some
     patches from Ralf Baechle's MIPS tree.

  x86:
     bugfixes (notably for pvclock, the others are small) and cleanups.
     Another small latency improvement for the TSC deadline timer"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
  KVM: use slowpath for cross page cached accesses
  kvm: mmu: lazy collapse small sptes into large sptes
  KVM: x86: Clear CR2 on VCPU reset
  KVM: x86: DR0-DR3 are not clear on reset
  KVM: x86: BSP in MSR_IA32_APICBASE is writable
  KVM: x86: simplify kvm_apic_map
  KVM: x86: avoid logical_map when it is invalid
  KVM: x86: fix mixed APIC mode broadcast
  KVM: x86: use MDA for interrupt matching
  kvm/ppc/mpic: drop unused IRQ_testbit
  KVM: nVMX: remove unnecessary double caching of MAXPHYADDR
  KVM: nVMX: checks for address bits beyond MAXPHYADDR on VM-entry
  KVM: x86: cache maxphyaddr CPUID leaf in struct kvm_vcpu
  KVM: vmx: pass error code with internal error #2
  x86: vdso: fix pvclock races with task migration
  KVM: remove kvm_read_hva and kvm_read_hva_atomic
  KVM: x86: optimize delivery of TSC deadline timer interrupt
  KVM: x86: extract blocking logic from __vcpu_run
  kvm: x86: fix x86 eflags fixed bit
  KVM: s390: migrate vcpu interrupt state
  ...
This commit is contained in:
Linus Torvalds
2015-04-13 09:47:01 -07:00
99 changed files with 5428 additions and 2084 deletions

View File

@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
int i;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
for (i = 0; i < KVM_NR_DB_REGS; i++)
vcpu->arch.eff_db[i] = vcpu->arch.db[i];
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
}
}
static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -3149,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return -EINVAL;
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
kvm_update_dr6(vcpu);
vcpu->arch.dr7 = dbgregs->dr7;
@@ -4114,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
do {
n = min(len, 8);
if (!(vcpu->arch.apic &&
!kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
&& kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
!kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
&& kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
handled += n;
addr += n;
@@ -4134,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
do {
n = min(len, 8);
if (!(vcpu->arch.apic &&
!kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
&& kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
!kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
handled += n;
@@ -4475,7 +4488,8 @@ mmio:
return X86EMUL_CONTINUE;
}
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
void *val, unsigned int bytes,
struct x86_exception *exception,
const struct read_write_emulator_ops *ops)
@@ -4538,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
exception, &read_emultor);
}
int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
const void *val,
unsigned int bytes,
@@ -4629,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
int r;
if (vcpu->arch.pio.in)
r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
return r;
@@ -4705,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4722,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
wbinvd();
return X86EMUL_CONTINUE;
}
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->skip_emulated_instruction(vcpu);
return kvm_emulate_wbinvd_noskip(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
{
kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
}
int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long *dest)
{
return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value)
{
return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5816,7 +5840,7 @@ void kvm_arch_exit(void)
free_percpu(shared_msrs);
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5827,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
return 0;
}
}
EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->skip_emulated_instruction(vcpu);
return kvm_vcpu_halt(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5903,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
lapic_irq.dest_id = apicid;
lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5911,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit, r = 1;
kvm_x86_ops->skip_emulated_instruction(vcpu);
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -6164,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
}
/*
* Returns 1 to let __vcpu_run() continue the guest execution loop without
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
*/
@@ -6301,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
trace_kvm_entry(vcpu->vcpu_id);
@@ -6382,42 +6416,47 @@ out:
return r;
}
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
if (!kvm_arch_vcpu_runnable(vcpu)) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
return 1;
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
case KVM_MP_STATE_INIT_RECEIVED:
break;
default:
return -EINTR;
break;
}
return 1;
}
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
r = 1;
while (r > 0) {
for (;;) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
r = vcpu_enter_guest(vcpu);
else {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
case KVM_MP_STATE_INIT_RECEIVED:
break;
default:
r = -EINTR;
break;
}
}
}
else
r = vcpu_block(kvm, vcpu);
if (r <= 0)
break;
@@ -6429,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
break;
}
kvm_check_async_pf_completion(vcpu);
@@ -6437,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
break;
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6568,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
r = __vcpu_run(vcpu);
r = vcpu_run(vcpu);
out:
post_kvm_run_save(vcpu);
@@ -7075,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_clear_exception_queue(vcpu);
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = DR6_INIT;
kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
vcpu->arch.cr2 = 0;
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.apf.msr_val = 0;
vcpu->arch.st.msr_val = 0;
@@ -7240,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7288,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 = 0;
vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
@@ -7428,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
kvm_kvfree(free->arch.rmap[i]);
kvfree(free->arch.rmap[i]);
free->arch.rmap[i] = NULL;
}
if (i == 0)
@@ -7436,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
if (!dont || free->arch.lpage_info[i - 1] !=
dont->arch.lpage_info[i - 1]) {
kvm_kvfree(free->arch.lpage_info[i - 1]);
kvfree(free->arch.lpage_info[i - 1]);
free->arch.lpage_info[i - 1] = NULL;
}
}
@@ -7490,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
kvm_kvfree(slot->arch.rmap[i]);
kvfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL;
if (i == 0)
continue;
kvm_kvfree(slot->arch.lpage_info[i - 1]);
kvfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
return -ENOMEM;
@@ -7617,6 +7663,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
/* It's OK to get 'new' slot here as it has already been installed */
new = id_to_memslot(kvm->memslots, mem->slot);
/*
* Dirty logging tracks sptes in 4k granularity, meaning that large
* sptes have to be split. If live migration is successful, the guest
* in the source machine will be destroyed and large sptes will be
* created in the destination. However, if the guest continues to run
* in the source machine (for example if live migration fails), small
* sptes will remain around and cause bad performance.
*
* Scan sptes if dirty logging has been stopped, dropping those
* which can be collapsed into a single large-page spte. Later
* page faults will create the large-page sptes.
*/
if ((change != KVM_MR_DELETE) &&
(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_zap_collapsible_sptes(kvm, new);
/*
* Set up write protection and/or dirty logging for the new slot.
*