Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "s390:
   - ioctl hardening
   - selftests

  ARM:
   - ITS translation cache
   - support for 512 vCPUs
   - various cleanups and bugfixes

  PPC:
   - various minor fixes and preparation

  x86:
   - bugfixes all over the place (posted interrupts, SVM, emulation
     corner cases, blocked INIT)
   - some IPI optimizations"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (75 commits)
  KVM: X86: Use IPI shorthands in kvm guest when support
  KVM: x86: Fix INIT signal handling in various CPU states
  KVM: VMX: Introduce exit reason for receiving INIT signal on guest-mode
  KVM: VMX: Stop the preemption timer during vCPU reset
  KVM: LAPIC: Micro optimize IPI latency
  kvm: Nested KVM MMUs need PAE root too
  KVM: x86: set ctxt->have_exception in x86_decode_insn()
  KVM: x86: always stop emulation on page fault
  KVM: nVMX: trace nested VM-Enter failures detected by H/W
  KVM: nVMX: add tracepoint for failed nested VM-Enter
  x86: KVM: svm: Fix a check in nested_svm_vmrun()
  KVM: x86: Return to userspace with internal error on unexpected exit reason
  KVM: x86: Add kvm_emulate_{rd,wr}msr() to consolidate VXM/SVM code
  KVM: x86: Refactor up kvm_{g,s}et_msr() to simplify callers
  doc: kvm: Fix return description of KVM_SET_MSRS
  KVM: X86: Tune PLE Window tracepoint
  KVM: VMX: Change ple_window type to unsigned int
  KVM: X86: Remove tailing newline for tracepoints
  KVM: X86: Trace vcpu_id for vmexit
  KVM: x86: Manually calculate reserved bits when loading PDPTRS
  ...
This commit is contained in:
Linus Torvalds
2019-09-18 09:49:13 -07:00
63 changed files with 1701 additions and 804 deletions

View File

@@ -68,10 +68,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_FEATURE_SVML (1 << 2)
#define SVM_FEATURE_NRIP (1 << 3)
#define SVM_FEATURE_TSC_RATE (1 << 4)
#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
#define SVM_FEATURE_FLUSH_ASID (1 << 6)
@@ -770,7 +768,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
}
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -779,18 +777,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
svm->next_rip = svm->vmcb->control.next_rip;
}
if (!svm->next_rip) {
if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
}
if (!svm->next_rip)
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
__func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
svm_set_interrupt_shadow(vcpu, 0);
return EMULATE_DONE;
}
static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -821,7 +818,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
* raises a fault that is not intercepted. Still better than
* failing in all cases.
*/
skip_emulated_instruction(&svm->vcpu);
(void)skip_emulated_instruction(&svm->vcpu);
rip = kvm_rip_read(&svm->vcpu);
svm->int3_rip = rip + svm->vmcb->save.cs.base;
svm->int3_injected = rip - old_rip;
@@ -1269,11 +1266,11 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count_grow,
pause_filter_count_max);
if (control->pause_filter_count != old)
if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_grow(vcpu->vcpu_id,
control->pause_filter_count, old);
trace_kvm_ple_window_update(vcpu->vcpu_id,
control->pause_filter_count, old);
}
}
static void shrink_ple_window(struct kvm_vcpu *vcpu)
@@ -1287,11 +1284,11 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count,
pause_filter_count_shrink,
pause_filter_count);
if (control->pause_filter_count != old)
if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_shrink(vcpu->vcpu_id,
control->pause_filter_count, old);
trace_kvm_ple_window_update(vcpu->vcpu_id,
control->pause_filter_count, old);
}
}
static __init int svm_hardware_setup(void)
@@ -2136,6 +2133,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct page *nested_msrpm_pages;
int err;
BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
"struct kvm_vcpu must be at offset 0 for arch usercopy region");
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
if (!svm) {
err = -ENOMEM;
@@ -2903,13 +2903,11 @@ static int nop_on_interception(struct vcpu_svm *svm)
static int halt_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
return kvm_emulate_halt(&svm->vcpu);
}
static int vmmcall_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_emulate_hypercall(&svm->vcpu);
}
@@ -3588,9 +3586,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
mark_all_dirty(svm->vmcb);
}
static bool nested_svm_vmrun(struct vcpu_svm *svm)
static int nested_svm_vmrun(struct vcpu_svm *svm)
{
int rc;
int ret;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
@@ -3599,13 +3597,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
vmcb_gpa = svm->vmcb->save.rax;
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
return false;
ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (ret == -EINVAL) {
kvm_inject_gp(&svm->vcpu, 0);
return 1;
} else if (ret) {
return kvm_skip_emulated_instruction(&svm->vcpu);
}
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
@@ -3616,7 +3617,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
kvm_vcpu_unmap(&svm->vcpu, &map, true);
return false;
return ret;
}
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -3660,7 +3661,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
return true;
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
nested_svm_vmexit(svm);
}
return ret;
}
static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
@@ -3697,7 +3707,6 @@ static int vmload_interception(struct vcpu_svm *svm)
nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
@@ -3724,7 +3733,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
@@ -3738,27 +3746,7 @@ static int vmrun_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
/* Save rip after vmrun instruction */
kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
if (!nested_svm_vmrun(svm))
return 1;
if (!nested_svm_vmrun_msrpm(svm))
goto failed;
return 1;
failed:
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
nested_svm_vmexit(svm);
return 1;
return nested_svm_vmrun(svm);
}
static int stgi_interception(struct vcpu_svm *svm)
@@ -3775,7 +3763,6 @@ static int stgi_interception(struct vcpu_svm *svm)
if (vgif_enabled(svm))
clr_intercept(svm, INTERCEPT_STGI);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@@ -3791,7 +3778,6 @@ static int clgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
disable_gif(svm);
@@ -3816,7 +3802,6 @@ static int invlpga_interception(struct vcpu_svm *svm)
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -3839,7 +3824,6 @@ static int xsetbv_interception(struct vcpu_svm *svm)
u32 index = kvm_rcx_read(&svm->vcpu);
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -3898,25 +3882,29 @@ static int task_switch_interception(struct vcpu_svm *svm)
if (reason != TASK_SWITCH_GATE ||
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
skip_emulated_instruction(&svm->vcpu);
(int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
goto fail;
}
if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
int_vec = -1;
if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
has_error_code, error_code) == EMULATE_FAIL) {
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
svm->vcpu.run->internal.ndata = 0;
return 0;
}
has_error_code, error_code) == EMULATE_FAIL)
goto fail;
return 1;
fail:
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
svm->vcpu.run->internal.ndata = 0;
return 0;
}
static int cpuid_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_emulate_cpuid(&svm->vcpu);
}
@@ -4232,23 +4220,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int rdmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = kvm_rcx_read(&svm->vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
msr_info.host_initiated = false;
if (svm_get_msr(&svm->vcpu, &msr_info)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
return 1;
} else {
trace_kvm_msr_read(ecx, msr_info.data);
kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
return kvm_emulate_rdmsr(&svm->vcpu);
}
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -4438,23 +4410,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
struct msr_data msr;
u32 ecx = kvm_rcx_read(&svm->vcpu);
u64 data = kvm_read_edx_eax(&svm->vcpu);
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
return 1;
} else {
trace_kvm_msr_write(ecx, data);
return kvm_skip_emulated_instruction(&svm->vcpu);
}
return kvm_emulate_wrmsr(&svm->vcpu);
}
static int msr_interception(struct vcpu_svm *svm)
@@ -5025,9 +4981,14 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
dump_vmcb(vcpu);
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
vcpu->run->internal.ndata = 1;
vcpu->run->internal.data[0] = exit_code;
return 0;
}
return svm_exit_handlers[exit_code](svm);
@@ -5274,7 +5235,8 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
!kvm_irq_is_postable(&irq)) {
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
__func__, irq.vector);
return -1;
@@ -5328,6 +5290,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* 1. When cannot target interrupt to a specific vcpu.
* 2. Unsetting posted interrupt.
* 3. APIC virtialization is disabled for the vcpu.
* 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
*/
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
kvm_vcpu_apicv_active(&svm->vcpu)) {
@@ -5933,6 +5896,8 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
}
#define F(x) bit(X86_FEATURE_##x)
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
@@ -5944,6 +5909,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
if (nested)
entry->ecx |= (1 << 2); /* Set SVM bit */
break;
case 0x80000008:
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(VIRT_SSBD);
break;
case 0x8000000A:
entry->eax = 1; /* SVM revision 1 */
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
@@ -5954,11 +5924,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
/* Support next_rip if host supports it */
if (boot_cpu_has(X86_FEATURE_NRIPS))
entry->edx |= SVM_FEATURE_NRIP;
entry->edx |= F(NRIPS);
/* Support NPT for the guest if enabled */
if (npt_enabled)
entry->edx |= SVM_FEATURE_NPT;
entry->edx |= F(NPT);
break;
case 0x8000001F:
@@ -6067,6 +6037,7 @@ static const struct __x86_intercept {
[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
};
#undef PRE_EX
@@ -7193,6 +7164,21 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
return false;
}
static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
/*
* TODO: Last condition latch INIT signals on vCPU when
* vCPU is in guest-mode and vmcb12 defines intercept on INIT.
* To properly emulate the INIT intercept, SVM should implement
* kvm_x86_ops->check_nested_events() and call nested_svm_vmexit()
* there if an INIT signal is pending.
*/
return !gif_set(svm) ||
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
}
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7329,6 +7315,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
};
static int __init svm_init(void)