Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull first set of KVM updates from Paolo Bonzini:
 "PPC:
   - minor code cleanups

  x86:
   - PCID emulation and CR3 caching for shadow page tables
   - nested VMX live migration
   - nested VMCS shadowing
   - optimized IPI hypercall
   - some optimizations

  ARM will come next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
  kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
  KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
  KVM: X86: Implement PV IPIs in linux guest
  KVM: X86: Add kvm hypervisor init time platform setup callback
  KVM: X86: Implement "send IPI" hypercall
  KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
  KVM: x86: Skip pae_root shadow allocation if tdp enabled
  KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
  KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
  KVM: vmx: move struct host_state usage to struct loaded_vmcs
  KVM: vmx: compute need to reload FS/GS/LDT on demand
  KVM: nVMX: remove a misleading comment regarding vmcs02 fields
  KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
  KVM: vmx: add dedicated utility to access guest's kernel_gs_base
  KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
  KVM: vmx: refactor segmentation code in vmx_save_host_state()
  kvm: nVMX: Fix fault priority for VMX operations
  kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
  ...
This commit is contained in:
Linus Torvalds
2018-08-19 10:38:36 -07:00
53 changed files with 3115 additions and 731 deletions

View File

@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64
bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
if (pcid_enabled)
cr3 &= ~CR3_PCID_INVD;
if (pcid_enabled) {
skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
cr3 &= ~X86_CR3_PCID_NOFLUSH;
}
#endif
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
if (!skip_tlb_flush) {
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
return 0;
}
@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
kvm_mmu_new_cr3(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.mcg_status = data;
break;
case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P))
if (!(mcg_cap & MCG_CTL_P) &&
(data || !msr_info->host_initiated))
return 1;
if (data != 0 && data != ~(u64)0)
return -1;
return 1;
vcpu->arch.mcg_ctl = data;
break;
default:
@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.mcg_cap;
break;
case MSR_IA32_MCG_CTL:
if (!(mcg_cap & MCG_CTL_P))
if (!(mcg_cap & MCG_CTL_P) && !host)
return 1;
data = vcpu->arch.mcg_ctl;
break;
@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data);
msr_info->index, &msr_info->data,
msr_info->host_initiated);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS;
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
break;
default:
break;
}
@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
case KVM_GET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
u32 user_data_size;
r = -EINVAL;
if (!kvm_x86_ops->get_nested_state)
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
if (get_user(user_data_size, &user_kvm_nested_state->size))
return -EFAULT;
r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
return r;
if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size))
return -EFAULT;
return -E2BIG;
}
r = 0;
break;
}
case KVM_SET_NESTED_STATE: {
struct kvm_nested_state __user *user_kvm_nested_state = argp;
struct kvm_nested_state kvm_state;
r = -EINVAL;
if (!kvm_x86_ops->set_nested_state)
break;
if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
return -EFAULT;
if (kvm_state.size < sizeof(kvm_state))
return -EINVAL;
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL;
/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
return -EINVAL;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
}
default:
r = -EINVAL;
}
@@ -6503,8 +6566,12 @@ static void kvm_set_mmio_spte_mask(void)
* Set the reserved bits and the present bit of an paging-structure
* entry to generate page fault with PFER.RSV = 1.
*/
/* Mask the reserved physical address bits. */
mask = rsvd_bits(maxphyaddr, 51);
/*
* Mask the uppermost physical address bit, which would be reserved as
* long as the supported physical address width is less than 52.
*/
mask = 1ull << 51;
/* Set the present bit. */
mask |= 1ull;
@@ -6769,6 +6836,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_CLOCK_PAIRING:
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
#endif
default:
ret = -KVM_ENOSYS;
@@ -7287,6 +7357,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
kvm_x86_ops->get_vmcs12_pages(vcpu);
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -7302,6 +7374,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
kvm_mmu_load_cr3(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -8013,6 +8087,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
return -EINVAL;
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
* When EFER.LME and CR0.PG are set, the processor is in
@@ -8043,10 +8121,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
struct desc_ptr dt;
int ret = -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
goto out;
if (kvm_valid_sregs(vcpu, sregs))
goto out;