Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "One of the largest releases for KVM...  Hardly any generic
  changes, but lots of architecture-specific updates.

  ARM:
   - VHE support so that we can run the kernel at EL2 on ARMv8.1 systems
   - PMU support for guests
   - 32bit world switch rewritten in C
   - various optimizations to the vgic save/restore code.

  PPC:
   - enabled KVM-VFIO integration ("VFIO device")
   - optimizations to speed up IPIs between vcpus
   - in-kernel handling of IOMMU hypercalls
   - support for dynamic DMA windows (DDW).

  s390:
   - provide the floating point registers via sync regs;
   - separated instruction vs.  data accesses
   - dirty log improvements for huge guests
   - bugfixes and documentation improvements.

  x86:
   - Hyper-V VMBus hypercall userspace exit
   - alternative implementation of lowest-priority interrupts using
     vector hashing (for better VT-d posted interrupt support)
   - fixed guest debugging with nested virtualizations
   - improved interrupt tracking in the in-kernel IOAPIC
   - generic infrastructure for tracking writes to guest
     memory - currently its only use is to speedup the legacy shadow
     paging (pre-EPT) case, but in the future it will be used for
     virtual GPUs as well
   - much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits)
  KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
  KVM: x86: disable MPX if host did not enable MPX XSAVE features
  arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit
  arm64: KVM: vgic-v3: Reset LRs at boot time
  arm64: KVM: vgic-v3: Do not save an LR known to be empty
  arm64: KVM: vgic-v3: Save maintenance interrupt state only if required
  arm64: KVM: vgic-v3: Avoid accessing ICH registers
  KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit
  KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit
  KVM: arm/arm64: vgic-v2: Reset LRs at boot time
  KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty
  KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function
  KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required
  KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers
  KVM: s390: allocate only one DMA page per VM
  KVM: s390: enable STFLE interpretation only if enabled for the guest
  KVM: s390: wake up when the VCPU cpu timer expires
  KVM: s390: step the VCPU timer while in enabled wait
  KVM: s390: protect VCPU cpu timer with a seqcount
  KVM: s390: step VCPU cpu timer during kvm_run ioctl
  ...
This commit is contained in:
Linus Torvalds
2016-03-16 09:55:35 -07:00
142 changed files with 6754 additions and 2936 deletions

View File

@@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/kvm.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL);
kvm_timer_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* non-preemptible context.
*/
preempt_disable();
kvm_pmu_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);
kvm_vgic_flush_hwstate(vcpu);
@@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
vcpu->arch.power_off || vcpu->arch.pause) {
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
@@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We must sync the timer state before the vgic state so that
* the vgic can properly sample the updated state of the
* We must sync the PMU and timer state before the vgic state so
* that the vgic can properly sample the updated state of the
* interrupt line.
*/
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
@@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return 0;
}
static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
break;
}
return ret;
}
static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
break;
}
return ret;
}
static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
break;
}
return ret;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
@@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
return -E2BIG;
return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
}
case KVM_SET_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_set_attr(vcpu, &attr);
}
case KVM_GET_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_get_attr(vcpu, &attr);
}
case KVM_HAS_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_has_attr(vcpu, &attr);
}
default:
return -EINVAL;
}
@@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
static void cpu_init_stage2(void *dummy)
{
__cpu_init_stage2();
}
static void cpu_init_hyp_mode(void *dummy)
{
phys_addr_t boot_pgd_ptr;
@@ -985,6 +1054,7 @@ static void cpu_init_hyp_mode(void *dummy)
vector_ptr = (unsigned long)__kvm_hyp_vector;
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
kvm_arm_init_debug();
}
@@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void)
}
#endif
static void teardown_common_resources(void)
{
free_percpu(kvm_host_cpu_state);
}
static int init_common_resources(void)
{
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
kvm_err("Cannot allocate host CPU state\n");
return -ENOMEM;
}
return 0;
}
static int init_subsystems(void)
{
int err;
/*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
switch (err) {
case 0:
vgic_present = true;
break;
case -ENODEV:
case -ENXIO:
vgic_present = false;
break;
default:
return err;
}
/*
* Init HYP architected timer support
*/
err = kvm_timer_hyp_init();
if (err)
return err;
kvm_perf_init();
kvm_coproc_table_init();
return 0;
}
static void teardown_hyp_mode(void)
{
int cpu;
if (is_kernel_in_hyp_mode())
return;
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
}
static int init_vhe_mode(void)
{
/*
* Execute the init code on each CPU.
*/
on_each_cpu(cpu_init_stage2, NULL, 1);
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
kvm_info("VHE mode initialized successfully\n");
return 0;
}
/**
* Inits Hyp-mode on all online CPUs
*/
@@ -1065,7 +1211,7 @@ static int init_hyp_mode(void)
stack_page = __get_free_page(GFP_KERNEL);
if (!stack_page) {
err = -ENOMEM;
goto out_free_stack_pages;
goto out_err;
}
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
@@ -1074,16 +1220,16 @@ static int init_hyp_mode(void)
/*
* Map the Hyp-code called directly from the host
*/
err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
err = create_hyp_mappings(__hyp_text_start, __hyp_text_end);
if (err) {
kvm_err("Cannot map world-switch code\n");
goto out_free_mappings;
goto out_err;
}
err = create_hyp_mappings(__start_rodata, __end_rodata);
if (err) {
kvm_err("Cannot map rodata section\n");
goto out_free_mappings;
goto out_err;
}
/*
@@ -1095,20 +1241,10 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_free_mappings;
goto out_err;
}
}
/*
* Map the host CPU structures
*/
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
err = -ENOMEM;
kvm_err("Cannot allocate host CPU state\n");
goto out_free_mappings;
}
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
@@ -1117,7 +1253,7 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map host CPU state: %d\n", err);
goto out_free_context;
goto out_err;
}
}
@@ -1126,34 +1262,22 @@ static int init_hyp_mode(void)
*/
on_each_cpu(cpu_init_hyp_mode, NULL, 1);
/*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
switch (err) {
case 0:
vgic_present = true;
break;
case -ENODEV:
case -ENXIO:
vgic_present = false;
break;
default:
goto out_free_context;
}
/*
* Init HYP architected timer support
*/
err = kvm_timer_hyp_init();
if (err)
goto out_free_context;
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif
kvm_perf_init();
cpu_notifier_register_begin();
err = __register_cpu_notifier(&hyp_init_cpu_nb);
cpu_notifier_register_done();
if (err) {
kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
goto out_err;
}
hyp_cpu_pm_init();
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
@@ -1162,14 +1286,9 @@ static int init_hyp_mode(void)
kvm_info("Hyp mode initialized successfully\n");
return 0;
out_free_context:
free_percpu(kvm_host_cpu_state);
out_free_mappings:
free_hyp_pgds();
out_free_stack_pages:
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
out_err:
teardown_hyp_mode();
kvm_err("error initializing Hyp mode: %d\n", err);
return err;
}
@@ -1213,26 +1332,27 @@ int kvm_arch_init(void *opaque)
}
}
cpu_notifier_register_begin();
err = init_common_resources();
if (err)
return err;
err = init_hyp_mode();
if (is_kernel_in_hyp_mode())
err = init_vhe_mode();
else
err = init_hyp_mode();
if (err)
goto out_err;
err = __register_cpu_notifier(&hyp_init_cpu_nb);
if (err) {
kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
goto out_err;
}
err = init_subsystems();
if (err)
goto out_hyp;
cpu_notifier_register_done();
hyp_cpu_pm_init();
kvm_coproc_table_init();
return 0;
out_hyp:
teardown_hyp_mode();
out_err:
cpu_notifier_register_done();
teardown_common_resources();
return err;
}