Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.1

  The most interesting bit here is irqfd/ioeventfd support for ARM and
  ARM64.

  Summary:

  ARM/ARM64:
     fixes for live migration, irqfd and ioeventfd support (enabling
     vhost, too), page aging

  s390:
     interrupt handling rework, allowing to inject all local interrupts
     via new ioctl and to get/set the full local irq state for migration
     and introspection.  New ioctls to access memory by virtual address,
     and to get/set the guest storage keys.  SIMD support.

  MIPS:
     FPU and MIPS SIMD Architecture (MSA) support.  Includes some
     patches from Ralf Baechle's MIPS tree.

  x86:
     bugfixes (notably for pvclock, the others are small) and cleanups.
     Another small latency improvement for the TSC deadline timer"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
  KVM: use slowpath for cross page cached accesses
  kvm: mmu: lazy collapse small sptes into large sptes
  KVM: x86: Clear CR2 on VCPU reset
  KVM: x86: DR0-DR3 are not clear on reset
  KVM: x86: BSP in MSR_IA32_APICBASE is writable
  KVM: x86: simplify kvm_apic_map
  KVM: x86: avoid logical_map when it is invalid
  KVM: x86: fix mixed APIC mode broadcast
  KVM: x86: use MDA for interrupt matching
  kvm/ppc/mpic: drop unused IRQ_testbit
  KVM: nVMX: remove unnecessary double caching of MAXPHYADDR
  KVM: nVMX: checks for address bits beyond MAXPHYADDR on VM-entry
  KVM: x86: cache maxphyaddr CPUID leaf in struct kvm_vcpu
  KVM: vmx: pass error code with internal error #2
  x86: vdso: fix pvclock races with task migration
  KVM: remove kvm_read_hva and kvm_read_hva_atomic
  KVM: x86: optimize delivery of TSC deadline timer interrupt
  KVM: x86: extract blocking logic from __vcpu_run
  kvm: x86: fix x86 eflags fixed bit
  KVM: s390: migrate vcpu interrupt state
  ...
This commit is contained in:
Linus Torvalds
2015-04-13 09:47:01 -07:00
99 changed files with 5428 additions and 2084 deletions

View File

@@ -16,7 +16,7 @@
*
*/
#include "iodev.h"
#include <kvm/iodev.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
@@ -66,13 +66,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
unsigned int halt_poll_ns = 0;
static unsigned int halt_poll_ns;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
/*
* Ordering of locks:
*
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
DEFINE_SPINLOCK(kvm_lock);
@@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
static int kvm_usage_count = 0;
static int kvm_usage_count;
static atomic_t hardware_enable_failed;
struct kmem_cache *kvm_vcpu_cache;
@@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size)
return kzalloc(size, GFP_KERNEL);
}
void kvm_kvfree(const void *addr)
{
if (is_vmalloc_addr(addr))
vfree(addr);
else
kfree(addr);
}
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
if (!memslot->dirty_bitmap)
return;
kvm_kvfree(memslot->dirty_bitmap);
kvfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
}
@@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
* or moved, memslot will be created.
*
* validation of sp->gfn happens in:
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
* - kvm_is_visible_gfn (mmu_check_roots)
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
* - kvm_is_visible_gfn (mmu_check_roots)
*/
kvm_arch_flush_shadow_memslot(kvm, slot);
@@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
mask = xchg(&dirty_bitmap[i], 0);
dirty_bitmap_buffer[i] = mask;
offset = i * BITS_PER_LONG;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
mask);
if (mask) {
offset = i * BITS_PER_LONG;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
}
spin_unlock(&kvm->mmu_lock);
@@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
static int kvm_read_hva(void *data, void __user *hva, int len)
{
return __copy_from_user(data, hva, len);
}
static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
{
return __copy_from_user_inatomic(data, hva, len);
}
static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
{
@@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
return kvm_pfn_to_page(pfn);
}
EXPORT_SYMBOL_GPL(gfn_to_page);
void kvm_release_page_clean(struct page *page)
@@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn)
{
if (!kvm_is_reserved_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
SetPageDirty(page);
}
@@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = kvm_read_hva(data, (void __user *)addr + offset, len);
r = __copy_from_user(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
@@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
pagefault_enable();
if (r)
return -EFAULT;
@@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
ghc->generation = slots->generation;
ghc->len = len;
ghc->memslot = gfn_to_memslot(kvm, start_gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
ghc->hva += offset;
} else {
/*
@@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
int offset = offset_in_page(gpa);
int ret;
while ((seg = next_segment(len, offset)) != 0) {
while ((seg = next_segment(len, offset)) != 0) {
ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
if (ret < 0)
return ret;
@@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
start = cur = ktime_get();
if (halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
@@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
* so vcpu_load() would break it.
*/
if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
#endif
@@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
/* The thread running this VCPU changed. */
struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
if (oldpid)
synchronize_rcu();
@@ -2205,7 +2191,7 @@ out_free1:
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &mp_state, sizeof mp_state))
if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
goto out;
r = 0;
break;
@@ -2214,7 +2200,7 @@ out_free1:
struct kvm_mp_state mp_state;
r = -EFAULT;
if (copy_from_user(&mp_state, argp, sizeof mp_state))
if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
goto out;
r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
break;
@@ -2223,13 +2209,13 @@ out_free1:
struct kvm_translation tr;
r = -EFAULT;
if (copy_from_user(&tr, argp, sizeof tr))
if (copy_from_user(&tr, argp, sizeof(tr)))
goto out;
r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &tr, sizeof tr))
if (copy_to_user(argp, &tr, sizeof(tr)))
goto out;
r = 0;
break;
@@ -2238,7 +2224,7 @@ out_free1:
struct kvm_guest_debug dbg;
r = -EFAULT;
if (copy_from_user(&dbg, argp, sizeof dbg))
if (copy_from_user(&dbg, argp, sizeof(dbg)))
goto out;
r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
break;
@@ -2252,14 +2238,14 @@ out_free1:
if (argp) {
r = -EFAULT;
if (copy_from_user(&kvm_sigmask, argp,
sizeof kvm_sigmask))
sizeof(kvm_sigmask)))
goto out;
r = -EINVAL;
if (kvm_sigmask.len != sizeof sigset)
if (kvm_sigmask.len != sizeof(sigset))
goto out;
r = -EFAULT;
if (copy_from_user(&sigset, sigmask_arg->sigset,
sizeof sigset))
sizeof(sigset)))
goto out;
p = &sigset;
}
@@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
if (argp) {
r = -EFAULT;
if (copy_from_user(&kvm_sigmask, argp,
sizeof kvm_sigmask))
sizeof(kvm_sigmask)))
goto out;
r = -EINVAL;
if (kvm_sigmask.len != sizeof csigset)
if (kvm_sigmask.len != sizeof(csigset))
goto out;
r = -EFAULT;
if (copy_from_user(&csigset, sigmask_arg->sigset,
sizeof csigset))
sizeof(csigset)))
goto out;
sigset_from_compat(&sigset, &csigset);
r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2525,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&kvm_userspace_mem, argp,
sizeof kvm_userspace_mem))
sizeof(kvm_userspace_mem)))
goto out;
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2535,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_dirty_log log;
r = -EFAULT;
if (copy_from_user(&log, argp, sizeof log))
if (copy_from_user(&log, argp, sizeof(log)))
goto out;
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
break;
@@ -2543,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp,
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
case KVM_REGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
if (copy_from_user(&zone, argp, sizeof(zone)))
goto out;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
break;
}
case KVM_UNREGISTER_COALESCED_MMIO: {
struct kvm_coalesced_mmio_zone zone;
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
if (copy_from_user(&zone, argp, sizeof(zone)))
goto out;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
break;
@@ -2562,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_irqfd data;
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof data))
if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_irqfd(kvm, &data);
break;
@@ -2571,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_ioeventfd data;
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof data))
if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_ioeventfd(kvm, &data);
break;
@@ -2592,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_msi msi;
r = -EFAULT;
if (copy_from_user(&msi, argp, sizeof msi))
if (copy_from_user(&msi, argp, sizeof(msi)))
goto out;
r = kvm_send_userspace_msi(kvm, &msi);
break;
@@ -2604,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp,
struct kvm_irq_level irq_event;
r = -EFAULT;
if (copy_from_user(&irq_event, argp, sizeof irq_event))
if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
goto out;
r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2614,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (ioctl == KVM_IRQ_LINE_STATUS) {
if (copy_to_user(argp, &irq_event, sizeof irq_event))
if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
goto out;
}
@@ -2647,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp,
goto out_free_irq_routing;
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
out_free_irq_routing:
out_free_irq_routing:
vfree(entries);
break;
}
@@ -2822,8 +2810,7 @@ static void hardware_enable_nolock(void *junk)
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
atomic_inc(&hardware_enable_failed);
printk(KERN_INFO "kvm: enabling virtualization on "
"CPU%d failed\n", cpu);
pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
}
}
@@ -2899,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
pr_info("kvm: disabling virtualization on CPU%d\n",
cpu);
hardware_disable();
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
pr_info("kvm: enabling virtualization on CPU%d\n",
cpu);
hardware_enable();
break;
@@ -2921,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
*
* And Intel TXT required VMX off for all cpu when system shutdown.
*/
printk(KERN_INFO "kvm: exiting hardware virtualization\n");
pr_info("kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
on_each_cpu(hardware_disable_nolock, NULL, 1);
return NOTIFY_OK;
@@ -2945,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
}
static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
const struct kvm_io_range *r2)
const struct kvm_io_range *r2)
{
if (r1->addr < r2->addr)
return -1;
@@ -2998,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
return off;
}
static int __kvm_io_bus_write(struct kvm_io_bus *bus,
static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
struct kvm_io_range *range, const void *val)
{
int idx;
@@ -3009,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
while (idx < bus->dev_count &&
kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
range->len, val))
return idx;
idx++;
@@ -3019,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
}
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
struct kvm_io_bus *bus;
@@ -3031,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
r = __kvm_io_bus_write(bus, &range, val);
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val, long cookie)
int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
gpa_t addr, int len, const void *val, long cookie)
{
struct kvm_io_bus *bus;
struct kvm_io_range range;
@@ -3048,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
(kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
val))
return cookie;
@@ -3061,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
* cookie contained garbage; fall back to search and return the
* correct cookie value.
*/
return __kvm_io_bus_write(bus, &range, val);
return __kvm_io_bus_write(vcpu, bus, &range, val);
}
static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
void *val)
static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
struct kvm_io_range *range, void *val)
{
int idx;
@@ -3075,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
while (idx < bus->dev_count &&
kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
range->len, val))
return idx;
idx++;
@@ -3086,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
EXPORT_SYMBOL_GPL(kvm_io_bus_write);
/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
struct kvm_io_bus *bus;
@@ -3098,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
r = __kvm_io_bus_read(bus, &range, val);
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3269,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
if (vcpu->preempted)
vcpu->preempted = false;
@@ -3350,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = misc_register(&kvm_dev);
if (r) {
printk(KERN_ERR "kvm: misc device register failed\n");
pr_err("kvm: misc device register failed\n");
goto out_unreg;
}
@@ -3361,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_init_debug();
if (r) {
printk(KERN_ERR "kvm: create debugfs files failed\n");
pr_err("kvm: create debugfs files failed\n");
goto out_undebugfs;
}