Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM update from Paolo Bonzini:
 "Fairly small update, but there are some interesting new features.

  Common:
     Optional support for adding a small amount of polling on each HLT
     instruction executed in the guest (or equivalent for other
     architectures).  This can improve latency up to 50% on some
     scenarios (e.g. O_DSYNC writes or TCP_RR netperf tests).  This
     also has to be enabled manually for now, but the plan is to
     auto-tune this in the future.

  ARM/ARM64:
     The highlights are support for GICv3 emulation and dirty page
     tracking

  s390:
     Several optimizations and bugfixes.  Also a first: a feature
     exposed by KVM (UUID and long guest name in /proc/sysinfo) before
     it is available in IBM's hypervisor! :)

  MIPS:
     Bugfixes.

  x86:
     Support for PML (page modification logging, a new feature in
     Broadwell Xeons that speeds up dirty page tracking), nested
     virtualization improvements (nested APICv---a nice optimization),
     usual round of emulation fixes.

     There is also a new option to reduce latency of the TSC deadline
     timer in the guest; this needs to be tuned manually.

     Some commits are common between this pull and Catalin's; I see you
     have already included his tree.

  Powerpc:
     Nothing yet.

     The KVM/PPC changes will come in through the PPC maintainers,
     because I haven't received them yet and I might end up being
     offline for some part of next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: ia64: drop kvm.h from installed user headers
  KVM: x86: fix build with !CONFIG_SMP
  KVM: x86: emulate: correct page fault error code for NoWrite instructions
  KVM: Disable compat ioctl for s390
  KVM: s390: add cpu model support
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390/CPACF: Choose crypto control block format
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID
  KVM: s390: reenable LPP facility
  KVM: s390: floating irqs: fix user triggerable endless loop
  kvm: add halt_poll_ns module parameter
  kvm: remove KVM_MMIO_SIZE
  KVM: MIPS: Don't leak FPU/DSP to guest
  KVM: MIPS: Disable HTW while in guest
  KVM: nVMX: Enable nested posted interrupt processing
  KVM: nVMX: Enable nested virtual interrupt delivery
  KVM: nVMX: Enable nested apic register virtualization
  KVM: nVMX: Make nested control MSRs per-cpu
  KVM: nVMX: Enable nested virtualize x2apic mode
  KVM: nVMX: Prepare for using hardware MSR bitmap
  ...
This commit is contained in:
Linus Torvalds
2015-02-13 09:55:09 -08:00
88 changed files with 6045 additions and 1645 deletions

View File

@@ -66,6 +66,9 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
unsigned int halt_poll_ns = 0;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
/*
* Ordering of locks:
*
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#endif
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
return called;
}
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
long dirty_count = kvm->tlbs_dirty;
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif
void kvm_reload_remote_mmus(struct kvm *kvm)
{
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots,
if (!new->npages) {
WARN_ON(!mslots[i].npages);
new->base_gfn = 0;
new->flags = 0;
if (mslots[i].npages)
slots->used_slots--;
} else {
@@ -993,6 +999,86 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
/**
* kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
* are dirty write protect them for next write.
* @kvm: pointer to kvm instance
* @log: slot id and address to which we copy the log
* @is_dirty: flag set if any page is dirty
*
* We need to keep it in mind that VCPU threads can write to the bitmap
* concurrently. So, to avoid losing track of dirty pages we keep the
* following order:
*
* 1. Take a snapshot of the bit and clear it if needed.
* 2. Write protect the corresponding page.
* 3. Copy the snapshot to the userspace.
* 4. Upon return caller flushes TLB's if needed.
*
* Between 2 and 4, the guest may write to the page using the remaining TLB
* entry. This is not a problem because the page is reported dirty using
* the snapshot taken before and step 4 ensures that writes done after
* exiting to userspace will be logged for the next call.
*
*/
int kvm_get_dirty_log_protect(struct kvm *kvm,
struct kvm_dirty_log *log, bool *is_dirty)
{
struct kvm_memory_slot *memslot;
int r, i;
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
r = -EINVAL;
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
if (!dirty_bitmap)
goto out;
n = kvm_dirty_bitmap_bytes(memslot);
dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
memset(dirty_bitmap_buffer, 0, n);
spin_lock(&kvm->mmu_lock);
*is_dirty = false;
for (i = 0; i < n / sizeof(long); i++) {
unsigned long mask;
gfn_t offset;
if (!dirty_bitmap[i])
continue;
*is_dirty = true;
mask = xchg(&dirty_bitmap[i], 0);
dirty_bitmap_buffer[i] = mask;
offset = i * BITS_PER_LONG;
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
mask);
}
spin_unlock(&kvm->mmu_lock);
r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
goto out;
r = 0;
out:
return r;
}
EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
#endif
bool kvm_largepages_enabled(void)
{
return largepages_enabled;
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
}
return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len)
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(mark_page_dirty);
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
kvm_make_request(KVM_REQ_UNHALT, vcpu);
return -EINTR;
}
if (kvm_cpu_has_pending_timer(vcpu))
return -EINTR;
if (signal_pending(current))
return -EINTR;
return 0;
}
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
DEFINE_WAIT(wait);
bool waited = false;
start = cur = ktime_get();
if (halt_poll_ns) {
ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
* arrives.
*/
if (kvm_vcpu_check_block(vcpu) < 0) {
++vcpu->stat.halt_successful_poll;
goto out;
}
cur = ktime_get();
} while (single_task_running() && ktime_before(cur, stop));
}
for (;;) {
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
if (kvm_arch_vcpu_runnable(vcpu)) {
kvm_make_request(KVM_REQ_UNHALT, vcpu);
break;
}
if (kvm_cpu_has_pending_timer(vcpu))
break;
if (signal_pending(current))
if (kvm_vcpu_check_block(vcpu) < 0)
break;
waited = true;
schedule();
}
finish_wait(&vcpu->wq, &wait);
cur = ktime_get();
out:
trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
static struct file_operations kvm_vcpu_fops = {
.release = kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl,
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
.compat_ioctl = kvm_vcpu_compat_ioctl,
#endif
.mmap = kvm_vcpu_mmap,
@@ -2182,7 +2300,7 @@ out:
return r;
}
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
static long kvm_vcpu_compat_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
static const struct file_operations kvm_device_fops = {
.unlocked_ioctl = kvm_device_ioctl,
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
.compat_ioctl = kvm_device_ioctl,
#endif
.release = kvm_device_release,
@@ -2561,7 +2679,7 @@ out:
return r;
}
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
struct compat_kvm_dirty_log {
__u32 slot;
__u32 padding1;
@@ -2608,7 +2726,7 @@ out:
static struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
#ifdef CONFIG_COMPAT
#ifdef CONFIG_KVM_COMPAT
.compat_ioctl = kvm_vm_compat_ioctl,
#endif
.llseek = noop_llseek,