Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM update from Paolo Bonzini:
 "Fairly small update, but there are some interesting new features.

  Common: Optional support for adding a small amount of polling on each
  HLT instruction executed in the guest (or equivalent for other
  architectures). This can improve latency up to 50% in some scenarios
  (e.g. O_DSYNC writes or TCP_RR netperf tests). This also has to be
  enabled manually for now, but the plan is to auto-tune it in the
  future.

  ARM/ARM64: The highlights are support for GICv3 emulation and dirty
  page tracking.

  s390: Several optimizations and bugfixes. Also a first: a feature
  exposed by KVM (UUID and long guest name in /proc/sysinfo) before it
  is available in IBM's hypervisor! :)

  MIPS: Bugfixes.

  x86: Support for PML (page modification logging, a new feature in
  Broadwell Xeons that speeds up dirty page tracking), nested
  virtualization improvements (nested APICv, a nice optimization), and
  the usual round of emulation fixes. There is also a new option to
  reduce latency of the TSC deadline timer in the guest; this needs to
  be tuned manually.

  Some commits are common between this pull and Catalin's; I see you
  have already included his tree.

  Powerpc: Nothing yet. The KVM/PPC changes will come in through the
  PPC maintainers, because I haven't received them yet and I might end
  up being offline for some part of next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: ia64: drop kvm.h from installed user headers
  KVM: x86: fix build with !CONFIG_SMP
  KVM: x86: emulate: correct page fault error code for NoWrite instructions
  KVM: Disable compat ioctl for s390
  KVM: s390: add cpu model support
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390/CPACF: Choose crypto control block format
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID
  KVM: s390: reenable LPP facility
  KVM: s390: floating irqs: fix user triggerable endless loop
  kvm: add halt_poll_ns module parameter
  kvm: remove KVM_MMIO_SIZE
  KVM: MIPS: Don't leak FPU/DSP to guest
  KVM: MIPS: Disable HTW while in guest
  KVM: nVMX: Enable nested posted interrupt processing
  KVM: nVMX: Enable nested virtual interrupt delivery
  KVM: nVMX: Enable nested apic register virtualization
  KVM: nVMX: Make nested control MSRs per-cpu
  KVM: nVMX: Enable nested virtualize x2apic mode
  KVM: nVMX: Prepare for using hardware MSR bitmap
  ...
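The halt-polling feature in the message above is simple to picture: before paying for a full sleep on HLT, the vCPU thread spins for a short, bounded window checking whether the wakeup condition has already arrived. A minimal user-space sketch of that pattern follows; it is an illustration only, and the names event_pending, block and poll_window_ns are invented here, not kernel APIs (the real implementation is the kvm_vcpu_block() hunk further down).

#include <stdbool.h>
#include <time.h>

static unsigned int poll_window_ns;	/* analogous to the new halt_poll_ns */

static long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Poll briefly before blocking: if the event arrives inside the window,
 * the sleep/reschedule round trip is skipped entirely. */
void halt_like_wait(bool (*event_pending)(void), void (*block)(void))
{
	long long stop = now_ns() + poll_window_ns;

	while (poll_window_ns && now_ns() < stop)
		if (event_pending())
			return;		/* a "successful poll" */

	block();	/* fall back to really sleeping, as before */
}

In the kernel hunks below, the window is the new halt_poll_ns module parameter, the check is kvm_vcpu_check_block(), and hits are counted in vcpu->stat.halt_successful_poll. Since module_param() registers the knob with S_IWUSR, it can typically be adjusted at runtime through /sys/module/kvm/parameters/halt_poll_ns; note the poll loop also bails out via single_task_running(), so polling never competes with other runnable tasks. The relevant virt/kvm/kvm_main.c hunks: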
@@ -66,6 +66,9 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+unsigned int halt_poll_ns = 0;
+module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+
 /*
  * Ordering of locks:
  *
@@ -89,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
 				  unsigned long arg);
 #endif
@@ -176,6 +179,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	return called;
 }
 
+#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
 	long dirty_count = kvm->tlbs_dirty;
@@ -186,6 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
+#endif
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@@ -673,6 +678,7 @@ static void update_memslots(struct kvm_memslots *slots,
 	if (!new->npages) {
 		WARN_ON(!mslots[i].npages);
 		new->base_gfn = 0;
+		new->flags = 0;
 		if (mslots[i].npages)
 			slots->used_slots--;
 	} else {
@@ -993,6 +999,86 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+/**
+ * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
+ *	are dirty write protect them for next write.
+ * @kvm:	pointer to kvm instance
+ * @log:	slot id and address to which we copy the log
+ * @is_dirty:	flag set if any page is dirty
+ *
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing track of dirty pages we keep the
+ * following order:
+ *
+ *    1. Take a snapshot of the bit and clear it if needed.
+ *    2. Write protect the corresponding page.
+ *    3. Copy the snapshot to the userspace.
+ *    4. Upon return caller flushes TLB's if needed.
+ *
+ * Between 2 and 4, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page is reported dirty using
+ * the snapshot taken before and step 4 ensures that writes done after
+ * exiting to userspace will be logged for the next call.
+ *
+ */
+int kvm_get_dirty_log_protect(struct kvm *kvm,
+			struct kvm_dirty_log *log, bool *is_dirty)
+{
+	struct kvm_memory_slot *memslot;
+	int r, i;
+	unsigned long n;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	r = -ENOENT;
+	if (!dirty_bitmap)
+		goto out;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
+
+	spin_lock(&kvm->mmu_lock);
+	*is_dirty = false;
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
+
+		if (!dirty_bitmap[i])
+			continue;
+
+		*is_dirty = true;
+
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
+
+		offset = i * BITS_PER_LONG;
+		kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
+							mask);
+	}
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
+
+	r = 0;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
+#endif
+
 bool kvm_largepages_enabled(void)
 {
 	return largepages_enabled;
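The ordering spelled out in the comment above is what makes this safe against vCPUs that keep setting bits concurrently: the snapshot step must grab and clear each bitmap word atomically, so a racing set lands either in this snapshot or in the next one, never nowhere. A standalone C11 sketch of just that step (the bitmap sizing and helper names here are invented for illustration; the kernel uses xchg() and its own bitmap helpers):

#include <stdatomic.h>

#define BITMAP_WORDS	4
#define BITS_PER_WORD	(8 * sizeof(unsigned long))

static _Atomic unsigned long dirty_bitmap[BITMAP_WORDS];

/* Writer side, cf. mark_page_dirty(): a vCPU thread flags a page. */
void set_dirty(unsigned long page)
{
	atomic_fetch_or(&dirty_bitmap[page / BITS_PER_WORD],
			1UL << (page % BITS_PER_WORD));
}

/* Reader side, cf. the xchg() in kvm_get_dirty_log_protect(): snapshot
 * and clear in one atomic step, before write-protecting the pages and
 * copying the snapshot out to userspace. */
void snapshot_dirty(unsigned long *snapshot)
{
	for (int i = 0; i < BITMAP_WORDS; i++)
		snapshot[i] = atomic_exchange(&dirty_bitmap[i], 0);
}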
@@ -1551,6 +1637,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			      gpa_t gpa, unsigned long len)
@@ -1687,29 +1774,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
+static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+	if (kvm_arch_vcpu_runnable(vcpu)) {
+		kvm_make_request(KVM_REQ_UNHALT, vcpu);
+		return -EINTR;
+	}
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return -EINTR;
+	if (signal_pending(current))
+		return -EINTR;
+
+	return 0;
+}
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
+	ktime_t start, cur;
 	DEFINE_WAIT(wait);
+	bool waited = false;
+
+	start = cur = ktime_get();
+	if (halt_poll_ns) {
+		ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+		do {
+			/*
+			 * This sets KVM_REQ_UNHALT if an interrupt
+			 * arrives.
+			 */
+			if (kvm_vcpu_check_block(vcpu) < 0) {
+				++vcpu->stat.halt_successful_poll;
+				goto out;
+			}
+			cur = ktime_get();
+		} while (single_task_running() && ktime_before(cur, stop));
+	}
 
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_arch_vcpu_runnable(vcpu)) {
-			kvm_make_request(KVM_REQ_UNHALT, vcpu);
-			break;
-		}
-		if (kvm_cpu_has_pending_timer(vcpu))
-			break;
-		if (signal_pending(current))
+		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
 
+		waited = true;
 		schedule();
 	}
 
 	finish_wait(&vcpu->wq, &wait);
+	cur = ktime_get();
+
+out:
+	trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
@@ -1892,7 +2010,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl   = kvm_vcpu_compat_ioctl,
 #endif
 	.mmap           = kvm_vcpu_mmap,
@@ -2182,7 +2300,7 @@ out:
 	return r;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *filp,
 				  unsigned int ioctl, unsigned long arg)
 {
@@ -2274,7 +2392,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
 static const struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl = kvm_device_ioctl,
 #endif
 	.release = kvm_device_release,
@@ -2561,7 +2679,7 @@ out:
 	return r;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 struct compat_kvm_dirty_log {
 	__u32 slot;
 	__u32 padding1;
@@ -2608,7 +2726,7 @@ out:
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_KVM_COMPAT
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
 	.llseek		= noop_llseek,