Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Avi Kivity:
 "Highlights of the changes for this release include support for vfio
  level triggered interrupts, improved big real mode support on older
  Intels, a streamlined guest page table walker, guest APIC speedups,
  PIO optimizations, better overcommit handling, and read-only memory."

* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: s390: Fix vcpu_load handling in interrupt code
  KVM: x86: Fix guest debug across vcpu INIT reset
  KVM: Add resampling irqfds for level triggered interrupts
  KVM: optimize apic interrupt delivery
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly
  KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
  ...

Conflicts:
	arch/s390/include/asm/processor.h
	arch/x86/kvm/i8259.c
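
Of the highlights above, the read-only memory support is the one most directly visible in the hunks below (the new KVM_MEM_READONLY flag accepted by check_memory_region_flags(), and the KVM_HVA_ERR_RO_BAD / KVM_PFN_ERR_RO_FAULT error paths). The following is a minimal, self-contained sketch of how userspace might register such a slot; the slot number, guest physical address, size and anonymous backing mapping are illustrative assumptions rather than anything taken from this merge, and the expectation that guest writes to the slot surface to userspace as exits (instead of modifying the backing memory) follows the series description and should be read as such.

/*
 * Hypothetical userspace sketch: register a read-only memory slot.
 * Build against uapi headers that ship KVM_MEM_READONLY and
 * KVM_CAP_READONLY_MEM (introduced by this release).
 */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	if (vm < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	/* The capability advertises support for KVM_MEM_READONLY. */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0) {
		fprintf(stderr, "read-only memslots not supported\n");
		return 1;
	}

	/* 64 KiB of anonymous memory standing in for a ROM/flash image. */
	size_t size = 0x10000;
	void *rom = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (rom == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(rom, 0xff, size);

	struct kvm_userspace_memory_region region = {
		.slot            = 0,          /* illustrative slot number */
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = 0xffff0000, /* illustrative GPA */
		.memory_size     = size,
		.userspace_addr  = (unsigned long)rom,
	};

	/*
	 * Guest reads are served from 'rom'; guest writes are not applied to
	 * the backing memory and (per the series description) reach userspace
	 * as exits, which can emulate flash command cycles or discard them.
	 */
	if (ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region) < 0) {
		perror("KVM_SET_USER_MEMORY_REGION");
		return 1;
	}

	printf("read-only slot at GPA 0x%llx, %zu bytes\n",
	       (unsigned long long)region.guest_phys_addr, size);
	return 0;
}

The flag is aimed at use cases such as ROM and flash emulation, where the guest may read and execute the region while write cycles are trapped and interpreted by the VMM; the in-kernel plumbing for that is what the __gfn_to_hva_many() / __gfn_to_pfn_memslot() changes in the diff below implement.
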
@@ -100,13 +100,7 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
static struct page *hwpoison_page;
static pfn_t hwpoison_pfn;
struct page *fault_page;
pfn_t fault_pfn;
inline int kvm_is_mmio_pfn(pfn_t pfn)
bool kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn)) {
int reserved;
@@ -137,11 +131,12 @@ inline int kvm_is_mmio_pfn(pfn_t pfn)
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
void vcpu_load(struct kvm_vcpu *vcpu)
int vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu;
mutex_lock(&vcpu->mutex);
if (mutex_lock_killable(&vcpu->mutex))
return -EINTR;
if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
/* The thread running this VCPU changed. */
struct pid *oldpid = vcpu->pid;
@@ -154,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
return 0;
}
void vcpu_put(struct kvm_vcpu *vcpu)
@@ -236,6 +232,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
}
vcpu->run = page_address(page);
kvm_vcpu_set_in_spin_loop(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
r = kvm_arch_vcpu_init(vcpu);
if (r < 0)
goto fail_free_run;
@@ -332,8 +331,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
for (; start < end; start += PAGE_SIZE)
need_tlb_flush |= kvm_unmap_hva(kvm, start);
need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -412,7 +410,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
int idx;
idx = srcu_read_lock(&kvm->srcu);
kvm_arch_flush_shadow(kvm);
kvm_arch_flush_shadow_all(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -551,16 +549,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->rmap != dont->rmap)
vfree(free->rmap);
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
kvm_arch_free_memslot(free, dont);
free->npages = 0;
free->rmap = NULL;
}
void kvm_free_physmem(struct kvm *kvm)
@@ -590,7 +584,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
kvm_arch_flush_shadow(kvm);
kvm_arch_flush_shadow_all(kvm);
#endif
kvm_arch_destroy_vm(kvm);
kvm_free_physmem(kvm);
@@ -686,6 +680,20 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
slots->generation++;
}
static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
#ifdef KVM_CAP_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
if (mem->flags & ~valid_flags)
return -EINVAL;
return 0;
}
/*
* Allocate some memory and give it an address in the guest physical address
* space.
@@ -706,6 +714,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot old, new;
struct kvm_memslots *slots, *old_memslots;
r = check_memory_region_flags(mem);
if (r)
goto out;
r = -EINVAL;
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
@@ -769,11 +781,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (npages && !old.npages) {
new.user_alloc = user_alloc;
new.userspace_addr = mem->userspace_addr;
#ifndef CONFIG_S390
new.rmap = vzalloc(npages * sizeof(*new.rmap));
if (!new.rmap)
goto out_free;
#endif /* not defined CONFIG_S390 */
if (kvm_arch_create_memslot(&new, npages))
goto out_free;
}
@@ -785,7 +793,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* destroy any largepage mappings for dirty tracking */
}
if (!npages) {
if (!npages || base_gfn != old.base_gfn) {
struct kvm_memory_slot *slot;
r = -ENOMEM;
@@ -801,14 +809,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
/* From this point no new shadow pages pointing to a deleted
* memslot will be created.
/* From this point no new shadow pages pointing to a deleted,
* or moved, memslot will be created.
*
* validation of sp->gfn happens in:
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
* - kvm_is_visible_gfn (mmu_check_roots)
*/
kvm_arch_flush_shadow(kvm);
kvm_arch_flush_shadow_memslot(kvm, slot);
kfree(old_memslots);
}
@@ -832,7 +840,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
new.rmap = NULL;
new.dirty_bitmap = NULL;
memset(&new.arch, 0, sizeof(new.arch));
}
@@ -844,13 +851,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
/*
* If the new memory slot is created, we need to clear all
* mmio sptes.
*/
if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
kvm_arch_flush_shadow(kvm);
kvm_free_physmem_slot(&old, &new);
kfree(old_memslots);
@@ -932,53 +932,6 @@ void kvm_disable_largepages(void)
}
EXPORT_SYMBOL_GPL(kvm_disable_largepages);
int is_error_page(struct page *page)
{
return page == bad_page || page == hwpoison_page || page == fault_page;
}
EXPORT_SYMBOL_GPL(is_error_page);
int is_error_pfn(pfn_t pfn)
{
return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);
int is_hwpoison_pfn(pfn_t pfn)
{
return pfn == hwpoison_pfn;
}
EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
int is_fault_pfn(pfn_t pfn)
{
return pfn == fault_pfn;
}
EXPORT_SYMBOL_GPL(is_fault_pfn);
int is_noslot_pfn(pfn_t pfn)
{
return pfn == bad_pfn;
}
EXPORT_SYMBOL_GPL(is_noslot_pfn);
int is_invalid_pfn(pfn_t pfn)
{
return pfn == hwpoison_pfn || pfn == fault_pfn;
}
EXPORT_SYMBOL_GPL(is_invalid_pfn);
static inline unsigned long bad_hva(void)
{
return PAGE_OFFSET;
}
int kvm_is_error_hva(unsigned long addr)
{
return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
@@ -1021,28 +974,62 @@ out:
return size;
}
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
static bool memslot_is_readonly(struct kvm_memory_slot *slot)
{
return slot->flags & KVM_MEM_READONLY;
}
static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages, bool write)
{
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
return KVM_HVA_ERR_BAD;
if (memslot_is_readonly(slot) && write)
return KVM_HVA_ERR_RO_BAD;
if (nr_pages)
*nr_pages = slot->npages - (gfn - slot->base_gfn);
return gfn_to_hva_memslot(slot, gfn);
return __gfn_to_hva_memslot(slot, gfn);
}
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
return __gfn_to_hva_many(slot, gfn, nr_pages, true);
}
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
static pfn_t get_fault_pfn(void)
/*
* The hva returned by this function is only allowed to be read.
* It should pair with kvm_read_hva() or kvm_read_hva_atomic().
*/
static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
{
get_page(fault_page);
return fault_pfn;
return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
}
static int kvm_read_hva(void *data, void __user *hva, int len)
{
return __copy_from_user(data, hva, len);
}
static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
{
return __copy_from_user_inatomic(data, hva, len);
}
int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
@@ -1065,108 +1052,186 @@ static inline int check_user_page_hwpoison(unsigned long addr)
return rc == -EHWPOISON;
}
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
bool *async, bool write_fault, bool *writable)
/*
* The atomic path to get the writable pfn which will be stored in @pfn,
* true indicates success, otherwise false is returned.
*/
static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
bool write_fault, bool *writable, pfn_t *pfn)
{
struct page *page[1];
int npages;
if (!(async || atomic))
return false;
/*
* Fast pin a writable pfn only if it is a write fault request
* or the caller allows to map a writable pfn for a read fault
* request.
*/
if (!(write_fault || writable))
return false;
npages = __get_user_pages_fast(addr, 1, 1, page);
if (npages == 1) {
*pfn = page_to_pfn(page[0]);
if (writable)
*writable = true;
return true;
}
return false;
}
/*
* The slow path to get the pfn of the specified host virtual address,
* 1 indicates success, -errno is returned if error is detected.
*/
static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
bool *writable, pfn_t *pfn)
{
struct page *page[1];
int npages = 0;
pfn_t pfn;
might_sleep();
if (writable)
*writable = write_fault;
if (async) {
down_read(&current->mm->mmap_sem);
npages = get_user_page_nowait(current, current->mm,
addr, write_fault, page);
up_read(&current->mm->mmap_sem);
} else
npages = get_user_pages_fast(addr, 1, write_fault,
page);
if (npages != 1)
return npages;
/* map read fault as writable if possible */
if (unlikely(!write_fault) && writable) {
struct page *wpage[1];
npages = __get_user_pages_fast(addr, 1, 1, wpage);
if (npages == 1) {
*writable = true;
put_page(page[0]);
page[0] = wpage[0];
}
npages = 1;
}
*pfn = page_to_pfn(page[0]);
return npages;
}
static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
{
if (unlikely(!(vma->vm_flags & VM_READ)))
return false;
if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
return false;
return true;
}
/*
* Pin guest page in memory and return its pfn.
* @addr: host virtual address which maps memory to the guest
* @atomic: whether this function can sleep
* @async: whether this function need to wait IO complete if the
* host page is not in the memory
* @write_fault: whether we should get a writable host page
* @writable: whether it allows to map a writable host page for !@write_fault
*
* The function will map a writable host page for these two cases:
* 1): @write_fault = true
* 2): @write_fault = false && @writable, @writable will tell the caller
* whether the mapping is writable.
*/
static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
bool write_fault, bool *writable)
{
struct vm_area_struct *vma;
pfn_t pfn = 0;
int npages;
/* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async);
BUG_ON(!write_fault && !writable);
if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
return pfn;
if (writable)
*writable = true;
if (atomic)
return KVM_PFN_ERR_FAULT;
if (atomic || async)
npages = __get_user_pages_fast(addr, 1, 1, page);
npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
if (npages == 1)
return pfn;
if (unlikely(npages != 1) && !atomic) {
might_sleep();
if (writable)
*writable = write_fault;
if (async) {
down_read(&current->mm->mmap_sem);
npages = get_user_page_nowait(current, current->mm,
addr, write_fault, page);
up_read(&current->mm->mmap_sem);
} else
npages = get_user_pages_fast(addr, 1, write_fault,
page);
/* map read fault as writable if possible */
if (unlikely(!write_fault) && npages == 1) {
struct page *wpage[1];
npages = __get_user_pages_fast(addr, 1, 1, wpage);
if (npages == 1) {
*writable = true;
put_page(page[0]);
page[0] = wpage[0];
}
npages = 1;
}
down_read(&current->mm->mmap_sem);
if (npages == -EHWPOISON ||
(!async && check_user_page_hwpoison(addr))) {
pfn = KVM_PFN_ERR_HWPOISON;
goto exit;
}
if (unlikely(npages != 1)) {
struct vm_area_struct *vma;
if (atomic)
return get_fault_pfn();
down_read(&current->mm->mmap_sem);
if (npages == -EHWPOISON ||
(!async && check_user_page_hwpoison(addr))) {
up_read(&current->mm->mmap_sem);
get_page(hwpoison_page);
return page_to_pfn(hwpoison_page);
}
vma = find_vma_intersection(current->mm, addr, addr+1);
if (vma == NULL)
pfn = get_fault_pfn();
else if ((vma->vm_flags & VM_PFNMAP)) {
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
BUG_ON(!kvm_is_mmio_pfn(pfn));
} else {
if (async && (vma->vm_flags & VM_WRITE))
*async = true;
pfn = get_fault_pfn();
}
up_read(&current->mm->mmap_sem);
} else
pfn = page_to_pfn(page[0]);
vma = find_vma_intersection(current->mm, addr, addr + 1);
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if ((vma->vm_flags & VM_PFNMAP)) {
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
BUG_ON(!kvm_is_mmio_pfn(pfn));
} else {
if (async && vma_is_valid(vma, write_fault))
*async = true;
pfn = KVM_PFN_ERR_FAULT;
}
exit:
up_read(&current->mm->mmap_sem);
return pfn;
}
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
static pfn_t
__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
bool *async, bool write_fault, bool *writable)
{
return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
if (addr == KVM_HVA_ERR_RO_BAD)
return KVM_PFN_ERR_RO_FAULT;
if (kvm_is_error_hva(addr))
return KVM_PFN_ERR_BAD;
/* Do not map writable pfn in the readonly memslot. */
if (writable && memslot_is_readonly(slot)) {
*writable = false;
writable = NULL;
}
return hva_to_pfn(addr, atomic, async, write_fault,
writable);
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
bool write_fault, bool *writable)
{
unsigned long addr;
struct kvm_memory_slot *slot;
if (async)
*async = false;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr)) {
get_page(bad_page);
return page_to_pfn(bad_page);
}
slot = gfn_to_memslot(kvm, gfn);
return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
writable);
}
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,13 +1260,17 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn)
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
}
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
int nr_pages)
{
@@ -1219,30 +1288,42 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
}
EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
static struct page *kvm_pfn_to_page(pfn_t pfn)
{
if (is_error_pfn(pfn))
return KVM_ERR_PTR_BAD_PAGE;
if (kvm_is_mmio_pfn(pfn)) {
WARN_ON(1);
return KVM_ERR_PTR_BAD_PAGE;
}
return pfn_to_page(pfn);
}
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
pfn = gfn_to_pfn(kvm, gfn);
if (!kvm_is_mmio_pfn(pfn))
return pfn_to_page(pfn);
WARN_ON(kvm_is_mmio_pfn(pfn));
get_page(bad_page);
return bad_page;
return kvm_pfn_to_page(pfn);
}
EXPORT_SYMBOL_GPL(gfn_to_page);
void kvm_release_page_clean(struct page *page)
{
WARN_ON(is_error_page(page));
kvm_release_pfn_clean(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(pfn_t pfn)
{
WARN_ON(is_error_pfn(pfn));
if (!kvm_is_mmio_pfn(pfn))
put_page(pfn_to_page(pfn));
}
@@ -1250,6 +1331,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
void kvm_release_page_dirty(struct page *page)
{
WARN_ON(is_error_page(page));
kvm_release_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
@@ -1305,10 +1388,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int r;
unsigned long addr;
addr = gfn_to_hva(kvm, gfn);
addr = gfn_to_hva_read(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_from_user(data, (void __user *)addr + offset, len);
r = kvm_read_hva(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
@@ -1343,11 +1426,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
gfn_t gfn = gpa >> PAGE_SHIFT;
int offset = offset_in_page(gpa);
addr = gfn_to_hva(kvm, gfn);
addr = gfn_to_hva_read(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
pagefault_enable();
if (r)
return -EFAULT;
@@ -1580,6 +1663,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/*
* Helper that checks whether a VCPU is eligible for directed yield.
* Most eligible candidate to yield is decided by following heuristics:
*
* (a) VCPU which has not done pl-exit or cpu relax intercepted recently
* (preempted lock holder), indicated by @in_spin_loop.
* Set at the beiginning and cleared at the end of interception/PLE handler.
*
* (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
* chance last time (mostly it has become eligible now since we have probably
* yielded to lockholder in last iteration. This is done by toggling
* @dy_eligible each time a VCPU checked for eligibility.)
*
* Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
* to preempted lock-holder could result in wrong VCPU selection and CPU
* burning. Giving priority for a potential lock-holder increases lock
* progress.
*
* Since algorithm is based on heuristics, accessing another VCPU data without
* locking does not harm. It may result in trying to yield to same VCPU, fail
* and continue with next VCPU and so on.
*/
bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
bool eligible;
eligible = !vcpu->spin_loop.in_spin_loop ||
(vcpu->spin_loop.in_spin_loop &&
vcpu->spin_loop.dy_eligible);
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
return eligible;
}
#endif
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
struct kvm *kvm = me->kvm;
@@ -1589,6 +1709,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
int pass;
int i;
kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
* currently running, because it got preempted by something
@@ -1607,6 +1728,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (waitqueue_active(&vcpu->wq))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
if (kvm_vcpu_yield_to(vcpu)) {
kvm->last_boosted_vcpu = i;
yielded = 1;
@@ -1614,6 +1737,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
}
}
}
kvm_vcpu_set_in_spin_loop(me, false);
/* Ensure vcpu is not eligible during next spinloop */
kvm_vcpu_set_dy_eligible(me, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
@@ -1766,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
#endif
vcpu_load(vcpu);
r = vcpu_load(vcpu);
if (r)
return r;
switch (ioctl) {
case KVM_RUN:
r = -EINVAL;
@@ -2093,6 +2222,29 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_send_userspace_msi(kvm, &msi);
break;
}
#endif
#ifdef __KVM_HAVE_IRQ_LINE
case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
r = -EFAULT;
if (copy_from_user(&irq_event, argp, sizeof irq_event))
goto out;
r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
if (r)
goto out;
r = -EFAULT;
if (ioctl == KVM_IRQ_LINE_STATUS) {
if (copy_to_user(argp, &irq_event, sizeof irq_event))
goto out;
}
r = 0;
break;
}
#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
@@ -2698,9 +2850,6 @@ static struct syscore_ops kvm_syscore_ops = {
.resume = kvm_resume,
};
struct page *bad_page;
pfn_t bad_pfn;
static inline
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
{
@@ -2732,33 +2881,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
if (r)
goto out_fail;
bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (bad_page == NULL) {
r = -ENOMEM;
goto out;
}
bad_pfn = page_to_pfn(bad_page);
hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (hwpoison_page == NULL) {
r = -ENOMEM;
goto out_free_0;
}
hwpoison_pfn = page_to_pfn(hwpoison_page);
fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (fault_page == NULL) {
r = -ENOMEM;
goto out_free_0;
}
fault_pfn = page_to_pfn(fault_page);
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
@@ -2833,12 +2955,6 @@ out_free_1:
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
if (fault_page)
__free_page(fault_page);
if (hwpoison_page)
__free_page(hwpoison_page);
__free_page(bad_page);
out:
kvm_arch_exit();
out_fail:
return r;
@@ -2858,8 +2974,5 @@ void kvm_exit(void)
kvm_arch_hardware_unsetup();
kvm_arch_exit();
free_cpumask_var(cpus_hardware_enabled);
__free_page(fault_page);
__free_page(hwpoison_page);
__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);