Merge tag 'kvm-arm-for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into 'kvm-next'

KVM/ARM changes for v4.1:

- fixes for live migration
- irqfd support
- kvm-io-bus & vgic rework to enable ioeventfd
- page ageing for stage-2 translation
- various cleanups
This commit is contained in:
Paolo Bonzini
2015-04-07 18:09:20 +02:00
47 changed files with 1038 additions and 729 deletions

View File

@@ -18,6 +18,7 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on MMU && OF
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@ config KVM
select KVM_ARM_HOST
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
depends on ARM_VIRT_EXT && ARM_LPAE
select MMU_NOTIFIER
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines. You will also
need to select one or more of the processor modules below.
Support hosting virtualized guest machines.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
@@ -37,10 +40,7 @@ config KVM
If unsure, say N.
config KVM_ARM_HOST
bool "KVM host support for ARM cpus."
depends on KVM
depends on MMU
select MMU_NOTIFIER
bool
---help---
Provides host support for ARM processors.
@@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
large, so only choose a reasonable number that you expect to
actually use.
config KVM_ARM_VGIC
bool "KVM support for Virtual GIC"
depends on KVM_ARM_HOST && OF
select HAVE_KVM_IRQCHIP
default y
---help---
Adds support for a hardware assisted, in-kernel GIC emulation.
config KVM_ARM_TIMER
bool "KVM support for Architected Timers"
depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
select HAVE_KVM_IRQCHIP
default y
---help---
Adds support for the Architected Timers in virtual machines
endif # VIRTUALIZATION

View File

@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
plus_virt_def := -DREQUIRES_VIRT=1
endif
ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
ccflags-y += -Iarch/arm/kvm
CFLAGS_arm.o := -I. $(plus_virt_def)
CFLAGS_mmu.o := -I.
@@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
obj-y += $(KVM)/arm/vgic.o
obj-y += $(KVM)/arm/vgic-v2.o
obj-y += $(KVM)/arm/vgic-v2-emul.o
obj-y += $(KVM)/arm/arch_timer.o

View File

@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u8 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
return 0;
return kvm_timer_should_fire(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL;
if (vcpu->arch.pause)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL;
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.pause = false;
break;
case KVM_MP_STATE_STOPPED:
vcpu->arch.pause = true;
break;
default:
return -EINVAL;
}
return 0;
}
/**
@@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return 0;
}
bool kvm_arch_intc_initialized(struct kvm *kvm)
{
return vgic_initialized(kvm);
}
static void vcpu_pause(struct kvm_vcpu *vcpu)
{
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
switch (dev_id) {
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
@@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (vgic_present)
return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
else
return -ENXIO;
return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
}
case KVM_ARM_SET_DEVICE_ADDR: {
struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@ static int init_hyp_mode(void)
if (err)
goto out_free_context;
#ifdef CONFIG_KVM_ARM_VGIC
vgic_present = true;
#endif
/*
* Init HYP architected timer support
*/

View File

@@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
#ifndef CONFIG_KVM_ARM_TIMER
#define NUM_TIMER_REGS 0
static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
return 0;
}
static bool is_timer_reg(u64 index)
{
return false;
}
#else
#define NUM_TIMER_REGS 3
static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return 0;
}
#endif
static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(long)reg->addr;

View File

@@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0
* Assumes vcpu pointer in vcpu reg
*/
.macro save_vgic_state
#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 )
subs r4, r4, #1
bne 1b
2:
#endif
.endm
/*
@@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 )
* Assumes vcpu pointer in vcpu reg
*/
.macro restore_vgic_state
#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 )
subs r4, r4, #1
bne 1b
2:
#endif
.endm
#define CNTHCTL_PL1PCTEN (1 << 0)
@@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 )
* Clobbers r2-r5
*/
.macro save_timer_state
#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 )
mcrr p15, 4, r2, r2, c14 @ CNTVOFF
1:
#endif
@ Allow physical timer/counter access for the host
mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 )
bic r2, r2, #CNTHCTL_PL1PCEN
mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 )
and r2, r2, #3
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
1:
#endif
.endm
.equ vmentry, 0

View File

@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_exit_mmio *mmio)
static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
{
unsigned long rt;
int len;
bool is_write, sign_extend;
int access_size;
bool sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
/* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 1;
}
len = kvm_vcpu_dabt_get_as(vcpu);
if (unlikely(len < 0))
return len;
access_size = kvm_vcpu_dabt_get_as(vcpu);
if (unlikely(access_size < 0))
return access_size;
is_write = kvm_vcpu_dabt_iswrite(vcpu);
*is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
mmio->is_write = is_write;
mmio->phys_addr = fault_ipa;
mmio->len = len;
*len = access_size;
vcpu->arch.mmio_decode.sign_extend = sign_extend;
vcpu->arch.mmio_decode.rt = rt;
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
struct kvm_exit_mmio mmio;
unsigned long data;
unsigned long rt;
int ret;
bool is_write;
int len;
u8 data_buf[8];
/*
* Prepare MMIO operation. First stash it in a private
* structure that we can use for in-kernel emulation. If the
* kernel can't handle it, copy it into run->mmio and let user
* space do its magic.
* Prepare MMIO operation. First decode the syndrome data we get
* from the CPU. Then try if some in-kernel emulation feels
* responsible, otherwise let user space do its magic.
*/
if (kvm_vcpu_dabt_isvalid(vcpu)) {
ret = decode_hsr(vcpu, fault_ipa, &mmio);
ret = decode_hsr(vcpu, &is_write, &len);
if (ret)
return ret;
} else {
@@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
rt = vcpu->arch.mmio_decode.rt;
if (mmio.is_write) {
data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
mmio.len);
if (is_write) {
data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
fault_ipa, data);
mmio_write_buf(mmio.data, mmio.len, data);
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
mmio_write_buf(data_buf, len, data);
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
} else {
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
fault_ipa, 0);
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
}
if (vgic_handle_mmio(vcpu, run, &mmio))
return 1;
/* Now prepare kvm_run for the potential return to userland. */
run->mmio.is_write = is_write;
run->mmio.phys_addr = fault_ipa;
run->mmio.len = len;
memcpy(run->mmio.data, data_buf, len);
kvm_prepare_mmio(run, &mmio);
if (!ret) {
/* We handled the access successfully in the kernel. */
kvm_handle_mmio_return(vcpu, run);
return 1;
}
run->exit_reason = KVM_EXIT_MMIO;
return 0;
}

View File

@@ -1330,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
out_unlock:
spin_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret;
}
/*
* Resolve the access fault by making the page young again.
* Note that because the faulting entry is guaranteed not to be
* cached in the TLB, we don't need to invalidate anything.
*/
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
pmd_t *pmd;
pte_t *pte;
pfn_t pfn;
bool pfn_valid = false;
trace_kvm_access_fault(fault_ipa);
spin_lock(&vcpu->kvm->mmu_lock);
pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
if (!pmd || pmd_none(*pmd)) /* Nothing there */
goto out;
if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
*pmd = pmd_mkyoung(*pmd);
pfn = pmd_pfn(*pmd);
pfn_valid = true;
goto out;
}
pte = pte_offset_kernel(pmd, fault_ipa);
if (pte_none(*pte)) /* Nothing there either */
goto out;
*pte = pte_mkyoung(*pte); /* Just a page... */
pfn = pte_pfn(*pte);
pfn_valid = true;
out:
spin_unlock(&vcpu->kvm->mmu_lock);
if (pfn_valid)
kvm_set_pfn_accessed(pfn);
}
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
@@ -1364,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Check the stage-2 fault is trans. fault or write fault */
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
fault_status != FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1400,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
if (fault_status == FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;
}
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
@@ -1408,15 +1456,16 @@ out_unlock:
return ret;
}
static void handle_hva_to_gpa(struct kvm *kvm,
unsigned long start,
unsigned long end,
void (*handler)(struct kvm *kvm,
gpa_t gpa, void *data),
void *data)
static int handle_hva_to_gpa(struct kvm *kvm,
unsigned long start,
unsigned long end,
int (*handler)(struct kvm *kvm,
gpa_t gpa, void *data),
void *data)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int ret = 0;
slots = kvm_memslots(kvm);
@@ -1440,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
for (; gfn < gfn_end; ++gfn) {
gpa_t gpa = gfn << PAGE_SHIFT;
handler(kvm, gpa, data);
ret |= handler(kvm, gpa, data);
}
}
return ret;
}
static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
unmap_stage2_range(kvm, gpa, PAGE_SIZE);
return 0;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1473,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
}
static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pte_t *pte = (pte_t *)data;
@@ -1485,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
* through this calling path.
*/
stage2_set_pte(kvm, NULL, gpa, pte, 0);
return 0;
}
@@ -1501,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pmd_t *pmd;
pte_t *pte;
pmd = stage2_get_pmd(kvm, NULL, gpa);
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
if (pmd_young(*pmd)) {
*pmd = pmd_mkold(*pmd);
return 1;
}
return 0;
}
pte = pte_offset_kernel(pmd, gpa);
if (pte_none(*pte))
return 0;
if (pte_young(*pte)) {
*pte = pte_mkold(*pte); /* Just a page... */
return 1;
}
return 0;
}
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pmd_t *pmd;
pte_t *pte;
pmd = stage2_get_pmd(kvm, NULL, gpa);
if (!pmd || pmd_none(*pmd)) /* Nothing there */
return 0;
if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */
return pmd_young(*pmd);
pte = pte_offset_kernel(pmd, gpa);
if (!pte_none(*pte)) /* Just a page... */
return pte_young(*pte);
return 0;
}
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
trace_kvm_age_hva(start, end);
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
trace_kvm_test_age_hva(hva);
return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

View File

@@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
__entry->hxfar, __entry->vcpu_pc)
);
TRACE_EVENT(kvm_access_fault,
TP_PROTO(unsigned long ipa),
TP_ARGS(ipa),
TP_STRUCT__entry(
__field( unsigned long, ipa )
),
TP_fast_assign(
__entry->ipa = ipa;
),
TP_printk("IPA: %lx", __entry->ipa)
);
TRACE_EVENT(kvm_irq_line,
TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
);
TRACE_EVENT(kvm_age_hva,
TP_PROTO(unsigned long start, unsigned long end),
TP_ARGS(start, end),
TP_STRUCT__entry(
__field( unsigned long, start )
__field( unsigned long, end )
),
TP_fast_assign(
__entry->start = start;
__entry->end = end;
),
TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
__entry->start, __entry->end)
);
TRACE_EVENT(kvm_test_age_hva,
TP_PROTO(unsigned long hva),
TP_ARGS(hva),
TP_STRUCT__entry(
__field( unsigned long, hva )
),
TP_fast_assign(
__entry->hva = hva;
),
TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
);
TRACE_EVENT(kvm_hvc,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),