Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "ARM and x86 bugfixes of all kinds.

  The most visible one is that migrating a nested hypervisor has always
  been busted on Broadwell and newer processors, and that has finally
  been fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
  KVM: x86: omit "impossible" pmu MSRs from MSR list
  KVM: nVMX: Fix consistency check on injected exception error code
  KVM: x86: omit absent pmu MSRs from MSR list
  selftests: kvm: Fix libkvm build error
  kvm: vmx: Limit guest PMCs to those supported on the host
  kvm: x86, powerpc: do not allow clearing largepages debugfs entry
  KVM: selftests: x86: clarify what is reported on KVM_GET_MSRS failure
  KVM: VMX: Set VMENTER_L1D_FLUSH_NOT_REQUIRED if !X86_BUG_L1TF
  selftests: kvm: add test for dirty logging inside nested guests
  KVM: x86: fix nested guest live migration with PML
  KVM: x86: assign two bits to track SPTE kinds
  KVM: x86: Expose XSAVEERPTR to the guest
  kvm: x86: Enumerate support for CLZERO instruction
  kvm: x86: Use AMD CPUID semantics for AMD vCPUs
  kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH
  KVM: X86: Fix userspace set invalid CR4
  kvm: x86: Fix a spurious -E2BIG in __do_cpuid_func
  KVM: LAPIC: Loosen filter for adaptive tuning of lapic_timer_advance_ns
  KVM: arm/arm64: vgic: Use the appropriate TRACE_INCLUDE_PATH
  arm64: KVM: Kill hyp_alternate_select()
  ...
This commit is contained in:
Linus Torvalds
2019-10-04 11:17:51 -07:00
23 changed files with 593 additions and 191 deletions

View File

@@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
@@ -48,7 +49,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
$(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
# On s390, build the testcases KVM-enabled
pgste-option = $(call try-run, echo 'int main() { return 0; }' | \

View File

@@ -1083,6 +1083,9 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F

View File

@@ -569,6 +569,10 @@ struct vmx_pages {
void *enlightened_vmcs_hva;
uint64_t enlightened_vmcs_gpa;
void *enlightened_vmcs;
void *eptp_hva;
uint64_t eptp_gpa;
void *eptp;
};
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
@@ -576,4 +580,14 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size,
uint32_t eptp_memslot);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot, uint32_t eptp_memslot);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
#endif /* SELFTEST_KVM_VMX_H */

View File

@@ -705,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
* on error (e.g. currently no memory region using memslot as a KVM
* memory slot ID).
*/
static struct userspace_mem_region *
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;

View File

@@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot);
#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */

View File

@@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
for (i = 0; i < nmsrs; i++)
state->msrs.entries[i].index = list->indices[i];
r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
r, r == nmsrs ? -1 : list->indices[r]);
r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);

View File

@@ -7,11 +7,39 @@
#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
#define PAGE_SHIFT_4K 12
#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
bool enable_evmcs;
struct eptPageTableEntry {
uint64_t readable:1;
uint64_t writable:1;
uint64_t executable:1;
uint64_t memory_type:3;
uint64_t ignore_pat:1;
uint64_t page_size:1;
uint64_t accessed:1;
uint64_t dirty:1;
uint64_t ignored_11_10:2;
uint64_t address:40;
uint64_t ignored_62_52:11;
uint64_t suppress_ve:1;
};
struct eptPageTablePointer {
uint64_t memory_type:3;
uint64_t page_walk_length:3;
uint64_t ad_enabled:1;
uint64_t reserved_11_07:5;
uint64_t address:40;
uint64_t reserved_63_52:12;
};
int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
{
uint16_t evmcs_ver;
@@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx)
*/
static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
{
uint32_t sec_exec_ctl = 0;
vmwrite(VIRTUAL_PROCESSOR_ID, 0);
vmwrite(POSTED_INTR_NV, 0);
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
if (vmx->eptp_gpa) {
uint64_t ept_paddr;
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
.ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
vmwrite(EPT_POINTER, ept_paddr);
sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
}
if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl))
vmwrite(CPU_BASED_VM_EXEC_CONTROL,
rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
else
else {
vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
GUEST_ASSERT(!sec_exec_ctl);
}
vmwrite(EXCEPTION_BITMAP, 0);
vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
@@ -327,3 +375,152 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_host_state();
init_vmcs_guest_state(guest_rip, guest_rsp);
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
{
uint16_t index[4];
struct eptPageTableEntry *pml4e;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr % vm->page_size) == 0,
"Nested physical address not on page boundary,\n"
" nested_paddr: 0x%lx vm->page_size: 0x%x",
nested_paddr, vm->page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
TEST_ASSERT((paddr % vm->page_size) == 0,
"Physical address not on page boundary,\n"
" paddr: 0x%lx vm->page_size: 0x%x",
paddr, vm->page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
index[0] = (nested_paddr >> 12) & 0x1ffu;
index[1] = (nested_paddr >> 21) & 0x1ffu;
index[2] = (nested_paddr >> 30) & 0x1ffu;
index[3] = (nested_paddr >> 39) & 0x1ffu;
/* Allocate page directory pointer table if not present. */
pml4e = vmx->eptp_hva;
if (!pml4e[index[3]].readable) {
pml4e[index[3]].address = vm_phy_page_alloc(vm,
KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
>> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].readable = true;
pml4e[index[3]].executable = true;
}
/* Allocate page directory table if not present. */
struct eptPageTableEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].readable) {
pdpe[index[2]].address = vm_phy_page_alloc(vm,
KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
>> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].readable = true;
pdpe[index[2]].executable = true;
}
/* Allocate page table if not present. */
struct eptPageTableEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].readable) {
pde[index[1]].address = vm_phy_page_alloc(vm,
KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
>> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].readable = true;
pde[index[1]].executable = true;
}
/* Fill in page table entry. */
struct eptPageTableEntry *pte;
pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
pte[index[0]].address = paddr >> vm->page_shift;
pte[index[0]].writable = true;
pte[index[0]].readable = true;
pte[index[0]].executable = true;
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
pte[index[0]].accessed = true;
pte[index[0]].dirty = true;
}
/*
* Map a range of EPT guest physical addresses to the VM's physical address
*
* Input Args:
* vm - Virtual Machine
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
* eptp_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
* Return: None
*
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint64_t size,
uint32_t eptp_memslot)
{
size_t page_size = vm->page_size;
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
nested_paddr += page_size;
paddr += page_size;
}
}
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot, uint32_t eptp_memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
memslot2region(vm, memslot);
i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
last = i + (region->region.memory_size >> vm->page_shift);
for (;;) {
i = sparsebit_next_clear(region->unused_phy_pages, i);
if (i > last)
break;
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
1 << vm->page_shift,
eptp_memslot);
}
}
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}

View File

@@ -0,0 +1,156 @@
// SPDX-License-Identifier: GPL-2.0
/*
* KVM dirty page logging test
*
* Copyright (C) 2018, Red Hat, Inc.
*/
#define _GNU_SOURCE /* for program_invocation_name */
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#define VCPU_ID 1
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
#define TEST_MEM_SIZE 3
/* L1 guest test virtual memory offset */
#define GUEST_TEST_MEM 0xc0000000
/* L2 guest test virtual memory offset */
#define NESTED_TEST_MEM1 0xc0001000
#define NESTED_TEST_MEM2 0xc0002000
static void l2_guest_code(void)
{
*(volatile uint64_t *)NESTED_TEST_MEM1;
*(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
GUEST_SYNC(true);
GUEST_SYNC(false);
*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
GUEST_SYNC(true);
*(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
GUEST_SYNC(true);
GUEST_SYNC(false);
/* Exit to L1 and never come back. */
vmcall();
}
void l1_guest_code(struct vmx_pages *vmx)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
GUEST_ASSERT(load_vmcs(vmx));
prepare_vmcs(vmx, l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(false);
GUEST_ASSERT(!vmlaunch());
GUEST_SYNC(false);
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
GUEST_DONE();
}
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva = 0;
struct vmx_pages *vmx;
unsigned long *bmap;
uint64_t *host_test_mem;
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
bool done = false;
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
run = vcpu_state(vm, VCPU_ID);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
GUEST_TEST_MEM,
TEST_MEM_SLOT_INDEX,
TEST_MEM_SIZE,
KVM_MEM_LOG_DIRTY_PAGES);
/*
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
TEST_MEM_SIZE * 4096, 0);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
* 0xc0000000.
*
* Note that prepare_eptp should be called only L1's GPA map is done,
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
nested_map_memslot(vmx, vm, 0, 0);
nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
bmap = bitmap_alloc(TEST_MEM_SIZE);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
__FILE__, uc.args[1]);
/* NOT REACHED */
case UCALL_SYNC:
/*
* The nested guest wrote at offset 0x1000 in the memslot, but the
* dirty bitmap must be filled in according to L1 GPA, not L2.
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
if (uc.args[1]) {
TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
} else {
TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
}
TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
break;
case UCALL_DONE:
done = true;
break;
default:
TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
}
}
}