Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "ARM:
   - support for SVE and Pointer Authentication in guests
   - PMU improvements

  POWER:
   - support for direct access to the POWER9 XIVE interrupt controller
   - memory and performance optimizations

  x86:
   - support for accessing memory not backed by struct page
   - fixes and refactoring

  Generic:
   - dirty page tracking improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits)
  kvm: fix compilation on aarch64
  Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU"
  kvm: x86: Fix L1TF mitigation for shadow MMU
  KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible
  KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device
  KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing"
  KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs
  kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete
  tests: kvm: Add tests for KVM_SET_NESTED_STATE
  KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state
  tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID
  tests: kvm: Add tests to .gitignore
  KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
  KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one
  KVM: Fix the bitmap range to copy during clear dirty
  KVM: arm64: Fix ptrauth ID register masking logic
  KVM: x86: use direct accessors for RIP and RSP
  KVM: VMX: Use accessors for GPRs outside of dedicated caching logic
  KVM: x86: Omit caching logic for always-available GPRs
  kvm, x86: Properly check whether a pfn is an MMIO or not
  ...
This commit is contained in:
Linus Torvalds
2019-05-17 10:33:30 -07:00
91 changed files with 5580 additions and 971 deletions

View File

@@ -125,9 +125,16 @@ int main(void)
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));

View File

@@ -1913,7 +1913,7 @@ static void verify_sve_features(void)
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
if (len < safe_len || sve_verify_vq_map()) {
pr_crit("CPU%d: SVE: required vector length(s) missing\n",
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}

View File

@@ -18,6 +18,7 @@
*/
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
@@ -48,6 +49,7 @@
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -119,6 +121,8 @@
*/
struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
void *sve_state;
unsigned int sve_vl;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -130,14 +134,23 @@ static int sve_default_vl = -1;
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = SVE_VL_MIN;
/* Set of available vector lengths, as vq_to_bit(vq): */
static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
/*
* Set of available vector lengths,
* where length vq encoded as bit __vq_to_bit(vq):
*/
__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Set of vector lengths present on at least one cpu: */
static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
static void __percpu *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
@@ -235,14 +248,15 @@ static void task_fpsimd_load(void)
*/
void fpsimd_save(void)
{
struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
WARN_ON(!in_softirq() && !irqs_disabled());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
@@ -252,31 +266,14 @@ void fpsimd_save(void)
return;
}
sve_save_state(sve_pffr(&current->thread), &st->fpsr);
sve_save_state((char *)last->sve_state +
sve_ffr_offset(last->sve_vl),
&last->st->fpsr);
} else
fpsimd_save_state(st);
fpsimd_save_state(last->st);
}
}
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
* _maximum_ VQ not exceeding a certain value.
*/
static unsigned int vq_to_bit(unsigned int vq)
{
return SVE_VQ_MAX - vq;
}
static unsigned int bit_to_vq(unsigned int bit)
{
if (WARN_ON(bit >= SVE_VQ_MAX))
bit = SVE_VQ_MAX - 1;
return SVE_VQ_MAX - bit;
}
/*
* All vector length selection from userspace comes through here.
* We're on a slow path, so some sanity-checks are included.
@@ -298,8 +295,8 @@ static unsigned int find_supported_vector_length(unsigned int vl)
vl = max_vl;
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(bit_to_vq(bit));
__vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(__bit_to_vq(bit));
}
#ifdef CONFIG_SYSCTL
@@ -550,7 +547,6 @@ int sve_set_vector_length(struct task_struct *task,
local_bh_disable();
fpsimd_save();
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
fpsimd_flush_task_state(task);
@@ -624,12 +620,6 @@ int sve_get_current_vl(void)
return sve_prctl_status(0);
}
/*
* Bitmap for temporary storage of the per-CPU set of supported vector lengths
* during secondary boot.
*/
static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
@@ -644,40 +634,82 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
vl = sve_get_vl();
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
set_bit(vq_to_bit(vq), map);
set_bit(__vq_to_bit(vq), map);
}
}
/*
* Initialise the set of known supported VQs for the boot CPU.
* This is called during kernel boot, before secondary CPUs are brought up.
*/
void __init sve_init_vq_map(void)
{
sve_probe_vqs(sve_vq_map);
bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
}
/*
* If we haven't committed to the set of supported VQs yet, filter out
* those not supported by the current CPU.
* This function is called during the bring-up of early secondary CPUs only.
*/
void sve_update_vq_map(void)
{
sve_probe_vqs(sve_secondary_vq_map);
bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
sve_probe_vqs(tmp_map);
bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
}
/* Check whether the current CPU supports all VQs in the committed set */
/*
* Check whether the current CPU supports all VQs in the committed set.
* This function is called during the bring-up of late secondary CPUs only.
*/
int sve_verify_vq_map(void)
{
int ret = 0;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
sve_probe_vqs(sve_secondary_vq_map);
bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
SVE_VQ_MAX);
if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
sve_probe_vqs(tmp_map);
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
smp_processor_id());
ret = -EINVAL;
return -EINVAL;
}
return ret;
if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
return 0;
/*
* For KVM, it is necessary to ensure that this CPU doesn't
* support any vector length that guests may have probed as
* unsupported.
*/
/* Recover the set of supported VQs: */
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
/* Find VQs supported that are not globally supported: */
bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
/* Find the lowest such VQ, if any: */
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
return 0; /* no mismatches */
/*
* Mismatches above sve_max_virtualisable_vl are fine, since
* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
*/
if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
smp_processor_id());
return -EINVAL;
}
return 0;
}
static void __init sve_efi_setup(void)
@@ -744,6 +776,8 @@ u64 read_zcr_features(void)
void __init sve_setup(void)
{
u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
if (!system_supports_sve())
return;
@@ -753,8 +787,8 @@ void __init sve_setup(void)
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
@@ -772,11 +806,31 @@ void __init sve_setup(void)
*/
sve_default_vl = find_supported_vector_length(64);
bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
SVE_VQ_MAX);
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
/* No non-virtualisable VLs found */
sve_max_virtualisable_vl = SVE_VQ_MAX;
else if (WARN_ON(b == SVE_VQ_MAX - 1))
/* No virtualisable VLs? This is architecturally forbidden. */
sve_max_virtualisable_vl = SVE_VQ_MIN;
else /* b + 1 < SVE_VQ_MAX */
sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
if (sve_max_virtualisable_vl > sve_max_vl)
sve_max_virtualisable_vl = sve_max_vl;
pr_info("SVE: maximum available vector length %u bytes per vector\n",
sve_max_vl);
pr_info("SVE: default vector length %u bytes per vector\n",
sve_default_vl);
/* KVM decides whether to support mismatched systems. Just warn here: */
if (sve_max_virtualisable_vl < sve_max_vl)
pr_warn("SVE: unvirtualisable vector lengths present\n");
sve_efi_setup();
}
@@ -816,12 +870,11 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
local_bh_disable();
fpsimd_save();
fpsimd_to_sve(current);
/* Force ret_to_user to reload the registers: */
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
fpsimd_to_sve(current);
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
@@ -894,9 +947,9 @@ void fpsimd_flush_thread(void)
local_bh_disable();
fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
fpsimd_flush_task_state(current);
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
@@ -933,8 +986,6 @@ void fpsimd_flush_thread(void)
current->thread.sve_vl_onexec = 0;
}
set_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
}
@@ -974,6 +1025,8 @@ void fpsimd_bind_task_to_cpu(void)
this_cpu_ptr(&fpsimd_last_state);
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
last->sve_vl = current->thread.sve_vl;
current->thread.fpsimd_cpu = smp_processor_id();
if (system_supports_sve()) {
@@ -987,7 +1040,8 @@ void fpsimd_bind_task_to_cpu(void)
}
}
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
unsigned int sve_vl)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -995,6 +1049,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
WARN_ON(!in_softirq() && !irqs_disabled());
last->st = st;
last->sve_state = sve_state;
last->sve_vl = sve_vl;
}
/*
@@ -1043,12 +1099,29 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
/*
* Invalidate live CPU copies of task t's FPSIMD state
*
* This function may be called with preemption enabled. The barrier()
* ensures that the assignment to fpsimd_cpu is visible to any
* preemption/softirq that could race with set_tsk_thread_flag(), so
* that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
*
* The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
* subsequent code.
*/
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
barrier();
set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
barrier();
}
/*
* Invalidate any task's FPSIMD state that is present on this cpu.
* This function must be called with softirqs disabled.
*/
void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);

View File

@@ -26,6 +26,7 @@
#include <linux/acpi.h>
#include <linux/clocksource.h>
#include <linux/kvm_host.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -528,12 +529,21 @@ static inline int armv8pmu_enable_counter(int idx)
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
int idx = event->hw.idx;
u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
armv8pmu_enable_counter(idx);
if (armv8pmu_event_is_chained(event))
armv8pmu_enable_counter(idx - 1);
isb();
counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
kvm_set_pmu_events(counter_bits, attr);
/* We rely on the hypervisor switch code to enable guest counters */
if (!kvm_pmu_counter_deferred(attr)) {
armv8pmu_enable_counter(idx);
if (armv8pmu_event_is_chained(event))
armv8pmu_enable_counter(idx - 1);
}
}
static inline int armv8pmu_disable_counter(int idx)
@@ -546,11 +556,21 @@ static inline int armv8pmu_disable_counter(int idx)
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_event_attr *attr = &event->attr;
int idx = hwc->idx;
u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
if (armv8pmu_event_is_chained(event))
armv8pmu_disable_counter(idx - 1);
armv8pmu_disable_counter(idx);
counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
kvm_clr_pmu_events(counter_bits);
/* We rely on the hypervisor switch code to disable guest counters */
if (!kvm_pmu_counter_deferred(attr)) {
if (armv8pmu_event_is_chained(event))
armv8pmu_disable_counter(idx - 1);
armv8pmu_disable_counter(idx);
}
}
static inline int armv8pmu_enable_intens(int idx)
@@ -827,14 +847,23 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
* with other architectures (x86 and Power).
*/
if (is_kernel_in_hyp_mode()) {
if (!attr->exclude_kernel)
if (!attr->exclude_kernel && !attr->exclude_host)
config_base |= ARMV8_PMU_INCLUDE_EL2;
} else {
if (attr->exclude_kernel)
if (attr->exclude_guest)
config_base |= ARMV8_PMU_EXCLUDE_EL1;
if (!attr->exclude_hv)
if (attr->exclude_host)
config_base |= ARMV8_PMU_EXCLUDE_EL0;
} else {
if (!attr->exclude_hv && !attr->exclude_host)
config_base |= ARMV8_PMU_INCLUDE_EL2;
}
/*
* Filter out !VHE kernels and guest kernels
*/
if (attr->exclude_kernel)
config_base |= ARMV8_PMU_EXCLUDE_EL1;
if (attr->exclude_user)
config_base |= ARMV8_PMU_EXCLUDE_EL0;
@@ -864,6 +893,9 @@ static void armv8pmu_reset(void *info)
armv8pmu_disable_intens(idx);
}
/* Clear the counters we flip at guest entry/exit */
kvm_clr_pmu_events(U32_MAX);
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().

View File

@@ -296,11 +296,6 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
*/
fpsimd_flush_task_state(current);
barrier();
/* From now, fpsimd_thread_switch() won't clear TIF_FOREIGN_FPSTATE */
set_thread_flag(TIF_FOREIGN_FPSTATE);
barrier();
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sve_alloc(current);