Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini: "s390: - implement diag318 x86: - Report last CPU for debugging - Emulate smaller MAXPHYADDR in the guest than in the host - .noinstr and tracing fixes from Thomas - nested SVM page table switching optimization and fixes Generic: - Unify shadow MMU cache data structures across architectures" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (127 commits) KVM: SVM: Fix sev_pin_memory() error handling KVM: LAPIC: Set the TDCR settable bits KVM: x86: Specify max TDP level via kvm_configure_mmu() KVM: x86/mmu: Rename max_page_level to max_huge_page_level KVM: x86: Dynamically calculate TDP level from max level and MAXPHYADDR KVM: VXM: Remove temporary WARN on expected vs. actual EPTP level mismatch KVM: x86: Pull the PGD's level from the MMU instead of recalculating it KVM: VMX: Make vmx_load_mmu_pgd() static KVM: x86/mmu: Add separate helper for shadow NPT root page role calc KVM: VMX: Drop a duplicate declaration of construct_eptp() KVM: nSVM: Correctly set the shadow NPT root level in its MMU role KVM: Using macros instead of magic values MIPS: KVM: Fix build error caused by 'kvm_run' cleanup KVM: nSVM: remove nonsensical EXITINFO1 adjustment on nested NPF KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support KVM: VMX: optimize #PF injection when MAXPHYADDR does not match KVM: VMX: Add guest physical address check in EPT violation and misconfig KVM: VMX: introduce vmx_need_pf_intercept KVM: x86: update exception bitmap on CPUID changes KVM: x86: rename update_bp_intercept to update_exception_bitmap ...
2020-08-06 12:59:31 -07:00
parent 7b4ea9456d f3633c2683
commit 921d2597ab
71 changed files with 1640 additions and 1219 deletions
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -7,8 +7,11 @@
 *   Authors: Anthony Liguori <aliguori@us.ibm.com>
 */

+#define pr_fmt(fmt) "kvm-guest: " fmt
+
 #include <linux/context_tracking.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/kvm_para.h>
 #include <linux/cpu.h>
@@ -232,16 +235,11 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

 noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
-	u32 reason = kvm_read_and_reset_apf_flags();
+	u32 flags = kvm_read_and_reset_apf_flags();
 	irqentry_state_t state;

-	switch (reason) {
-	case KVM_PV_REASON_PAGE_NOT_PRESENT:
-	case KVM_PV_REASON_PAGE_READY:
-		break;
-	default:
+	if (!flags)
 		return false;
-	}

 	state = irqentry_enter(regs);
 	instrumentation_begin();
@@ -254,13 +252,13 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 	if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
 		panic("Host injected async #PF in interrupt disabled region\n");

-	if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
+	if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
 		if (unlikely(!(user_mode(regs))))
 			panic("Host injected async #PF in kernel mode\n");
 		/* Page is swapped out by the host. */
 		kvm_async_pf_task_wait_schedule(token);
 	} else {
-		kvm_async_pf_task_wake(token);
+		WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
 	}

 	instrumentation_end();
@@ -268,6 +266,27 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 	return true;
 }

+DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	u32 token;
+	irqentry_state_t state;
+
+	state = irqentry_enter(regs);
+
+	inc_irq_stat(irq_hv_callback_count);
+
+	if (__this_cpu_read(apf_reason.enabled)) {
+		token = __this_cpu_read(apf_reason.token);
+		kvm_async_pf_task_wake(token);
+		__this_cpu_write(apf_reason.token, 0);
+		wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
+	}
+
+	irqentry_exit(regs, state);
+	set_irq_regs(old_regs);
+}
+
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -289,8 +308,8 @@ static void kvm_register_steal_time(void)
 		return;

 	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
-	pr_info("kvm-stealtime: cpu %d, msr %llx\n",
-		cpu, (unsigned long long) slow_virt_to_phys(st));
+	pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+		(unsigned long long) slow_virt_to_phys(st));
 }

 static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -311,17 +330,19 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)

 static void kvm_guest_cpu_init(void)
 {
-	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
-		u64 pa;
+	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
+		u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));

 		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

 		pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-		pa |= KVM_ASYNC_PF_ENABLED;
+		pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

 		if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
 			pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

+		wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
+
 		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
 		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
@@ -493,7 +514,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
 		} else {
 			ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-			WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+			WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+				  ret);
 			min = max = apic_id;
 			ipi_bitmap = 0;
 		}
@@ -503,7 +525,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
 	if (ipi_bitmap) {
 		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
 			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
-		WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
+		WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
+			  ret);
 	}

 	local_irq_restore(flags);
@@ -533,7 +556,7 @@ static void kvm_setup_pv_ipi(void)
 {
 	apic->send_IPI_mask = kvm_send_ipi_mask;
 	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
-	pr_info("KVM setup pv IPIs\n");
+	pr_info("setup PV IPIs\n");
 }

 static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
@@ -551,13 +574,6 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
 	}
 }

-static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
-{
-	native_smp_prepare_cpus(max_cpus);
-	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-		static_branch_disable(&virt_spin_lock_key);
-}
-
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
 	/*
@@ -646,19 +662,20 @@ static void __init kvm_guest_init(void)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		apic_set_eoi_write(kvm_guest_apic_eoi_write);

-	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
+	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
 		static_branch_enable(&kvm_async_pf_enabled);
+		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
+	}

 #ifdef CONFIG_SMP
-	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	if (pv_sched_yield_supported()) {
 		smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
-		pr_info("KVM setup pv sched yield\n");
+		pr_info("setup PV sched yield\n");
 	}
 	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
 				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
-		pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
+		pr_err("failed to install cpu hotplug callbacks\n");
 #else
 	sev_map_percpu_data();
 	kvm_guest_cpu_init();
@@ -854,16 +871,36 @@ asm(
 */
 void __init kvm_spinlock_init(void)
 {
-	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
-	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+	/*
+	 * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
+	 * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
+	 * preferred over native qspinlock when vCPU is preempted.
+	 */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
+		pr_info("PV spinlocks disabled, no host support\n");
 		return;
+	}

-	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-		return;
+	/*
+	 * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
+	 * are available.
+	 */
+	if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
+		pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
+		goto out;
+	}

-	/* Don't use the pvqspinlock code if there is only 1 vCPU. */
-	if (num_possible_cpus() == 1)
-		return;
+	if (num_possible_cpus() == 1) {
+		pr_info("PV spinlocks disabled, single CPU\n");
+		goto out;
+	}
+
+	if (nopvspin) {
+		pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
+		goto out;
+	}
+
+	pr_info("PV spinlocks enabled\n");

 	__pv_init_lock_hash();
 	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
@@ -876,6 +913,13 @@ void __init kvm_spinlock_init(void)
 		pv_ops.lock.vcpu_is_preempted =
 			PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
 	}
+	/*
+	 * When PV spinlock is enabled which is preferred over
+	 * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
+	 * Just disable it anyway.
+	 */
+out:
+	static_branch_disable(&virt_spin_lock_key);
 }

 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
@@ -895,8 +939,8 @@ static void kvm_enable_host_haltpoll(void *i)
 void arch_haltpoll_enable(unsigned int cpu)
 {
 	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
-		pr_err_once("kvm: host does not support poll control\n");
-		pr_err_once("kvm: host upgrade recommended\n");
+		pr_err_once("host does not support poll control\n");
+		pr_err_once("host upgrade recommended\n");
 		return;
 	}