Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini: "ARM: - Move the arch-specific code into arch/arm64/kvm - Start the post-32bit cleanup - Cherry-pick a few non-invasive pre-NV patches x86: - Rework of TLB flushing - Rework of event injection, especially with respect to nested virtualization - Nested AMD event injection facelift, building on the rework of generic code and fixing a lot of corner cases - Nested AMD live migration support - Optimization for TSC deadline MSR writes and IPIs - Various cleanups - Asynchronous page fault cleanups (from tglx, common topic branch with tip tree) - Interrupt-based delivery of asynchronous "page ready" events (host side) - Hyper-V MSRs and hypercalls for guest debugging - VMX preemption timer fixes s390: - Cleanups Generic: - switch vCPU thread wakeup from swait to rcuwait The other architectures, and the guest side of the asynchronous page fault work, will come next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits) KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test KVM: check userspace_addr for all memslots KVM: selftests: update hyperv_cpuid with SynDBG tests x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls x86/kvm/hyper-v: enable hypercalls regardless of hypercall page x86/kvm/hyper-v: Add support for synthetic debugger interface x86/hyper-v: Add synthetic debugger definitions KVM: selftests: VMX preemption timer migration test KVM: nVMX: Fix VMX preemption timer migration x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit KVM: x86/pmu: Support full width counting KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT KVM: x86: acknowledgment mechanism for async pf page ready notifications KVM: x86: interrupt based APF 'page ready' event delivery KVM: introduce kvm_read_guest_offset_cached() KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present() KVM: x86: extend struct kvm_vcpu_pv_apf_data with token info Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously" KVM: VMX: Replace zero-length array with flexible-array ...
2020-06-03 15:13:47 -07:00
parent 6b2591c212 13ffbd8db1
commit 039aeb9deb
155 changed files with 5318 additions and 3057 deletions
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -101,12 +101,14 @@ static inline void context_tracking_init(void) { }

 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 /* must be called with irqs disabled */
-static inline void guest_enter_irqoff(void)
+static __always_inline void guest_enter_irqoff(void)
 {
+	instrumentation_begin();
 	if (vtime_accounting_enabled_this_cpu())
 		vtime_guest_enter(current);
 	else
 		current->flags |= PF_VCPU;
+	instrumentation_end();

 	if (context_tracking_enabled())
 		__context_tracking_enter(CONTEXT_GUEST);
@@ -118,39 +120,48 @@ static inline void guest_enter_irqoff(void)
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.
 	 */
-	if (!context_tracking_enabled_this_cpu())
+	if (!context_tracking_enabled_this_cpu()) {
+		instrumentation_begin();
 		rcu_virt_note_context_switch(smp_processor_id());
+		instrumentation_end();
+	}
 }

-static inline void guest_exit_irqoff(void)
+static __always_inline void guest_exit_irqoff(void)
 {
 	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_GUEST);

+	instrumentation_begin();
 	if (vtime_accounting_enabled_this_cpu())
 		vtime_guest_exit(current);
 	else
 		current->flags &= ~PF_VCPU;
+	instrumentation_end();
 }

 #else
-static inline void guest_enter_irqoff(void)
+static __always_inline void guest_enter_irqoff(void)
 {
 	/*
 	 * This is running in ioctl context so its safe
 	 * to assume that it's the stime pending cputime
 	 * to flush.
 	 */
+	instrumentation_begin();
 	vtime_account_kernel(current);
 	current->flags |= PF_VCPU;
 	rcu_virt_note_context_switch(smp_processor_id());
+	instrumentation_end();
 }

-static inline void guest_exit_irqoff(void)
+static __always_inline void guest_exit_irqoff(void)
 {
+	instrumentation_begin();
 	/* Flush the guest cputime we spent on the guest */
 	vtime_account_kernel(current);
 	current->flags &= ~PF_VCPU;
+	instrumentation_end();
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,16 +19,20 @@
 #ifdef CONFIG_PROVE_LOCKING
  extern void lockdep_softirqs_on(unsigned long ip);
  extern void lockdep_softirqs_off(unsigned long ip);
+  extern void lockdep_hardirqs_on_prepare(unsigned long ip);
  extern void lockdep_hardirqs_on(unsigned long ip);
  extern void lockdep_hardirqs_off(unsigned long ip);
 #else
  static inline void lockdep_softirqs_on(unsigned long ip) { }
  static inline void lockdep_softirqs_off(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { }
  static inline void lockdep_hardirqs_on(unsigned long ip) { }
  static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif

 #ifdef CONFIG_TRACE_IRQFLAGS
+  extern void trace_hardirqs_on_prepare(void);
+  extern void trace_hardirqs_off_prepare(void);
  extern void trace_hardirqs_on(void);
  extern void trace_hardirqs_off(void);
 # define lockdep_hardirq_context(p)	((p)->hardirq_context)
@@ -96,6 +100,8 @@ do {						\
 	  } while (0)

 #else
+# define trace_hardirqs_on_prepare()		do { } while (0)
+# define trace_hardirqs_off_prepare()		do { } while (0)
 # define trace_hardirqs_on()		do { } while (0)
 # define trace_hardirqs_off()		do { } while (0)
 # define lockdep_hardirq_context(p)	0
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;

-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -503,6 +503,7 @@ struct kvm {
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
 	pid_t userspace_pid;
+	unsigned int max_halt_poll_ns;
 };

 #define kvm_err(fmt, ...) \
@@ -733,6 +734,9 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
 int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 			   void *data, unsigned long len);
+int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+				 void *data, unsigned int offset,
+				 unsigned long len);
 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
 			 int offset, int len);
 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
@@ -869,7 +873,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state);
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg);
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);

 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
@@ -959,12 +963,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 }
 #endif

-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
-	return vcpu->arch.wqp;
+	return vcpu->arch.waitp;
 #else
-	return &vcpu->wq;
+	return &vcpu->wait;
 #endif
 }

@@ -1133,6 +1137,11 @@ struct kvm_stats_debugfs_item {
 #define KVM_DBGFS_GET_MODE(dbgfs_item)                                         \
 	((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)

+#define VM_STAT(n, x, ...) 							\
+	{ n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
+#define VCPU_STAT(n, x, ...)							\
+	{ n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
+
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;

@@ -1355,6 +1364,12 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */

+static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
+{
+	return (memslot && memslot->id < KVM_USER_MEM_SLOTS &&
+		!(memslot->flags & KVM_MEMSLOT_INVALID));
+}
+
 struct kvm_vcpu *kvm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);

--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -25,16 +25,38 @@ static inline void rcuwait_init(struct rcuwait *w)
 	w->task = NULL;
 }

-extern void rcuwait_wake_up(struct rcuwait *w);
+/*
+ * Note: this provides no serialization and, just as with waitqueues,
+ * requires care to estimate as to whether or not the wait is active.
+ */
+static inline int rcuwait_active(struct rcuwait *w)
+{
+	return !!rcu_access_pointer(w->task);
+}
+
+extern int rcuwait_wake_up(struct rcuwait *w);

 /*
 * The caller is responsible for locking around rcuwait_wait_event(),
- * such that writes to @task are properly serialized.
+ * and [prepare_to/finish]_rcuwait() such that writes to @task are
+ * properly serialized.
 */
+
+static inline void prepare_to_rcuwait(struct rcuwait *w)
+{
+	rcu_assign_pointer(w->task, current);
+}
+
+static inline void finish_rcuwait(struct rcuwait *w)
+{
+        rcu_assign_pointer(w->task, NULL);
+	__set_current_state(TASK_RUNNING);
+}
+
 #define rcuwait_wait_event(w, condition, state)				\
 ({									\
 	int __ret = 0;							\
-	rcu_assign_pointer((w)->task, current);				\
+	prepare_to_rcuwait(w);						\
 	for (;;) {							\
 		/*							\
 		 * Implicit barrier (A) pairs with (B) in		\
@@ -51,9 +73,7 @@ extern void rcuwait_wake_up(struct rcuwait *w);
 									\
 		schedule();						\
 	}								\
-									\
-	WRITE_ONCE((w)->task, NULL);					\
-	__set_current_state(TASK_RUNNING);				\
+	finish_rcuwait(w);						\
 	__ret;								\
 })

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -992,6 +992,7 @@ struct task_struct {
 	unsigned int			hardirq_disable_event;
 	int				hardirqs_enabled;
 	int				hardirq_context;
+	u64				hardirq_chain_key;
 	unsigned long			softirq_disable_ip;
 	unsigned long			softirq_enable_ip;
 	unsigned int			softirq_disable_event;