Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar: "The main changes are: - Debloat RCU headers - Parallelize SRCU callback handling (plus overlapping patches) - Improve the performance of Tree SRCU on a CPU-hotplug stress test - Documentation updates - Miscellaneous fixes" * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (74 commits) rcu: Open-code the rcu_cblist_n_lazy_cbs() function rcu: Open-code the rcu_cblist_n_cbs() function rcu: Open-code the rcu_cblist_empty() function rcu: Separately compile large rcu_segcblist functions srcu: Debloat the <linux/rcu_segcblist.h> header srcu: Adjust default auto-expediting holdoff srcu: Specify auto-expedite holdoff time srcu: Expedite first synchronize_srcu() when idle srcu: Expedited grace periods with reduced memory contention srcu: Make rcutorture writer stalls print SRCU GP state srcu: Exact tracking of srcu_data structures containing callbacks srcu: Make SRCU be built by default srcu: Fix Kconfig botch when SRCU not selected rcu: Make non-preemptive schedule be Tasks RCU quiescent state srcu: Expedite srcu_schedule_cbs_snp() callback invocation srcu: Parallelize callback handling kvm: Move srcu_struct fields to end of struct kvm rcu: Fix typo in PER_RCU_NODE_PERIOD header comment rcu: Use true/false in assignment to bool rcu: Use bool value directly ...
2017-05-10 09:50:55 -07:00
parent dc9edaab90 20652ed6e4
commit de4d195308
75 changed files with 3903 additions and 1128 deletions
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
 *
 * Function returns NULL if no refcount could be obtained, or the fence.
 * This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU),
 * so long as the caller is using RCU on the pointer to the fence.
 *
 * An alternative mechanism is to employ a seqlock to protect a bunch of
@@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
 		 * have successfully acquire a reference to it. If it no
 		 * longer matches, we are holding a reference to some other
 		 * reallocated pointer. This is possible if the allocator
-		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * is using a freelist like SLAB_TYPESAFE_BY_RCU where the
 		 * fence remains valid for the RCU grace period, but it
 		 * may be reallocated. When using such allocators, we are
 		 * responsible for ensuring the reference we get is to
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -384,8 +384,6 @@ struct kvm {
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
-	struct srcu_struct srcu;
-	struct srcu_struct irq_srcu;
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];

 	/*
@@ -438,6 +436,8 @@ struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 };

 #define kvm_err(fmt, ...) \
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -0,0 +1,99 @@
+/*
+ * RCU node combining tree definitions.  These are used to compute
+ * global attributes while avoiding common-case global contention.  A key
+ * property that these computations rely on is a tournament-style approach
+ * where only one of the tasks contending a lower level in the tree need
+ * advance to the next higher level.  If properly configured, this allows
+ * unlimited scalability while maintaining a constant level of contention
+ * on the root node.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_RCU_NODE_TREE_H
+#define __LINUX_RCU_NODE_TREE_H
+
+/*
+ * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
+ * CONFIG_RCU_FANOUT_LEAF.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
+ */
+
+#ifdef CONFIG_RCU_FANOUT
+#define RCU_FANOUT CONFIG_RCU_FANOUT
+#else /* #ifdef CONFIG_RCU_FANOUT */
+# ifdef CONFIG_64BIT
+# define RCU_FANOUT 64
+# else
+# define RCU_FANOUT 32
+# endif
+#endif /* #else #ifdef CONFIG_RCU_FANOUT */
+
+#ifdef CONFIG_RCU_FANOUT_LEAF
+#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
+#define RCU_FANOUT_LEAF 16
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
+
+#define RCU_FANOUT_1	      (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT)
+#define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT)
+#define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
+#  define RCU_NUM_LVLS	      1
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_NODES	      NUM_RCU_LVL_0
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+#elif NR_CPUS <= RCU_FANOUT_2
+#  define RCU_NUM_LVLS	      2
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+#elif NR_CPUS <= RCU_FANOUT_3
+#  define RCU_NUM_LVLS	      3
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+#elif NR_CPUS <= RCU_FANOUT_4
+#  define RCU_NUM_LVLS	      4
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+#  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+#  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
+
+#endif /* __LINUX_RCU_NODE_TREE_H */
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -0,0 +1,90 @@
+/*
+ * RCU segmented callback lists
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright IBM Corporation, 2017
+ *
+ * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
+#define __INCLUDE_LINUX_RCU_SEGCBLIST_H
+
+/* Simple unsegmented callback lists. */
+struct rcu_cblist {
+	struct rcu_head *head;
+	struct rcu_head **tail;
+	long len;
+	long len_lazy;
+};
+
+#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
+
+/* Complicated segmented callback lists.  ;-) */
+
+/*
+ * Index values for segments in rcu_segcblist structure.
+ *
+ * The segments are as follows:
+ *
+ * [head, *tails[RCU_DONE_TAIL]):
+ *	Callbacks whose grace period has elapsed, and thus can be invoked.
+ * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
+ *	Callbacks waiting for the current GP from the current CPU's viewpoint.
+ * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
+ *	Callbacks that arrived before the next GP started, again from
+ *	the current CPU's viewpoint.  These can be handled by the next GP.
+ * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
+ *	Callbacks that might have arrived after the next GP started.
+ *	There is some uncertainty as to when a given GP starts and
+ *	ends, but a CPU knows the exact times if it is the one starting
+ *	or ending the GP.  Other CPUs know that the previous GP ends
+ *	before the next one starts.
+ *
+ * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
+ * empty.
+ *
+ * The ->gp_seq[] array contains the grace-period number at which the
+ * corresponding segment of callbacks will be ready to invoke.  A given
+ * element of this array is meaningful only when the corresponding segment
+ * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
+ * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
+ * not yet been assigned a grace-period number).
+ */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_CBLIST_NSEGS	4
+
+struct rcu_segcblist {
+	struct rcu_head *head;
+	struct rcu_head **tails[RCU_CBLIST_NSEGS];
+	unsigned long gp_seq[RCU_CBLIST_NSEGS];
+	long len;
+	long len_lazy;
+};
+
+#define RCU_SEGCBLIST_INITIALIZER(n) \
+{ \
+	.head = NULL, \
+	.tails[RCU_DONE_TAIL] = &n.head, \
+	.tails[RCU_WAIT_TAIL] = &n.head, \
+	.tails[RCU_NEXT_READY_TAIL] = &n.head, \
+	.tails[RCU_NEXT_TAIL] = &n.head, \
+}
+
+#endif /* __INCLUDE_LINUX_RCU_SEGCBLIST_H */
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n,
 {
 	struct hlist_node *i, *last = NULL;

-	for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i))
+	/* Note: write side code, so rcu accessors are not needed. */
+	for (i = h->first; i; i = i->next)
 		last = i;

 	if (last) {
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -368,15 +368,20 @@ static inline void rcu_init_nohz(void)
 #ifdef CONFIG_TASKS_RCU
 #define TASKS_RCU(x) x
 extern struct srcu_struct tasks_rcu_exit_srcu;
-#define rcu_note_voluntary_context_switch(t) \
+#define rcu_note_voluntary_context_switch_lite(t) \
 	do { \
-		rcu_all_qs(); \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
+#define rcu_note_voluntary_context_switch(t) \
+	do { \
+		rcu_all_qs(); \
+		rcu_note_voluntary_context_switch_lite(t); \
+	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
-#define rcu_note_voluntary_context_switch(t)	rcu_all_qs()
+#define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
+#define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #endif /* #else #ifdef CONFIG_TASKS_RCU */

 /**
@@ -1132,11 +1137,11 @@ do { \
 * if the UNLOCK and LOCK are executed by the same CPU or if the
 * UNLOCK and LOCK operate on the same lock variable.
 */
-#ifdef CONFIG_PPC
+#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
 #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
-#else /* #ifdef CONFIG_PPC */
+#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 #define smp_mb__after_unlock_lock()	do { } while (0)
-#endif /* #else #ifdef CONFIG_PPC */
+#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */


 #endif /* __LINUX_RCUPDATE_H */
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }

+static inline bool rcu_eqs_special_set(int cpu)
+{
+	return false;  /* Never flag non-existent other CPUs! */
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -87,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }

-static inline void rcu_note_context_switch(void)
-{
-	rcu_sched_qs();
-}
+#define rcu_note_context_switch(preempt) \
+	do { \
+		rcu_sched_qs(); \
+		rcu_note_voluntary_context_switch_lite(current); \
+	} while (0)

 /*
 * Take advantage of the fact that there is only one CPU, which
@@ -212,14 +218,14 @@ static inline void exit_rcu(void)
 {
 }

-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 extern int rcu_scheduler_active __read_mostly;
 void rcu_scheduler_starting(void);
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
 static inline void rcu_scheduler_starting(void)
 {
 }
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */

 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)

@@ -237,6 +243,10 @@ static inline bool rcu_is_watching(void)

 #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */

+static inline void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+}
+
 static inline void rcu_all_qs(void)
 {
 	barrier(); /* Avoid RCU read-side critical sections leaking across. */
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,7 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H

-void rcu_note_context_switch(void);
+void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);

@@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void);
 */
 static inline void rcu_virt_note_context_switch(int cpu)
 {
-	rcu_note_context_switch();
+	rcu_note_context_switch(false);
 }

 void synchronize_rcu_bh(void);
@@ -108,6 +108,7 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;

 bool rcu_is_watching(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);

 void rcu_all_qs(void);

--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -28,7 +28,7 @@
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
 /*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
 *
 * This delays freeing the SLAB page by a grace period, it does _NOT_
 * delay object freeing. This means that if you do kmem_cache_free()
@@ -61,8 +61,10 @@
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
 */
-#define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
+#define SLAB_TYPESAFE_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */

--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -22,7 +22,7 @@
 *	   Lai Jiangshan <laijs@cn.fujitsu.com>
 *
 * For detailed explanation of Read-Copy Update mechanism see -
- * 		Documentation/RCU/ *.txt
+ *		Documentation/RCU/ *.txt
 *
 */

@@ -32,35 +32,9 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/rcu_segcblist.h>

-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
-};
-
-struct rcu_batch {
-	struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
-struct srcu_struct {
-	unsigned long completed;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
-	/* callbacks just queued */
-	struct rcu_batch batch_queue;
-	/* callbacks try to do the first check_zero */
-	struct rcu_batch batch_check0;
-	/* callbacks done with the first check_zero and the flip */
-	struct rcu_batch batch_check1;
-	struct rcu_batch batch_done;
-	struct delayed_work work;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-};
+struct srcu_struct;

 #ifdef CONFIG_DEBUG_LOCK_ALLOC

@@ -82,46 +56,15 @@ int init_srcu_struct(struct srcu_struct *sp);
 #define __SRCU_DEP_MAP_INIT(srcu_name)
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

-void process_srcu(struct work_struct *work);
-
-#define __SRCU_STRUCT_INIT(name)					\
-	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
-		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
-		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
-		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
-		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
-
-/*
- * Define and initialize a srcu struct at build time.
- * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
- *
- * Note that although DEFINE_STATIC_SRCU() hides the name from other
- * files, the per-CPU variable rules nevertheless require that the
- * chosen name be globally unique.  These rules also prohibit use of
- * DEFINE_STATIC_SRCU() within a function.  If these rules are too
- * restrictive, declare the srcu_struct manually.  For example, in
- * each file:
- *
- *	static struct srcu_struct my_srcu;
- *
- * Then, before the first use of each my_srcu, manually initialize it:
- *
- *	init_srcu_struct(&my_srcu);
- *
- * See include/linux/percpu-defs.h for the rules on per-CPU variables.
- */
-#define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
-#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
-#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+#ifdef CONFIG_TINY_SRCU
+#include <linux/srcutiny.h>
+#elif defined(CONFIG_TREE_SRCU)
+#include <linux/srcutree.h>
+#elif defined(CONFIG_CLASSIC_SRCU)
+#include <linux/srcuclassic.h>
+#else
+#error "Unknown SRCU implementation specified to kernel configuration"
+#endif

 /**
 * call_srcu() - Queue a callback for invocation after an SRCU grace period
@@ -147,9 +90,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
-void synchronize_srcu_expedited(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
-void srcu_barrier(struct srcu_struct *sp);

 #ifdef CONFIG_DEBUG_LOCK_ALLOC

--- a/include/linux/srcuclassic.h
+++ b/include/linux/srcuclassic.h
@@ -0,0 +1,115 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	classic v4.11 variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_CLASSIC_H
+#define _LINUX_SRCU_CLASSIC_H
+
+struct srcu_array {
+	unsigned long lock_count[2];
+	unsigned long unlock_count[2];
+};
+
+struct rcu_batch {
+	struct rcu_head *head, **tail;
+};
+
+#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
+
+struct srcu_struct {
+	unsigned long completed;
+	struct srcu_array __percpu *per_cpu_ref;
+	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+	bool running;
+	/* callbacks just queued */
+	struct rcu_batch batch_queue;
+	/* callbacks try to do the first check_zero */
+	struct rcu_batch batch_check0;
+	/* callbacks done with the first check_zero and the flip */
+	struct rcu_batch batch_check1;
+	struct rcu_batch batch_done;
+	struct delayed_work work;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.completed = -300,					\
+		.per_cpu_ref = &name##_srcu_array,			\
+		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
+		.running = false,					\
+		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
+		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
+		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
+		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
+		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
+ */
+#define __DEFINE_SRCU(name, is_static)					\
+	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
+#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+
+void synchronize_srcu_expedited(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->completed;
+	*gpnum = *completed;
+	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head)
+		(*gpnum)++;
+}
+
+#endif
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -0,0 +1,93 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tiny variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TINY_H
+#define _LINUX_SRCU_TINY_H
+
+#include <linux/swait.h>
+
+struct srcu_struct {
+	int srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	struct swait_queue_head srcu_wq;
+					/* Last srcu_read_unlock() wakes GP. */
+	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */
+	struct rcu_segcblist srcu_cblist;
+					/* Pending SRCU callbacks. */
+	int srcu_idx;			/* Current reader array element. */
+	bool srcu_gp_running;		/* GP workqueue running? */
+	bool srcu_gp_waiting;		/* GP waiting for readers? */
+	struct work_struct srcu_work;	/* For driving grace periods. */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+void srcu_drive_gp(struct work_struct *wp);
+
+#define __SRCU_STRUCT_INIT(name)					\
+{									\
+	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
+	.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),	\
+	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	__SRCU_DEP_MAP_INIT(name)					\
+}
+
+/*
+ * This odd _STATIC_ arrangement is needed for API compatibility with
+ * Tree SRCU, which needs some per-CPU data.
+ */
+#define DEFINE_SRCU(name) \
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_STATIC_SRCU(name) \
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+
+void synchronize_srcu(struct srcu_struct *sp);
+
+static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+static inline void srcu_barrier(struct srcu_struct *sp)
+{
+	synchronize_srcu(sp);
+}
+
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return 0;
+}
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_gp_seq;
+	*gpnum = *completed;
+}
+
+#endif
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -0,0 +1,150 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion,
+ *	tree variant.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2017
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ */
+
+#ifndef _LINUX_SRCU_TREE_H
+#define _LINUX_SRCU_TREE_H
+
+#include <linux/rcu_node_tree.h>
+#include <linux/completion.h>
+
+struct srcu_node;
+struct srcu_struct;
+
+/*
+ * Per-CPU structure feeding into leaf srcu_node, similar in function
+ * to rcu_node.
+ */
+struct srcu_data {
+	/* Read-side state. */
+	unsigned long srcu_lock_count[2];	/* Locks per CPU. */
+	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
+
+	/* Update-side state. */
+	spinlock_t lock ____cacheline_internodealigned_in_smp;
+	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
+	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	bool srcu_cblist_invoking;		/* Invoking these CBs? */
+	struct delayed_work work;		/* Context for CB invoking. */
+	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
+	struct srcu_node *mynode;		/* Leaf srcu_node. */
+	unsigned long grpmask;			/* Mask for leaf srcu_node */
+						/*  ->srcu_data_have_cbs[]. */
+	int cpu;
+	struct srcu_struct *sp;
+};
+
+/*
+ * Node in SRCU combining tree, similar in function to rcu_data.
+ */
+struct srcu_node {
+	spinlock_t lock;
+	unsigned long srcu_have_cbs[4];		/* GP seq for children */
+						/*  having CBs, but only */
+						/*  is > ->srcu_gq_seq. */
+	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs */
+						/*  have CBs for given GP? */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	struct srcu_node *srcu_parent;		/* Next up in tree. */
+	int grplo;				/* Least CPU for node. */
+	int grphi;				/* Biggest CPU for node. */
+};
+
+/*
+ * Per-SRCU-domain structure, similar in function to rcu_state.
+ */
+struct srcu_struct {
+	struct srcu_node node[NUM_RCU_NODES];	/* Combining tree. */
+	struct srcu_node *level[RCU_NUM_LVLS + 1];
+						/* First node at each level. */
+	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
+	spinlock_t gp_lock;			/* protect ->srcu_cblist */
+	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
+	unsigned int srcu_idx;			/* Current rdr array element. */
+	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
+	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
+	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
+	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
+	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
+	struct completion srcu_barrier_completion;
+						/* Awaken barrier rq at end. */
+	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
+						/*  callback for the barrier */
+						/*  operation. */
+	struct delayed_work work;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+};
+
+/* Values for state variable (bottom bits of ->srcu_gp_seq). */
+#define SRCU_STATE_IDLE		0
+#define SRCU_STATE_SCAN1	1
+#define SRCU_STATE_SCAN2	2
+
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name)					\
+	{								\
+		.sda = &name##_srcu_data,				\
+		.gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock),		\
+		.srcu_gp_seq_needed = 0 - 1,				\
+		__SRCU_DEP_MAP_INIT(name)				\
+	}
+
+/*
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
+ */
+#define __DEFINE_SRCU(name, is_static)					\
+	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
+#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
+
+void synchronize_srcu_expedited(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
+#endif
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -209,7 +209,7 @@ struct ustat {
 * naturally due ABI requirements, but some architectures (like CRIS) have
 * weird ABI and we need to ask it explicitly.
 *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * The alignment is required to guarantee that bit 0 of @next will be
 * clear under normal conditions -- as long as we use call_rcu(),
 * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
 *