Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar: "The biggest part of this tree is the new auto-generated atomics API wrappers by Mark Rutland. The primary motivation was to allow instrumentation without uglifying the primary source code. The linecount increase comes from adding the auto-generated files to the Git space as well: include/asm-generic/atomic-instrumented.h | 1689 ++++++++++++++++-- include/asm-generic/atomic-long.h | 1174 ++++++++++--- include/linux/atomic-fallback.h | 2295 +++++++++++++++++++++++++ include/linux/atomic.h | 1241 +------------ I preferred this approach, so that the full call stack of the (already complex) locking APIs is still fully visible in 'git grep'. But if this is excessive we could certainly hide them. There's a separate build-time mechanism to determine whether the headers are out of date (they should never be stale if we do our job right). Anyway, nothing from this should be visible to regular kernel developers. Other changes: - Add support for dynamic keys, which removes a source of false positives in the workqueue code, among other things (Bart Van Assche) - Updates to tools/memory-model (Andrea Parri, Paul E. McKenney) - qspinlock, wake_q and lockdep micro-optimizations (Waiman Long) - misc other updates and enhancements"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (48 commits) locking/lockdep: Shrink struct lock_class_key locking/lockdep: Add module_param to enable consistency checks lockdep/lib/tests: Test dynamic key registration lockdep/lib/tests: Fix run_tests.sh kernel/workqueue: Use dynamic lockdep keys for workqueues locking/lockdep: Add support for dynamic keys locking/lockdep: Verify whether lock objects are small enough to be used as class keys locking/lockdep: Check data structure consistency locking/lockdep: Reuse lock chains that have been freed locking/lockdep: Fix a comment in add_chain_cache() locking/lockdep: Introduce lockdep_next_lockchain() and lock_chain_count() locking/lockdep: Reuse list entries that are no longer in use locking/lockdep: Free lock classes that are no longer in use locking/lockdep: Update two outdated comments locking/lockdep: Make it easy to detect whether or not inside a selftest locking/lockdep: Split lockdep_free_key_range() and lockdep_reset_lock() locking/lockdep: Initialize the locks_before and locks_after lists earlier locking/lockdep: Make zap_class() remove all matching lock order entries locking/lockdep: Reorder struct lock_class members locking/lockdep: Avoid that add_chain_cache() adds an invalid chain to the cache ...
2019-03-06 07:17:17 -08:00
commit 3478588b51
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -22,6 +22,10 @@ enum lock_usage_bit {
 	LOCK_USAGE_STATES
 };

+#define LOCK_USAGE_READ_MASK 1
+#define LOCK_USAGE_DIR_MASK  2
+#define LOCK_USAGE_STATE_MASK (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK))
+
 /*
 * Usage-state bitmasks:
 */
@@ -96,7 +100,8 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);

 extern unsigned long nr_lock_classes;
 extern unsigned long nr_list_entries;
-extern unsigned long nr_lock_chains;
+long lockdep_next_lockchain(long i);
+unsigned long lock_chain_count(void);
 extern int nr_chain_hlocks;
 extern unsigned long nr_stack_trace_entries;

--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -104,18 +104,18 @@ static const struct seq_operations lockdep_ops = {
 #ifdef CONFIG_PROVE_LOCKING
 static void *lc_start(struct seq_file *m, loff_t *pos)
 {
+	if (*pos < 0)
+		return NULL;
+
 	if (*pos == 0)
 		return SEQ_START_TOKEN;

-	if (*pos - 1 < nr_lock_chains)
-		return lock_chains + (*pos - 1);
-
-	return NULL;
+	return lock_chains + (*pos - 1);
 }

 static void *lc_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	(*pos)++;
+	*pos = lockdep_next_lockchain(*pos - 1) + 1;
 	return lc_start(m, pos);
 }

@@ -268,7 +268,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)

 #ifdef CONFIG_PROVE_LOCKING
 	seq_printf(m, " dependency chains:             %11lu [max: %lu]\n",
-			nr_lock_chains, MAX_LOCKDEP_CHAINS);
+			lock_chain_count(), MAX_LOCKDEP_CHAINS);
 	seq_printf(m, " dependency chain hlocks:       %11d [max: %lu]\n",
 			nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
 #endif
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -124,9 +124,6 @@ static inline __pure u32 encode_tail(int cpu, int idx)
 {
 	u32 tail;

-#ifdef CONFIG_DEBUG_SPINLOCK
-	BUG_ON(idx > 3);
-#endif
 	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
 	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

@@ -412,12 +409,28 @@ pv_queue:
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);

+	/*
+	 * 4 nodes are allocated based on the assumption that there will
+	 * not be nested NMIs taking spinlocks. That may not be true in
+	 * some architectures even though the chance of needing more than
+	 * 4 nodes will still be extremely unlikely. When that happens,
+	 * we fall back to spinning on the lock directly without using
+	 * any MCS node. This is not the most elegant solution, but is
+	 * simple enough.
+	 */
+	if (unlikely(idx >= MAX_NODES)) {
+		qstat_inc(qstat_lock_no_node, true);
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
+		goto release;
+	}
+
 	node = grab_mcs_node(node, idx);

 	/*
 	 * Keep counts of non-zero index values:
 	 */
-	qstat_inc(qstat_lock_idx1 + idx - 1, idx);
+	qstat_inc(qstat_lock_use_node2 + idx - 1, idx);

 	/*
 	 * Ensure that we increment the head node->count before initialising
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -30,6 +30,13 @@
 *   pv_wait_node	- # of vCPU wait's at a non-head queue node
 *   lock_pending	- # of locking operations via pending code
 *   lock_slowpath	- # of locking operations via MCS lock queue
+ *   lock_use_node2	- # of locking operations that use 2nd per-CPU node
+ *   lock_use_node3	- # of locking operations that use 3rd per-CPU node
+ *   lock_use_node4	- # of locking operations that use 4th per-CPU node
+ *   lock_no_node	- # of locking operations without using per-CPU node
+ *
+ * Subtracting lock_use_node[234] from lock_slowpath will give you
+ * lock_use_node1.
 *
 * Writing to the "reset_counters" file will reset all the above counter
 * values.
@@ -55,9 +62,10 @@ enum qlock_stats {
 	qstat_pv_wait_node,
 	qstat_lock_pending,
 	qstat_lock_slowpath,
-	qstat_lock_idx1,
-	qstat_lock_idx2,
-	qstat_lock_idx3,
+	qstat_lock_use_node2,
+	qstat_lock_use_node3,
+	qstat_lock_use_node4,
+	qstat_lock_no_node,
 	qstat_num,	/* Total number of statistical counters */
 	qstat_reset_cnts = qstat_num,
 };
@@ -85,9 +93,10 @@ static const char * const qstat_names[qstat_num + 1] = {
 	[qstat_pv_wait_node]       = "pv_wait_node",
 	[qstat_lock_pending]       = "lock_pending",
 	[qstat_lock_slowpath]      = "lock_slowpath",
-	[qstat_lock_idx1]	   = "lock_index1",
-	[qstat_lock_idx2]	   = "lock_index2",
-	[qstat_lock_idx3]	   = "lock_index3",
+	[qstat_lock_use_node2]	   = "lock_use_node2",
+	[qstat_lock_use_node3]	   = "lock_use_node3",
+	[qstat_lock_use_node4]	   = "lock_use_node4",
+	[qstat_lock_no_node]	   = "lock_no_node",
 	[qstat_reset_cnts]         = "reset_counters",
 };

--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * Ensure issuing the wakeup (either by us or someone else)
 		 * after setting the reader waiter to nil.
 		 */
-		wake_q_add(wake_q, tsk);
-		/* wake_q_add() already take the task ref */
-		put_task_struct(tsk);
+		wake_q_add_safe(wake_q, tsk);
 	}

 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;