Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar: "The locking tree was busier in this cycle than the usual pattern - a couple of major projects happened to coincide. The main changes are: - implement the atomic_fetch_{add,sub,and,or,xor}() API natively across all SMP architectures (Peter Zijlstra) - add atomic_fetch_{inc/dec}() as well, using the generic primitives (Davidlohr Bueso) - optimize various aspects of rwsems (Jason Low, Davidlohr Bueso, Waiman Long) - optimize smp_cond_load_acquire() on arm64 and implement LSE based atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() on arm64 (Will Deacon) - introduce smp_acquire__after_ctrl_dep() and fix various barrier mis-uses and bugs (Peter Zijlstra) - after discovering ancient spin_unlock_wait() barrier bugs in its implementation and usage, strengthen its semantics and update/fix usage sites (Peter Zijlstra) - optimize mutex_trylock() fastpath (Peter Zijlstra) - ... misc fixes and cleanups" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits) locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() locking/static_keys: Fix non static symbol Sparse warning locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec() locking/atomic, arch/tile: Fix tilepro build locking/atomic, arch/m68k: Remove comment locking/atomic, arch/arc: Fix build locking/Documentation: Clarify limited control-dependency scope locking/atomic, arch/rwsem: Employ atomic_long_fetch_add() locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire() locking/atomic, arch/mips: Convert to _relaxed atomics locking/atomic, arch/alpha: Convert to _relaxed atomics locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions locking/atomic: Remove linux/atomic.h:atomic_fetch_or() locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() locking/atomic: Fix atomic64_relaxed() bits locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() ...
2016-07-25 12:41:29 -07:00
parent a2303849a6 f06628638c
commit c86ad14d30
108 changed files with 3341 additions and 1031 deletions
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return val;							\
 }

+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	unsigned int val, orig;						\
+									\
+	/*								\
+	 * Explicit full memory barrier needed before/after as		\
+	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
+	 */								\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[orig], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"						\n"		\
+	: [val]	"=&r"	(val),						\
+	  [orig] "=&r" (orig)						\
+	: [ctr]	"r"	(&v->counter),					\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+									\
+	smp_mb();							\
+									\
+	return orig;							\
+}
+
 #else	/* !CONFIG_ARC_HAS_LLSC */

 #ifndef CONFIG_SMP
@@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return temp;							\
 }

+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	unsigned long flags;						\
+	unsigned long orig;						\
+									\
+	/*								\
+	 * spin lock/unlock provides the needed smp_mb() before/after	\
+	 */								\
+	atomic_ops_lock(flags);						\
+	orig = v->counter;						\
+	v->counter c_op i;						\
+	atomic_ops_unlock(flags);					\
+									\
+	return orig;							\
+}
+
 #endif /* !CONFIG_ARC_HAS_LLSC */

 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_OP_RETURN(op, c_op, asm_op)
+	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)

 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)

 #define atomic_andnot atomic_andnot

-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)

-#undef SCOND_FAIL_RETRY_VAR_DEF
-#undef SCOND_FAIL_RETRY_ASM
-#undef SCOND_FAIL_RETRY_VARS
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)

 #else /* CONFIG_ARC_PLAT_EZNPS */

@@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return temp;							\
 }

+#define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	unsigned int temp = i;						\
+									\
+	/* Explicit full memory barrier needed before/after */		\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"	mov r2, %0\n"						\
+	"	mov r3, %1\n"						\
+	"       .word %2\n"						\
+	"	mov %0, r2"						\
+	: "+r"(temp)							\
+	: "r"(&v->counter), "i"(asm_op)					\
+	: "r2", "r3", "memory");					\
+									\
+	smp_mb();							\
+									\
+	return temp;							\
+}
+
 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
-	ATOMIC_OP_RETURN(op, c_op, asm_op)
+	ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)

 ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
 #define atomic_sub(i, v) atomic_add(-(i), (v))
 #define atomic_sub_return(i, v) atomic_add_return(-(i), (v))

-ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)					\
+	ATOMIC_OP(op, c_op, asm_op)					\
+	ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
 #define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
+ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
+ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)

 #endif /* CONFIG_ARC_PLAT_EZNPS */

 #undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP

--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -15,8 +15,11 @@

 #define arch_spin_is_locked(x)	((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
 #define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->slock, !VAL);
+}

 #ifdef CONFIG_ARC_HAS_LLSC