s390/spinlock: optimize spin_unlock code
Use a memory barrier + store sequence instead of a load + compare-and-swap sequence to unlock a spinlock and an rwlock. For the spinlock case this saves us two memory reads and an unneeded CPU serialization after the compare-and-swap instruction has stored the new value. The kernel size (performance_defconfig) gets reduced by ~14k. Average execution time of a tight inlined spin_unlock loop drops from 5.8ns to 0.7ns on a zEC12 machine. An artificial stress test case, in which several counters are protected by a single spinlock and are only incremented while holding the spinlock, shows ~30% improvement on a 4 cpu machine. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:

committed by
Martin Schwidefsky

parent
3d1e220d08
commit
4423028203
@@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
|
||||
_raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
|
||||
}
|
||||
|
||||
static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
|
||||
{
|
||||
return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
|
||||
}
|
||||
|
||||
static inline void arch_spin_lock(arch_spinlock_t *lp)
|
||||
{
|
||||
if (!arch_spin_trylock_once(lp))
|
||||
@@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
|
||||
|
||||
static inline void arch_spin_unlock(arch_spinlock_t *lp)
|
||||
{
|
||||
arch_spin_tryrelease_once(lp);
|
||||
typecheck(unsigned int, lp->lock);
|
||||
asm volatile(
|
||||
__ASM_BARRIER
|
||||
"st %1,%0\n"
|
||||
: "+Q" (lp->lock)
|
||||
: "d" (0)
|
||||
: "cc", "memory");
|
||||
}
|
||||
|
||||
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
|
||||
@@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
|
||||
|
||||
static inline void arch_write_unlock(arch_rwlock_t *rw)
|
||||
{
|
||||
_raw_compare_and_swap(&rw->lock, 0x80000000, 0);
|
||||
typecheck(unsigned int, rw->lock);
|
||||
asm volatile(
|
||||
__ASM_BARRIER
|
||||
"st %1,%0\n"
|
||||
: "+Q" (rw->lock)
|
||||
: "d" (0)
|
||||
: "cc", "memory");
|
||||
}
|
||||
|
||||
static inline int arch_read_trylock(arch_rwlock_t *rw)
|
||||
|
Reference in New Issue
Block a user