ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics
After a bunch of benchmarking on the interaction between dmb and pldw, it turns out that issuing the pldw *after* the dmb instruction can give modest performance gains (~3% atomic_add_return improvement on a dual A15). This patch adds prefetchw invocations to our barriered atomic operations including cmpxchg, test_and_xxx and futexes. Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:

committed by
Russell King

parent
6ea41c8011
commit
c32ffce0f6
@@ -37,6 +37,11 @@ UNWIND( .fnstart )
|
||||
add r1, r1, r0, lsl #2 @ Get word offset
|
||||
mov r3, r2, lsl r3 @ create mask
|
||||
smp_dmb
|
||||
#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
|
||||
.arch_extension mp
|
||||
ALT_SMP(W(pldw) [r1])
|
||||
ALT_UP(W(nop))
|
||||
#endif
|
||||
1: ldrex r2, [r1]
|
||||
ands r0, r2, r3 @ save old value of bit
|
||||
\instr r2, r2, r3 @ toggle bit
|
||||
|
Reference in New Issue
Block a user