Files
android_kernel_xiaomi_sm8450/arch/arm64/include/asm/atomic_lse.h
Mark Rutland 057f5ddfbc arm64: cmpxchg_double*: hazard against entire exchange variable
[ Upstream commit 031af50045ea97ed4386eb3751ca2c134d0fc911 ]

The inline assembly for arm64's cmpxchg_double*() implementations use a
+Q constraint to hazard against other accesses to the memory location
being exchanged. However, the pointer passed to the constraint is a
pointer to unsigned long, and thus the hazard only applies to the first
8 bytes of the location.

GCC can take advantage of this, assuming that other portions of the
location are unchanged, leading to a number of potential problems.

This is similar to what we fixed back in commit:

  fee960bed5 ("arm64: xchg: hazard against entire exchange variable")

... but we forgot to adjust cmpxchg_double*() similarly at the same
time.

The same problem applies, as demonstrated with the following test:

| struct big {
|         u64 lo, hi;
| } __aligned(128);
|
| unsigned long foo(struct big *b)
| {
|         u64 hi_old, hi_new;
|
|         hi_old = b->hi;
|         cmpxchg_double_local(&b->lo, &b->hi, 0x12, 0x34, 0x56, 0x78);
|         hi_new = b->hi;
|
|         return hi_old ^ hi_new;
| }

... which GCC 12.1.0 compiles as:

| 0000000000000000 <foo>:
|    0:   d503233f        paciasp
|    4:   aa0003e4        mov     x4, x0
|    8:   1400000e        b       40 <foo+0x40>
|    c:   d2800240        mov     x0, #0x12                       // #18
|   10:   d2800681        mov     x1, #0x34                       // #52
|   14:   aa0003e5        mov     x5, x0
|   18:   aa0103e6        mov     x6, x1
|   1c:   d2800ac2        mov     x2, #0x56                       // #86
|   20:   d2800f03        mov     x3, #0x78                       // #120
|   24:   48207c82        casp    x0, x1, x2, x3, [x4]
|   28:   ca050000        eor     x0, x0, x5
|   2c:   ca060021        eor     x1, x1, x6
|   30:   aa010000        orr     x0, x0, x1
|   34:   d2800000        mov     x0, #0x0                        // #0    <--- BANG
|   38:   d50323bf        autiasp
|   3c:   d65f03c0        ret
|   40:   d2800240        mov     x0, #0x12                       // #18
|   44:   d2800681        mov     x1, #0x34                       // #52
|   48:   d2800ac2        mov     x2, #0x56                       // #86
|   4c:   d2800f03        mov     x3, #0x78                       // #120
|   50:   f9800091        prfm    pstl1strm, [x4]
|   54:   c87f1885        ldxp    x5, x6, [x4]
|   58:   ca0000a5        eor     x5, x5, x0
|   5c:   ca0100c6        eor     x6, x6, x1
|   60:   aa0600a6        orr     x6, x5, x6
|   64:   b5000066        cbnz    x6, 70 <foo+0x70>
|   68:   c8250c82        stxp    w5, x2, x3, [x4]
|   6c:   35ffff45        cbnz    w5, 54 <foo+0x54>
|   70:   d2800000        mov     x0, #0x0                        // #0     <--- BANG
|   74:   d50323bf        autiasp
|   78:   d65f03c0        ret

Notice that at the lines with "BANG" comments, GCC has assumed that the
higher 8 bytes are unchanged by the cmpxchg_double() call, and that
`hi_old ^ hi_new` can be reduced to a constant zero, for both LSE and
LL/SC versions of cmpxchg_double().

This patch fixes the issue by passing a pointer to __uint128_t into the
+Q constraint, ensuring that the compiler hazards against the entire 16
bytes being modified.

With this change, GCC 12.1.0 compiles the above test as:

| 0000000000000000 <foo>:
|    0:   f9400407        ldr     x7, [x0, #8]
|    4:   d503233f        paciasp
|    8:   aa0003e4        mov     x4, x0
|    c:   1400000f        b       48 <foo+0x48>
|   10:   d2800240        mov     x0, #0x12                       // #18
|   14:   d2800681        mov     x1, #0x34                       // #52
|   18:   aa0003e5        mov     x5, x0
|   1c:   aa0103e6        mov     x6, x1
|   20:   d2800ac2        mov     x2, #0x56                       // #86
|   24:   d2800f03        mov     x3, #0x78                       // #120
|   28:   48207c82        casp    x0, x1, x2, x3, [x4]
|   2c:   ca050000        eor     x0, x0, x5
|   30:   ca060021        eor     x1, x1, x6
|   34:   aa010000        orr     x0, x0, x1
|   38:   f9400480        ldr     x0, [x4, #8]
|   3c:   d50323bf        autiasp
|   40:   ca0000e0        eor     x0, x7, x0
|   44:   d65f03c0        ret
|   48:   d2800240        mov     x0, #0x12                       // #18
|   4c:   d2800681        mov     x1, #0x34                       // #52
|   50:   d2800ac2        mov     x2, #0x56                       // #86
|   54:   d2800f03        mov     x3, #0x78                       // #120
|   58:   f9800091        prfm    pstl1strm, [x4]
|   5c:   c87f1885        ldxp    x5, x6, [x4]
|   60:   ca0000a5        eor     x5, x5, x0
|   64:   ca0100c6        eor     x6, x6, x1
|   68:   aa0600a6        orr     x6, x5, x6
|   6c:   b5000066        cbnz    x6, 78 <foo+0x78>
|   70:   c8250c82        stxp    w5, x2, x3, [x4]
|   74:   35ffff45        cbnz    w5, 5c <foo+0x5c>
|   78:   f9400480        ldr     x0, [x4, #8]
|   7c:   d50323bf        autiasp
|   80:   ca0000e0        eor     x0, x7, x0
|   84:   d65f03c0        ret

... sampling the high 8 bytes before and after the cmpxchg, and
performing an EOR, as we'd expect.

For backporting, I've tested this atop linux-4.9.y with GCC 5.5.0. Note
that linux-4.9.y is oldest currently supported stable release, and
mandates GCC 5.1+. Unfortunately I couldn't get a GCC 5.1 binary to run
on my machines due to library incompatibilities.

I've also used a standalone test to check that we can use a __uint128_t
pointer in a +Q constraint at least as far back as GCC 4.8.5 and LLVM
3.9.1.

Fixes: 5284e1b4bc ("arm64: xchg: Implement cmpxchg_double")
Fixes: e9a4b79565 ("arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU")
Reported-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/lkml/Y6DEfQXymYVgL3oJ@boqun-archlinux/
Reported-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/lkml/Y6GXoO4qmH9OIZ5Q@hirez.programming.kicks-ass.net/
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: stable@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230104151626.3262137-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2023-01-18 11:45:01 +01:00

420 lines
11 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/atomic.h
*
* Copyright (C) 1996 Russell King.
* Copyright (C) 2002 Deep Blue Solutions Ltd.
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_ATOMIC_LSE_H
#define __ASM_ATOMIC_LSE_H
#define ATOMIC_OP(op, asm_op) \
static inline void __lse_atomic_##op(int i, atomic_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" " #asm_op " %w[i], %[v]\n" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v)); \
}
ATOMIC_OP(andnot, stclr)
ATOMIC_OP(or, stset)
ATOMIC_OP(xor, steor)
ATOMIC_OP(add, stadd)
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" " #asm_op #mb " %w[i], %w[i], %[v]" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
#define ATOMIC_FETCH_OPS(op, asm_op) \
ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \
ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \
ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \
ATOMIC_FETCH_OP( , al, op, asm_op, "memory")
ATOMIC_FETCH_OPS(andnot, ldclr)
ATOMIC_FETCH_OPS(or, ldset)
ATOMIC_FETCH_OPS(xor, ldeor)
ATOMIC_FETCH_OPS(add, ldadd)
#undef ATOMIC_FETCH_OP
#undef ATOMIC_FETCH_OPS
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
{ \
u32 tmp; \
\
asm volatile( \
__LSE_PREAMBLE \
" ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
" add %w[i], %w[i], %w[tmp]" \
: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC_OP_ADD_RETURN(_relaxed, )
ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
ATOMIC_OP_ADD_RETURN(_release, l, "memory")
ATOMIC_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC_OP_ADD_RETURN
static inline void __lse_atomic_and(int i, atomic_t *v)
{
asm volatile(
__LSE_PREAMBLE
" mvn %w[i], %w[i]\n"
" stclr %w[i], %[v]"
: [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (v));
}
#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" mvn %w[i], %w[i]\n" \
" ldclr" #mb " %w[i], %w[i], %[v]" \
: [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC_FETCH_OP_AND(_relaxed, )
ATOMIC_FETCH_OP_AND(_acquire, a, "memory")
ATOMIC_FETCH_OP_AND(_release, l, "memory")
ATOMIC_FETCH_OP_AND( , al, "memory")
#undef ATOMIC_FETCH_OP_AND
static inline void __lse_atomic_sub(int i, atomic_t *v)
{
asm volatile(
__LSE_PREAMBLE
" neg %w[i], %w[i]\n"
" stadd %w[i], %[v]"
: [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (v));
}
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
{ \
u32 tmp; \
\
asm volatile( \
__LSE_PREAMBLE \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
" add %w[i], %w[i], %w[tmp]" \
: [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC_OP_SUB_RETURN(_relaxed, )
ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
ATOMIC_OP_SUB_RETURN(_release, l, "memory")
ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN
#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[i], %[v]" \
: [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC_FETCH_OP_SUB(_relaxed, )
ATOMIC_FETCH_OP_SUB(_acquire, a, "memory")
ATOMIC_FETCH_OP_SUB(_release, l, "memory")
ATOMIC_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC_FETCH_OP_SUB
#define ATOMIC64_OP(op, asm_op) \
static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" " #asm_op " %[i], %[v]\n" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v)); \
}
ATOMIC64_OP(andnot, stclr)
ATOMIC64_OP(or, stset)
ATOMIC64_OP(xor, steor)
ATOMIC64_OP(add, stadd)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
{ \
asm volatile( \
__LSE_PREAMBLE \
" " #asm_op #mb " %[i], %[i], %[v]" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
#define ATOMIC64_FETCH_OPS(op, asm_op) \
ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \
ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \
ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \
ATOMIC64_FETCH_OP( , al, op, asm_op, "memory")
ATOMIC64_FETCH_OPS(andnot, ldclr)
ATOMIC64_FETCH_OPS(or, ldset)
ATOMIC64_FETCH_OPS(xor, ldeor)
ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
{ \
unsigned long tmp; \
\
asm volatile( \
__LSE_PREAMBLE \
" ldadd" #mb " %[i], %x[tmp], %[v]\n" \
" add %[i], %[i], %x[tmp]" \
: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC64_OP_ADD_RETURN(_relaxed, )
ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
ATOMIC64_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC64_OP_ADD_RETURN
static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
{
asm volatile(
__LSE_PREAMBLE
" mvn %[i], %[i]\n"
" stclr %[i], %[v]"
: [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (v));
}
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" mvn %[i], %[i]\n" \
" ldclr" #mb " %[i], %[i], %[v]" \
: [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC64_FETCH_OP_AND(_relaxed, )
ATOMIC64_FETCH_OP_AND(_acquire, a, "memory")
ATOMIC64_FETCH_OP_AND(_release, l, "memory")
ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND
static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(
__LSE_PREAMBLE
" neg %[i], %[i]\n"
" stadd %[i], %[v]"
: [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (v));
}
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
{ \
unsigned long tmp; \
\
asm volatile( \
__LSE_PREAMBLE \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %x[tmp], %[v]\n" \
" add %[i], %[i], %x[tmp]" \
: [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC64_OP_SUB_RETURN(_relaxed, )
ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN
#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \
asm volatile( \
__LSE_PREAMBLE \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %[i], %[v]" \
: [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\
return i; \
}
ATOMIC64_FETCH_OP_SUB(_relaxed, )
ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory")
ATOMIC64_FETCH_OP_SUB(_release, l, "memory")
ATOMIC64_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC64_FETCH_OP_SUB
static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long tmp;
asm volatile(
__LSE_PREAMBLE
"1: ldr %x[tmp], %[v]\n"
" subs %[ret], %x[tmp], #1\n"
" b.lt 2f\n"
" casal %x[tmp], %[ret], %[v]\n"
" sub %x[tmp], %x[tmp], #1\n"
" sub %x[tmp], %x[tmp], %[ret]\n"
" cbnz %x[tmp], 1b\n"
"2:"
: [ret] "+&r" (v), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)
:
: "cc", "memory");
return (long)v;
}
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
static __always_inline u##sz \
__lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
register u##sz x1 asm ("x1") = old; \
register u##sz x2 asm ("x2") = new; \
unsigned long tmp; \
\
asm volatile( \
__LSE_PREAMBLE \
" mov %" #w "[tmp], %" #w "[old]\n" \
" cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
" mov %" #w "[ret], %" #w "[tmp]" \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \
[tmp] "=&r" (tmp) \
: [old] "r" (x1), [new] "r" (x2) \
: cl); \
\
return x0; \
}
__CMPXCHG_CASE(w, b, , 8, )
__CMPXCHG_CASE(w, h, , 16, )
__CMPXCHG_CASE(w, , , 32, )
__CMPXCHG_CASE(x, , , 64, )
__CMPXCHG_CASE(w, b, acq_, 8, a, "memory")
__CMPXCHG_CASE(w, h, acq_, 16, a, "memory")
__CMPXCHG_CASE(w, , acq_, 32, a, "memory")
__CMPXCHG_CASE(x, , acq_, 64, a, "memory")
__CMPXCHG_CASE(w, b, rel_, 8, l, "memory")
__CMPXCHG_CASE(w, h, rel_, 16, l, "memory")
__CMPXCHG_CASE(w, , rel_, 32, l, "memory")
__CMPXCHG_CASE(x, , rel_, 64, l, "memory")
__CMPXCHG_CASE(w, b, mb_, 8, al, "memory")
__CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
__CMPXCHG_CASE(w, , mb_, 32, al, "memory")
__CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, cl...) \
static __always_inline long \
__lse__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
volatile void *ptr) \
{ \
unsigned long oldval1 = old1; \
unsigned long oldval2 = old2; \
register unsigned long x0 asm ("x0") = old1; \
register unsigned long x1 asm ("x1") = old2; \
register unsigned long x2 asm ("x2") = new1; \
register unsigned long x3 asm ("x3") = new2; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\
asm volatile( \
__LSE_PREAMBLE \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
" eor %[old1], %[old1], %[oldval1]\n" \
" eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
[v] "+Q" (*(__uint128_t *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
: cl); \
\
return x0; \
}
__CMPXCHG_DBL( , )
__CMPXCHG_DBL(_mb, al, "memory")
#undef __CMPXCHG_DBL
#endif /* __ASM_ATOMIC_LSE_H */