sync.h

/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed. That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older
 *      stores to complete.
 */
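
/*
 * As an illustration (not itself part of this header's API), the distinction
 * shows up directly in the emitted instruction: a full completion barrier is
 * the architectural default stype,
 *
 *	sync		# stype 0x00, full completion barrier
 *
 * whilst the store-to-store ordering barrier uses a dedicated stype,
 *
 *	sync	0x4	# SYNC_WMB: older stores ordered before younger stores
 */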

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full
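
/*
 * For reference - an assumption about a possible future mapping, not
 * something this header implements - the lightweight MIPSr6 stypes these
 * could one day use are SYNC_ACQUIRE (0x11) for 'aq', SYNC_RELEASE (0x12)
 * for 'rl' & SYNC_MB (0x10) for 'mb'.
 */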

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif
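
/*
 * The net effect on Octeon, for example, is that __SYNC(rmb, always) emits no
 * instruction at all, whilst __SYNC(wmb, always) emits 'sync 0x4' - twice, in
 * fact, per __SYNC_rpt() below.
 */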

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt
 * operation has been performed by all coherent CPUs, we must issue a sync
 * instruction of this type. Once this instruction graduates all coherent
 * CPUs will have observed the invalidation.
 */
#define __SYNC_ginv	0x14
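
/*
 * A minimal sketch of the intended pattern (operands elided; illustrative
 * only):
 *
 *	ginvt	...		# globally invalidate a TLB entry
 *	sync	0x14		# wait for all coherent CPUs to observe it
 */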

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif
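
/*
 * For example, __SYNC(mb, weak_llsc) emits a sync instruction only when
 * CONFIG_WEAK_REORDERING_BEYOND_LLSC=y; otherwise __SYNC_weak_llsc evaluates
 * to 0 & ____SYNC() below emits nothing.
 */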

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 *   1) A memory access appearing prior to the LL in program order may
 *      actually be executed after the LL - this is the reordering case.
 *
 *      In order to avoid this we need to place a memory barrier (ie. a SYNC
 *      instruction) prior to every LL instruction, in between it and any
 *      earlier memory access instructions.
 *
 *      This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and
 *      later.
 *
 *   2) If a conditional branch exists between an LL & SC with a target
 *      outside of the LL-SC loop, for example an exit upon value mismatch in
 *      cmpxchg() or similar, then misprediction of the branch may allow
 *      speculative execution of memory accesses from outside of the LL-SC
 *      loop.
 *
 *      In order to avoid this we need a memory barrier (ie. a SYNC
 *      instruction) at each affected branch target.
 *
 *      This case affects all current Loongson 3 CPUs.
 *
 * Both of the above cases cause an error in the cache coherence protocol,
 * such that the Invalidate of a competing LL-SC goes 'missing' & the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state, allowing the SC to proceed.
 *
 * The error can therefore only occur on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
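
/*
 * A minimal sketch of where these barriers belong in an LL/SC sequence, in
 * the style of the kernel's cmpxchg() loops (labels & registers illustrative
 * only):
 *
 *	__SYNC(full, loongson3_war)	# case 1: before the LL
 *   1:	ll	t0, (a0)
 *	bne	t0, a1, 2f		# branch out of the LL-SC loop
 *	move	t1, a2
 *	sc	t1, (a0)
 *	beqz	t1, 1b
 *   2:	__SYNC(full, loongson3_war)	# case 2: at the branch target
 */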

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 *
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
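
/*
 * Working through the assembler arithmetic: for a wmb on Octeon the
 * comparison (type == __SYNC_wmb) evaluates to -1, giving a repeat count of
 * 1 - (-1) = 2; for every other type it evaluates to 0, giving 1 - 0 = 1.
 */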

/*
 * The main event. Here we actually emit a sync instruction of a given type,
 * if reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif
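
/*
 * For instance, ____SYNC(0x00, 1, ) assembles to a single 'sync' instruction
 * (wrapped in a .set push/pop pair), whilst ____SYNC(-1, 1, nop) fails the
 * .if condition & instead emits the 'nop' passed as its _else argument.
 */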

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif
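
/*
 * To illustrate the two-step expansion: __SYNC(mb, always) first becomes
 * ___SYNC(__SYNC_mb, __SYNC_always, ), whose arguments expand to 0x00 &
 * (1 << 0) before ____SYNC() - or, in C, __stringify() - ever sees them.
 */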

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
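
/*
 * A minimal sketch of C-side usage, assuming __stringify() is available in
 * the including translation unit (the function name is illustrative only):
 *
 *	static inline void example_wmb(void)
 *	{
 *		asm volatile(__SYNC(wmb, always) ::: "memory");
 *	}
 *
 * In an assembly file the same macro emits the instructions directly, e.g.:
 *
 *	__SYNC(full, weak_ordering)
 */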

#endif /* __MIPS_ASM_SYNC_H__ */