fpsimdmacros.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <[email protected]>
 */

#include <asm/assembler.h>

.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm

.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
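
/*
 * Illustrative use of the two macros above; the register choices here are
 * only an example, not part of the original header. \state holds the
 * address of a V0-V31 dump followed by FPSR/FPCR (512 bytes of vector
 * state plus two 32-bit words, as in the kernel's struct user_fpsimd_state)
 * and \tmpnr is the number of a scratch general-purpose register:
 *
 *	fpsimd_save	x0, 8		// save V0-V31, FPSR, FPCR at [x0]
 *	fpsimd_restore	x0, 8		// reload them (clobbers x0)
 *
 * Both macros write back \state at the q30/q31 pair (the '!' pre-index),
 * which is why FPSR/FPCR are then addressed at #16 * 2, i.e. byte offset
 * 512 from the original base.
 */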

/* Sanity-check macros to help avoid encoding garbage instructions */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
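
/*
 * The SME ZA LDR/STR encodings further down only carry a 2-bit
 * vector-select field (note the "& 3" there), which the architecture maps
 * onto w12-w15; hence the narrow range accepted by the check below.
 */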
.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm

/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
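
/*
 * How the offset is packed in the four encodings above (derived from the
 * masks and shifts): offset bits [2:0] land in instruction bits [12:10]
 * and offset bits [8:3] in bits [21:16], i.e. the signed 9-bit, VL-scaled
 * immediate of the SVE LDR/STR (vector/predicate) forms, split into its
 * low and high fields. The -0x100..0xff range check matches that 9-bit
 * signed immediate.
 */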

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

/* SME instruction encodings for non-SME-capable assemblers */
/* (pre binutils 2.38/LLVM 13) */

/* RDSVL X\nx, #\imm */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * Zero the entire ZA array
 *	ZERO ZA
 */
.macro zero_za
	.inst	0xc00800ff
.endm

.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body	%\from
	.else
		__for	%\from, %((\from) + ((\to) - (\from)) / 2)
		__for	%((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
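
/*
 * Example of how _for is used further down in this file:
 *
 *	_for	n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
 *
 * expands \insn once per value of n from 0 to 31, i.e. 32 _sve_str_v
 * invocations. __for recurses by halving the range, so the macro nesting
 * depth grows logarithmically rather than linearly with the range size.
 */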

/* Update ZCR_EL1.LEN with the new VQ */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_ZCR_EL1
	bic	\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm

/* Update SMCR_EL1.LEN with the new VQ */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_SMCR_EL1
	bic	\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_SMCR_EL1, \xtmp2	//self-synchronising
921:
.endm
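
/*
 * Illustrative use of the two macros above (register choices are only an
 * example): \xvqminus1 holds the requested vector length in quadwords
 * minus one and must already fit within the LEN field, since it is ORed
 * in unmasked; \xtmp and \xtmp2 are scratch registers:
 *
 *	sve_load_vq	x0, x1, x2
 *
 * The MSR is skipped when LEN already has the requested value, because
 * the write is self-synchronising and therefore worth avoiding.
 */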

/* Preserve the first 128 bits of Z\nz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm

.macro sve_flush_z
	_for	n, 0, 31, _sve_flush_z \n
.endm

.macro sve_flush_p
	_for	n, 0, 15, _sve_pfalse \n
.endm

.macro sve_flush_ffr
	_sve_wrffr 0
.endm
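
/*
 * Buffer layout assumed by sve_save/sve_load below, as implied by their
 * offsets: \nxbase points at the FFR save slot; Z0-Z31 are stored at
 * VL-scaled offsets -34..-3 below it and P0-P15 at predicate-sized
 * offsets -16..-1, while FPSR and FPCR go through \xpfpsr (FPSR at
 * offset 0, FPCR at offset 4).
 */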
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
	_for	n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
	_for	n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
	cbz	\save_ffr, 921f
	_sve_rdffr 0
	b	922f
921:
	_sve_pfalse 0			// Zero out FFR
922:
	_sve_str_p 0, \nxbase
	_sve_ldr_p 0, \nxbase, -16
	mrs	x\nxtmp, fpsr
	str	w\nxtmp, [\xpfpsr]
	mrs	x\nxtmp, fpcr
	str	w\nxtmp, [\xpfpsr, #4]
.endm

.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	_for	n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
	cbz	\restore_ffr, 921f
	_sve_ldr_p 0, \nxbase
	_sve_wrffr 0
921:
	_for	n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
	ldr	w\nxtmp, [\xpfpsr]
	msr	fpsr, x\nxtmp
	ldr	w\nxtmp, [\xpfpsr, #4]
	msr	fpcr, x\nxtmp
.endm

.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0
423:
	_sme_str_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm

.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0
423:
	_sme_ldr_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm
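
/*
 * Illustrative use of sme_save_za/sme_load_za (register numbers are only
 * an example): \nw must name one of w12-w15 (see _sme_check_wv), \xvl is
 * presumably the streaming vector length in bytes, and x\nxbase points at
 * the start of the ZA buffer. Each loop iteration transfers one ZA row
 * and advances the base by \xvl, so x\nxbase is clobbered and the loop
 * runs \xvl times:
 *
 *	sme_save_za	0, x1, 12	// dump ZA at [x0], x1 = SVL in bytes
 */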