za-test.S 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. // Copyright (C) 2021 ARM Limited.
  3. // Original author: Mark Brown <[email protected]>
  4. //
  5. // Scalable Matrix Extension ZA context switch test
  6. // Repeatedly writes unique test patterns into each ZA tile
  7. // and reads them back to verify integrity.
  8. //
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
  10. // (leave it running for as long as you want...)
  11. // kill $pids
  12. #include <asm/unistd.h>
  13. #include "assembler.h"
  14. #include "asm-offsets.h"
  15. #include "sme-inst.h"
  16. .arch_extension sve
  17. #define MAXVL 2048
  18. #define MAXVL_B (MAXVL / 8)
  19. // Declare some storage space to shadow ZA register contents and a
  20. // scratch buffer for a vector.
  21. .pushsection .text
  22. .data
  23. .align 4
  24. zaref:
  25. .space MAXVL_B * MAXVL_B
  26. scratch:
  27. .space MAXVL_B
  28. .popsection
  29. // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
  30. // Clobbers x0-x3
  31. function memcpy
  32. cmp x2, #0
  33. b.eq 1f
  34. 0: ldrb w3, [x1], #1
  35. strb w3, [x0], #1
  36. subs x2, x2, #1
  37. b.ne 0b
  38. 1: ret
  39. endfunction
  40. // Generate a test pattern for storage in ZA
  41. // x0: pid
  42. // x1: row in ZA
  43. // x2: generation
  44. // These values are used to constuct a 32-bit pattern that is repeated in the
  45. // scratch buffer as many times as will fit:
  46. // bits 31:28 generation number (increments once per test_loop)
  47. // bits 27:16 pid
  48. // bits 15: 8 row number
  49. // bits 7: 0 32-bit lane index
  50. function pattern
  51. mov w3, wzr
  52. bfi w3, w0, #16, #12 // PID
  53. bfi w3, w1, #8, #8 // Row
  54. bfi w3, w2, #28, #4 // Generation
  55. ldr x0, =scratch
  56. mov w1, #MAXVL_B / 4
  57. 0: str w3, [x0], #4
  58. add w3, w3, #1 // Lane
  59. subs w1, w1, #1
  60. b.ne 0b
  61. ret
  62. endfunction
  63. // Get the address of shadow data for ZA horizontal vector xn
  64. .macro _adrza xd, xn, nrtmp
  65. ldr \xd, =zaref
  66. rdsvl \nrtmp, 1
  67. madd \xd, x\nrtmp, \xn, \xd
  68. .endm
  69. // Set up test pattern in a ZA horizontal vector
  70. // x0: pid
  71. // x1: row number
  72. // x2: generation
  73. function setup_za
  74. mov x4, x30
  75. mov x12, x1 // Use x12 for vector select
  76. bl pattern // Get pattern in scratch buffer
  77. _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5
  78. mov x5, x0
  79. ldr x1, =scratch
  80. bl memcpy // length set up in x2 by _adrza
  81. _ldr_za 12, 5 // load vector w12 from pointer x5
  82. ret x4
  83. endfunction
  84. // Trivial memory compare: compare x2 bytes starting at address x0 with
  85. // bytes starting at address x1.
  86. // Returns only if all bytes match; otherwise, the program is aborted.
  87. // Clobbers x0-x5.
  88. function memcmp
  89. cbz x2, 2f
  90. stp x0, x1, [sp, #-0x20]!
  91. str x2, [sp, #0x10]
  92. mov x5, #0
  93. 0: ldrb w3, [x0, x5]
  94. ldrb w4, [x1, x5]
  95. add x5, x5, #1
  96. cmp w3, w4
  97. b.ne 1f
  98. subs x2, x2, #1
  99. b.ne 0b
  100. 1: ldr x2, [sp, #0x10]
  101. ldp x0, x1, [sp], #0x20
  102. b.ne barf
  103. 2: ret
  104. endfunction
  105. // Verify that a ZA vector matches its shadow in memory, else abort
  106. // x0: row number
  107. // Clobbers x0-x7 and x12.
  108. function check_za
  109. mov x3, x30
  110. mov x12, x0
  111. _adrza x5, x0, 6 // pointer to expected value in x5
  112. mov x4, x0
  113. ldr x7, =scratch // x7 is scratch
  114. mov x0, x7 // Poison scratch
  115. mov x1, x6
  116. bl memfill_ae
  117. _str_za 12, 7 // save vector w12 to pointer x7
  118. mov x0, x5
  119. mov x1, x7
  120. mov x2, x6
  121. mov x30, x3
  122. b memcmp
  123. endfunction
  124. // Any SME register modified here can cause corruption in the main
  125. // thread -- but *only* the locations modified here.
  126. function irritator_handler
  127. // Increment the irritation signal count (x23):
  128. ldr x0, [x2, #ucontext_regs + 8 * 23]
  129. add x0, x0, #1
  130. str x0, [x2, #ucontext_regs + 8 * 23]
  131. // Corrupt some random ZA data
  132. #if 0
  133. adr x0, .text + (irritator_handler - .text) / 16 * 16
  134. movi v0.8b, #1
  135. movi v9.16b, #2
  136. movi v31.8b, #3
  137. #endif
  138. ret
  139. endfunction
  140. function tickle_handler
  141. // Increment the signal count (x23):
  142. ldr x0, [x2, #ucontext_regs + 8 * 23]
  143. add x0, x0, #1
  144. str x0, [x2, #ucontext_regs + 8 * 23]
  145. ret
  146. endfunction
  147. function terminate_handler
  148. mov w21, w0
  149. mov x20, x2
  150. puts "Terminated by signal "
  151. mov w0, w21
  152. bl putdec
  153. puts ", no error, iterations="
  154. ldr x0, [x20, #ucontext_regs + 8 * 22]
  155. bl putdec
  156. puts ", signals="
  157. ldr x0, [x20, #ucontext_regs + 8 * 23]
  158. bl putdecn
  159. mov x0, #0
  160. mov x8, #__NR_exit
  161. svc #0
  162. endfunction
  163. // w0: signal number
  164. // x1: sa_action
  165. // w2: sa_flags
  166. // Clobbers x0-x6,x8
  167. function setsignal
  168. str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
  169. mov w4, w0
  170. mov x5, x1
  171. mov w6, w2
  172. add x0, sp, #16
  173. mov x1, #sa_sz
  174. bl memclr
  175. mov w0, w4
  176. add x1, sp, #16
  177. str w6, [x1, #sa_flags]
  178. str x5, [x1, #sa_handler]
  179. mov x2, #0
  180. mov x3, #sa_mask_sz
  181. mov x8, #__NR_rt_sigaction
  182. svc #0
  183. cbz w0, 1f
  184. puts "sigaction failure\n"
  185. b .Labort
  186. 1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
  187. ret
  188. endfunction
  189. // Main program entry point
  190. .globl _start
  191. function _start
  192. _start:
  193. mov x23, #0 // signal count
  194. mov w0, #SIGINT
  195. adr x1, terminate_handler
  196. mov w2, #SA_SIGINFO
  197. bl setsignal
  198. mov w0, #SIGTERM
  199. adr x1, terminate_handler
  200. mov w2, #SA_SIGINFO
  201. bl setsignal
  202. mov w0, #SIGUSR1
  203. adr x1, irritator_handler
  204. mov w2, #SA_SIGINFO
  205. orr w2, w2, #SA_NODEFER
  206. bl setsignal
  207. mov w0, #SIGUSR2
  208. adr x1, tickle_handler
  209. mov w2, #SA_SIGINFO
  210. orr w2, w2, #SA_NODEFER
  211. bl setsignal
  212. puts "Streaming mode "
  213. smstart_za
  214. // Sanity-check and report the vector length
  215. rdsvl 19, 8
  216. cmp x19, #128
  217. b.lo 1f
  218. cmp x19, #2048
  219. b.hi 1f
  220. tst x19, #(8 - 1)
  221. b.eq 2f
  222. 1: puts "bad vector length: "
  223. mov x0, x19
  224. bl putdecn
  225. b .Labort
  226. 2: puts "vector length:\t"
  227. mov x0, x19
  228. bl putdec
  229. puts " bits\n"
  230. // Obtain our PID, to ensure test pattern uniqueness between processes
  231. mov x8, #__NR_getpid
  232. svc #0
  233. mov x20, x0
  234. puts "PID:\t"
  235. mov x0, x20
  236. bl putdecn
  237. mov x22, #0 // generation number, increments per iteration
  238. .Ltest_loop:
  239. rdsvl 0, 8
  240. cmp x0, x19
  241. b.ne vl_barf
  242. rdsvl 21, 1 // Set up ZA & shadow with test pattern
  243. 0: mov x0, x20
  244. sub x1, x21, #1
  245. mov x2, x22
  246. bl setup_za
  247. subs x21, x21, #1
  248. b.ne 0b
  249. mov x8, #__NR_sched_yield // encourage preemption
  250. 1:
  251. svc #0
  252. mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
  253. and x1, x0, #3
  254. cmp x1, #2
  255. b.ne svcr_barf
  256. rdsvl 21, 1 // Verify that the data made it through
  257. rdsvl 24, 1 // Verify that the data made it through
  258. 0: sub x0, x24, x21
  259. bl check_za
  260. subs x21, x21, #1
  261. bne 0b
  262. add x22, x22, #1 // Everything still working
  263. b .Ltest_loop
  264. .Labort:
  265. mov x0, #0
  266. mov x1, #SIGABRT
  267. mov x8, #__NR_kill
  268. svc #0
  269. endfunction
  270. function barf
  271. // fpsimd.c acitivty log dump hack
  272. // ldr w0, =0xdeadc0de
  273. // mov w8, #__NR_exit
  274. // svc #0
  275. // end hack
  276. smstop
  277. mov x10, x0 // expected data
  278. mov x11, x1 // actual data
  279. mov x12, x2 // data size
  280. puts "Mismatch: PID="
  281. mov x0, x20
  282. bl putdec
  283. puts ", iteration="
  284. mov x0, x22
  285. bl putdec
  286. puts ", row="
  287. mov x0, x21
  288. bl putdecn
  289. puts "\tExpected ["
  290. mov x0, x10
  291. mov x1, x12
  292. bl dumphex
  293. puts "]\n\tGot ["
  294. mov x0, x11
  295. mov x1, x12
  296. bl dumphex
  297. puts "]\n"
  298. mov x8, #__NR_getpid
  299. svc #0
  300. // fpsimd.c acitivty log dump hack
  301. // ldr w0, =0xdeadc0de
  302. // mov w8, #__NR_exit
  303. // svc #0
  304. // ^ end of hack
  305. mov x1, #SIGABRT
  306. mov x8, #__NR_kill
  307. svc #0
  308. // mov x8, #__NR_exit
  309. // mov x1, #1
  310. // svc #0
  311. endfunction
  312. function vl_barf
  313. mov x10, x0
  314. puts "Bad active VL: "
  315. mov x0, x10
  316. bl putdecn
  317. mov x8, #__NR_exit
  318. mov x1, #1
  319. svc #0
  320. endfunction
  321. function svcr_barf
  322. mov x10, x0
  323. puts "Bad SVCR: "
  324. mov x0, x10
  325. bl putdecn
  326. mov x8, #__NR_exit
  327. mov x1, #1
  328. svc #0
  329. endfunction