sve-test.S 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. // Copyright (C) 2015-2019 ARM Limited.
  3. // Original author: Dave Martin <[email protected]>
  4. //
  5. // Simple Scalable Vector Extension context switch test
  6. // Repeatedly writes unique test patterns into each SVE register
  7. // and reads them back to verify integrity.
  8. //
  9. // for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
  10. // (leave it running for as long as you want...)
  11. // kill $pids
  12. #include <asm/unistd.h>
  13. #include "assembler.h"
  14. #include "asm-offsets.h"
  15. #include "sme-inst.h"
  16. #define NZR 32
  17. #define NPR 16
  18. #define MAXVL_B (2048 / 8)
  19. .arch_extension sve
  20. .macro _sve_ldr_v zt, xn
  21. ldr z\zt, [x\xn] // load the Z register numbered zt from the address in the X register numbered xn
  22. .endm
  23. .macro _sve_str_v zt, xn
  24. str z\zt, [x\xn] // store the Z register numbered zt to the address in the X register numbered xn
  25. .endm
  26. .macro _sve_ldr_p pt, xn
  27. ldr p\pt, [x\xn] // load the P register numbered pt from the address in the X register numbered xn
  28. .endm
  29. .macro _sve_str_p pt, xn
  30. str p\pt, [x\xn] // store the P register numbered pt to the address in the X register numbered xn
  31. .endm
  32. // Generate accessor functions to read/write programmatically selected
  33. // SVE registers.
  34. // x0 is the register index to access
  35. // x1 is the memory address to read from (setz,setp) or store to (getz,getp)
  36. // All clobber x0-x2
      // define_accessor is not defined in this file (presumably it comes from
      // assembler.h); each use passes the function name, the number of
      // registers, and the per-register load/store macro to wrap.
  37. define_accessor setz, NZR, _sve_ldr_v
  38. define_accessor getz, NZR, _sve_str_v
  39. define_accessor setp, NPR, _sve_ldr_p
  40. define_accessor getp, NPR, _sve_str_p
  41. // Declare some storage space to shadow the SVE register contents:
  42. .pushsection .text
  43. .data // the buffers below land in .data; .popsection restores the section that was current before .pushsection
  44. .align 4
  45. zref: // Z-register shadow: room for NZR registers at the maximum vector length
  46. .space MAXVL_B * NZR
  47. pref: // P-register shadow: room for NPR registers of MAXVL_B / 8 bytes each
  48. .space MAXVL_B / 8 * NPR
  49. ffrref: // FFR shadow: the size of one maximum-length P register
  50. .space MAXVL_B / 8
  51. scratch: // staging buffer for generated patterns and for register read-back
  52. .space MAXVL_B
  53. .popsection
  54. // Generate a test pattern for storage in SVE registers
  55. // x0: pid (16 bits)
  56. // x1: register number (6 bits)
  57. // x2: generation (4 bits)
  58. // These values are used to construct a 32-bit pattern that is repeated in the
  59. // scratch buffer as many times as will fit:
  60. // bits 31:28 generation number (increments once per test_loop)
  61. // bits 27:22 32-bit lane index
  62. // bits 21:16 register number
  63. // bits 15: 0 pid
      // Writes MAXVL_B bytes to scratch and modifies x0-x2; nothing is saved or restored.
  64. function pattern
  65. orr w1, w0, w1, lsl #16 // w1 = pid | register number << 16
  66. orr w2, w1, w2, lsl #28 // w2 = lane-0 pattern, generation in bits 31:28
  67. ldr x0, =scratch
  68. mov w1, #MAXVL_B / 4 // number of 32-bit lanes to write
  69. 0: str w2, [x0], #4 // store one lane and advance the write pointer
  70. add w2, w2, #(1 << 22) // bump the lane index field
  71. subs w1, w1, #1
  72. bne 0b
  73. ret
  74. endfunction
  75. // Get the address of shadow data for SVE Z-register Z<xn>
      // xd: X register that receives the address
      // xn: X register holding the Z-register index (not modified)
      // nrtmp: number of a scratch X register; left holding the vector length in bytes
  76. .macro _adrz xd, xn, nrtmp
  77. ldr \xd, =zref // base of the Z shadow area
  78. rdvl x\nrtmp, #1 // scratch = current vector length in bytes
  79. madd \xd, x\nrtmp, \xn, \xd // address = zref + index * vector length
  80. .endm
  81. // Get the address of shadow data for SVE P-register P<xn - NZR>
      // xd: X register that receives the address
      // xn: X register holding NZR + P-register index; MODIFIED: on exit it holds the plain P index
      // nrtmp: number of a scratch X register; left holding the P-register size in bytes
  82. .macro _adrp xd, xn, nrtmp
  83. ldr \xd, =pref // base of the P shadow area
  84. rdvl x\nrtmp, #1 // scratch = current vector length in bytes
  85. lsr x\nrtmp, x\nrtmp, #3 // a P register is one eighth of that
  86. sub \xn, \xn, #NZR // convert the combined register number to a P index
  87. madd \xd, x\nrtmp, \xn, \xd // address = pref + P index * P-register size
  88. .endm
  89. // Set up test pattern in a SVE Z-register
  90. // x0: pid
  91. // x1: register number
  92. // x2: generation
      // Uses x4-x6 to carry values across the calls below, so it relies on
      // pattern, memcpy and setz leaving x4-x6 alone (memcpy is defined
      // outside this file -- TODO confirm its clobber list).
  93. function setup_zreg
  94. mov x4, x30 // keep the return address; the bl instructions below overwrite x30
  95. mov x6, x1 // keep the register number
  96. bl pattern // fill scratch with the pattern for this pid/register/generation
  97. _adrz x0, x6, 2 // x0 = shadow slot for this register, x2 = vector length in bytes
  98. mov x5, x0 // remember the shadow address for setz
  99. ldr x1, =scratch
  100. bl memcpy // presumably memcpy(dst = x0, src = x1, len = x2): copy pattern into the shadow
  101. mov x0, x6 // register index
  102. mov x1, x5 // address to load the register from
  103. bl setz // load the Z register from its shadow copy
  104. ret x4
  105. endfunction
  106. // Set up test pattern in a SVE P-register
  107. // x0: pid
  108. // x1: register number
  109. // x2: generation
       // The register number is in the combined numbering used by _start:
       // NZR + P index (_adrp subtracts NZR below).
       // Uses x4-x6 to carry values across calls, so it relies on pattern,
       // memcpy and setp leaving x4-x6 alone (memcpy is defined outside this
       // file -- TODO confirm its clobber list).
  110. function setup_preg
  111. mov x4, x30 // keep the return address; the bl instructions below overwrite x30
  112. mov x6, x1 // keep the combined register number
  113. bl pattern // fill scratch with the pattern for this pid/register/generation
  114. _adrp x0, x6, 2 // x0 = shadow slot, x2 = P-register size in bytes, x6 = plain P index
  115. mov x5, x0 // remember the shadow address for setp
  116. ldr x1, =scratch
  117. bl memcpy // presumably memcpy(dst = x0, src = x1, len = x2): copy pattern into the shadow
  118. mov x0, x6 // P-register index
  119. mov x1, x5 // address to load the register from
  120. bl setp // load the P register from its shadow copy
  121. ret x4
  122. endfunction
  123. // Set up test pattern in the FFR
  124. // x0: pid
  125. // x2: generation
  126. //
  127. // We need to generate a canonical FFR value, which consists of a number of
  128. // low "1" bits, followed by a number of zeros. This gives us 17 unique values
  129. // per 16 bits of FFR, so we create a 4 bit signature out of the PID and
  130. // generation, and use that as the initial number of ones in the pattern.
  131. // We fill the upper lanes of FFR with zeros.
  132. // Beware: corrupts P0.
       // When SSVE is defined the function does nothing and returns immediately.
  133. function setup_ffr
  134. #ifndef SSVE
  135. mov x4, x30 // keep the return address; the bl instructions below overwrite x30
  136. and w0, w0, #0x3 // signature bits 1:0 = low two bits of the pid
  137. bfi w0, w2, #2, #2 // signature bits 3:2 = low two bits of the generation
  138. mov w1, #1
  139. lsl w1, w1, w0
  140. sub w1, w1, #1 // w1 = (1 << signature) - 1, i.e. that many low one bits
  141. ldr x0, =ffrref
  142. strh w1, [x0], 2 // store the first 16 bits of the shadow; x0 now points past them
  143. rdvl x1, #1
  144. lsr x1, x1, #3 // FFR size in bytes (vector length / 8)
  145. sub x1, x1, #2 // bytes remaining after the halfword just written
  146. bl memclr // presumably zeroes x1 bytes at x0 (memclr is defined outside this file)
  147. mov x0, #0
  148. ldr x1, =ffrref
  149. bl setp // load P0 from the FFR shadow
  150. wrffr p0.b // copy P0 into the FFR
  151. ret x4
  152. #else
  153. ret
  154. #endif
  155. endfunction
  156. // Trivial memory compare: compare x2 bytes starting at address x0 with
  157. // bytes starting at address x1.
  158. // Returns only if all bytes match; otherwise, the program is aborted.
  159. // Clobbers x0-x5.
       // On a mismatch control transfers to barf with the original x0, x1 and
       // x2 restored, so barf can report both buffers.
  160. function memcmp
  161. cbz x2, 2f // zero length: nothing to compare
  162. stp x0, x1, [sp, #-0x20]! // save the original arguments for the mismatch report
  163. str x2, [sp, #0x10]
  164. mov x5, #0 // x5 = byte offset into both buffers
  165. 0: ldrb w3, [x0, x5]
  166. ldrb w4, [x1, x5]
  167. add x5, x5, #1
  168. cmp w3, w4
  169. b.ne 1f // mismatch: leave the loop with NE set
  170. subs x2, x2, #1
  171. b.ne 0b // falls through with EQ once every byte has matched
  172. 1: ldr x2, [sp, #0x10] // the loads below do not alter the flags
  173. ldp x0, x1, [sp], #0x20
  174. b.ne barf // NE still reflects the failed byte compare
  175. 2: ret
  176. endfunction
  177. // Verify that a SVE Z-register matches its shadow in memory, else abort
  178. // x0: reg number
  179. // Clobbers x0-x7.
       // Relies on memfill_ae and getz leaving x3-x7 alone (memfill_ae is
       // defined outside this file -- TODO confirm its clobber list).
  180. function check_zreg
  181. mov x3, x30 // keep the return address; the bl instructions below overwrite x30
  182. _adrz x5, x0, 6 // x5 = shadow slot for this register, x6 = vector length in bytes
  183. mov x4, x0 // keep the register number
  184. ldr x7, =scratch
  185. mov x0, x7
  186. mov x1, x6
  187. bl memfill_ae // presumably pre-fills x1 bytes at x0 with a known filler before the read-back
  188. mov x0, x4
  189. mov x1, x7
  190. bl getz // store the live Z register into scratch
  191. mov x0, x5 // expected: shadow copy
  192. mov x1, x7 // actual: register contents just stored
  193. mov x2, x6 // length in bytes
  194. mov x30, x3 // restore the return address so memcmp returns to our caller
  195. b memcmp
  196. endfunction
  197. // Verify that a SVE P-register matches its shadow in memory, else abort
  198. // x0: reg number
  199. // Clobbers x0-x7.
       // The register number is in the combined numbering (NZR + P index);
       // _adrp converts x0 to the plain P index before it is copied to x4.
       // Relies on memfill_ae and getp leaving x3-x7 alone (memfill_ae is
       // defined outside this file -- TODO confirm its clobber list).
  200. function check_preg
  201. mov x3, x30 // keep the return address; the bl instructions below overwrite x30
  202. _adrp x5, x0, 6 // x5 = shadow slot, x6 = P-register size in bytes, x0 = plain P index
  203. mov x4, x0 // keep the P index
  204. ldr x7, =scratch
  205. mov x0, x7
  206. mov x1, x6
  207. bl memfill_ae // presumably pre-fills x1 bytes at x0 with a known filler before the read-back
  208. mov x0, x4
  209. mov x1, x7
  210. bl getp // store the live P register into scratch
  211. mov x0, x5 // expected: shadow copy
  212. mov x1, x7 // actual: register contents just stored
  213. mov x2, x6 // length in bytes
  214. mov x30, x3 // restore the return address so memcmp returns to our caller
  215. b memcmp
  216. endfunction
  217. // Verify that the FFR matches its shadow in memory, else abort
  218. // Beware -- corrupts P0.
  219. // Clobbers x0-x5.
       // When SSVE is defined the function does nothing and returns immediately.
  220. function check_ffr
  221. #ifndef SSVE
  222. mov x3, x30 // keep the return address; the bl instructions below overwrite x30
  223. ldr x4, =scratch
  224. rdvl x5, #1
  225. lsr x5, x5, #3 // x5 = FFR size in bytes (vector length / 8)
  226. mov x0, x4
  227. mov x1, x5
  228. bl memfill_ae // presumably pre-fills x1 bytes at x0 with a known filler before the read-back
  229. rdffr p0.b // copy the FFR into P0 so it can be stored with getp
  230. mov x0, #0
  231. mov x1, x4
  232. bl getp // store P0 (the FFR value) into scratch
  233. ldr x0, =ffrref // expected: FFR shadow
  234. mov x1, x4 // actual: value just stored
  235. mov x2, x5 // length in bytes
  236. mov x30, x3 // restore the return address so memcmp returns to our caller
  237. b memcmp
  238. #else
  239. ret
  240. #endif
  241. endfunction
  242. // Any SVE register modified here can cause corruption in the main
  243. // thread -- but *only* the registers modified here.
       // Installed by _start for SIGUSR1 with SA_SIGINFO; x2 is used as the
       // base of the saved context (ucontext_regs comes from asm-offsets.h).
  244. function irritator_handler
  245. // Increment the irritation signal count (x23):
  246. ldr x0, [x2, #ucontext_regs + 8 * 23] // saved x23 of the interrupted code
  247. add x0, x0, #1
  248. str x0, [x2, #ucontext_regs + 8 * 23]
  249. // Corrupt some random Z-regs
  250. adr x0, .text + (irritator_handler - .text) / 16 * 16 // NOTE(review): x0 is not read again in this handler -- confirm intent
  251. movi v0.8b, #1
  252. movi v9.16b, #2
  253. movi v31.8b, #3
  254. #ifndef SSVE
  255. // And P0
  256. rdffr p0.b
  257. // And FFR
  258. wrffr p15.b
  259. #endif
  260. ret
  261. endfunction
  262. function tickle_handler
       // Installed by _start for SIGUSR2 with SA_SIGINFO. Only bumps the
       // signal counter kept in the interrupted code's x23; x2 is used as the
       // base of the saved context (ucontext_regs comes from asm-offsets.h).
  263. // Increment the signal count (x23):
  264. ldr x0, [x2, #ucontext_regs + 8 * 23] // saved x23 of the interrupted code
  265. add x0, x0, #1
  266. str x0, [x2, #ucontext_regs + 8 * 23]
  267. ret
  268. endfunction
  269. function terminate_handler
       // Installed by _start for SIGINT and SIGTERM with SA_SIGINFO.
       // w0: signal number; x2: base of the saved context.
       // Prints the signal number plus the iteration (saved x22) and signal
       // (saved x23) counters of the interrupted code, then exits with status 0.
       // Does not return.
  270. mov w21, w0 // keep the signal number while the messages are printed
  271. mov x20, x2 // keep the context pointer likewise
  272. puts "Terminated by signal "
  273. mov w0, w21
  274. bl putdec
  275. puts ", no error, iterations="
  276. ldr x0, [x20, #ucontext_regs + 8 * 22] // saved x22: generation/iteration counter
  277. bl putdec
  278. puts ", signals="
  279. ldr x0, [x20, #ucontext_regs + 8 * 23] // saved x23: signal counter
  280. bl putdecn
  281. mov x0, #0 // exit status 0
  282. mov x8, #__NR_exit
  283. svc #0
  284. endfunction
  285. // w0: signal number
  286. // x1: sa_action
  287. // w2: sa_flags
  288. // Clobbers x0-x6,x8
       // Installs a handler through the rt_sigaction syscall using a zeroed
       // sigaction structure built on the stack. Branches to .Labort if the
       // syscall returns non-zero. sa_sz, sa_flags, sa_handler and sa_mask_sz
       // are not defined in this file (presumably from asm-offsets.h).
  289. function setsignal
  290. str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! // frame: saved x30 at sp, sigaction struct at sp + 16, size rounded up to 16
  291. mov w4, w0 // keep the arguments across memclr
  292. mov x5, x1
  293. mov w6, w2
  294. add x0, sp, #16
  295. mov x1, #sa_sz
  296. bl memclr // presumably zeroes x1 bytes at x0 (memclr is defined outside this file)
  297. mov w0, w4 // syscall argument 0: signal number
  298. add x1, sp, #16 // syscall argument 1: new action
  299. str w6, [x1, #sa_flags]
  300. str x5, [x1, #sa_handler]
  301. mov x2, #0 // syscall argument 2: no old action requested
  302. mov x3, #sa_mask_sz // syscall argument 3: signal mask size
  303. mov x8, #__NR_rt_sigaction
  304. svc #0
  305. cbz w0, 1f // zero return means success
  306. puts "sigaction failure\n"
  307. b .Labort
  308. 1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) // pop the frame and restore the return address
  309. ret
  310. endfunction
  311. // Main program entry point
       // Installs the signal handlers, validates and prints the vector length
       // and pid, then loops forever: load a fresh pattern into every Z and P
       // register and the FFR, then read each one back and compare it with its
       // shadow copy. Any mismatch ends in barf; a changed vector length ends
       // in vl_barf.
       // Long-lived registers:
       //   x19 vector length in bits      x20 pid
       //   x21 current register number    x22 generation / iteration count
       //   x23 signal count (incremented by the signal handlers)
  312. .globl _start
  313. function _start
  314. _start:
  315. mov x23, #0 // Irritation signal count
  316. mov w0, #SIGINT
  317. adr x1, terminate_handler
  318. mov w2, #SA_SIGINFO
  319. bl setsignal
  320. mov w0, #SIGTERM
  321. adr x1, terminate_handler
  322. mov w2, #SA_SIGINFO
  323. bl setsignal
  324. mov w0, #SIGUSR1
  325. adr x1, irritator_handler
  326. mov w2, #SA_SIGINFO
  327. orr w2, w2, #SA_NODEFER
  328. bl setsignal
  329. mov w0, #SIGUSR2
  330. adr x1, tickle_handler
  331. mov w2, #SA_SIGINFO
  332. orr w2, w2, #SA_NODEFER
  333. bl setsignal
  334. #ifdef SSVE
  335. puts "Streaming mode "
  336. smstart_sm
  337. #endif
  338. // Sanity-check and report the vector length
  339. rdvl x19, #8 // x19 = vector length in bits
  340. cmp x19, #128
  341. b.lo 1f
  342. cmp x19, #2048
  343. b.hi 1f
  344. tst x19, #(8 - 1) // must be a whole number of bytes
  345. b.eq 2f
  346. 1: puts "Bad vector length: "
  347. mov x0, x19
  348. bl putdecn
  349. b .Labort
  350. 2: puts "Vector length:\t"
  351. mov x0, x19
  352. bl putdec
  353. puts " bits\n"
  354. // Obtain our PID, to ensure test pattern uniqueness between processes
  355. mov x8, #__NR_getpid
  356. svc #0
  357. mov x20, x0
  358. puts "PID:\t"
  359. mov x0, x20
  360. bl putdecn
  361. #ifdef SSVE
  362. smstart_sm // syscalls will have exited streaming mode
  363. #endif
  364. mov x22, #0 // generation number, increments per iteration
  365. .Ltest_loop:
  366. rdvl x0, #8 // the vector length must not change while the test runs
  367. cmp x0, x19
  368. b.ne vl_barf
  369. mov x21, #0 // Set up Z-regs & shadow with test pattern
  370. 0: mov x0, x20
  371. mov x1, x21
  372. and x2, x22, #0xf // only four bits of generation fit in the pattern
  373. bl setup_zreg
  374. add x21, x21, #1
  375. cmp x21, #NZR
  376. b.lo 0b
  377. mov x0, x20 // Set up FFR & shadow with test pattern
  378. mov x1, #NZR + NPR
  379. and x2, x22, #0xf
  380. bl setup_ffr
  381. 0: mov x0, x20 // Set up P-regs & shadow with test pattern
  382. mov x1, x21 // x21 carries on from NZR: P registers are numbered NZR .. NZR + NPR - 1
  383. and x2, x22, #0xf
  384. bl setup_preg
  385. add x21, x21, #1
  386. cmp x21, #NZR + NPR
  387. b.lo 0b
  388. // Can't do this when SVE state is volatile across SVC:
  389. // mov x8, #__NR_sched_yield // Encourage preemption
  390. // svc #0
  391. mov x21, #0 // verify every Z register against its shadow
  392. 0: mov x0, x21
  393. bl check_zreg
  394. add x21, x21, #1
  395. cmp x21, #NZR
  396. b.lo 0b
  397. 0: mov x0, x21 // then every P register, again continuing from NZR
  398. bl check_preg
  399. add x21, x21, #1
  400. cmp x21, #NZR + NPR
  401. b.lo 0b
  402. bl check_ffr
  403. add x22, x22, #1
  404. b .Ltest_loop
  405. .Labort:
  406. mov x8, #__NR_getpid // look up our own pid: kill with pid 0 would signal the whole process group
       svc #0 // x0 = pid, the first argument to kill
  407. mov x1, #SIGABRT
  408. mov x8, #__NR_kill
  409. svc #0
  410. endfunction
  411. function barf
       // Report a register/shadow mismatch and abort. Reached from memcmp with
       // x0 = expected data, x1 = actual data, x2 = size in bytes. Also prints
       // x20 (pid), x22 (iteration) and x21 (register number) as set by _start.
       // Ends by sending SIGABRT to this process.
  412. // fpsimd.c activity log dump hack
  413. // ldr w0, =0xdeadc0de
  414. // mov w8, #__NR_exit
  415. // svc #0
  416. // end hack
  417. mov x10, x0 // expected data
  418. mov x11, x1 // actual data
  419. mov x12, x2 // data size
  420. puts "Mismatch: PID="
  421. mov x0, x20
  422. bl putdec
  423. puts ", iteration="
  424. mov x0, x22
  425. bl putdec
  426. puts ", reg="
  427. mov x0, x21
  428. bl putdecn
  429. puts "\tExpected ["
  430. mov x0, x10
  431. mov x1, x12
  432. bl dumphex
  433. puts "]\n\tGot ["
  434. mov x0, x11
  435. mov x1, x12
  436. bl dumphex
  437. puts "]\n"
  438. mov x8, #__NR_getpid
  439. svc #0 // x0 = our pid, which becomes the first argument to kill below
  440. // fpsimd.c activity log dump hack
  441. // ldr w0, =0xdeadc0de
  442. // mov w8, #__NR_exit
  443. // svc #0
  444. // ^ end of hack
  445. mov x1, #SIGABRT
  446. mov x8, #__NR_kill
  447. svc #0
  448. // mov x8, #__NR_exit
  449. // mov x1, #1
  450. // svc #0
  451. endfunction
  452. function vl_barf
       // Report an unexpected change of the active vector length and exit
       // with status 1. Reached from the test loop in _start with x0 holding
       // the vector length (in bits) that was just read. Does not return.
  453. mov x10, x0 // keep the bad vector length while the message is printed
  454. puts "Bad active VL: "
  455. mov x0, x10
  456. bl putdecn
  457. mov x8, #__NR_exit
  458. mov x0, #1 // the exit status is the first syscall argument, so it belongs in x0, not x1
  459. svc #0
  460. endfunction