123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- // SPDX-License-Identifier: GPL-2.0-only
- // Copyright (C) 2021 ARM Limited.
- // Original author: Mark Brown <[email protected]>
- //
- // Scalable Matrix Extension ZA context switch test
- // Repeatedly writes unique test patterns into each ZA tile
- // and reads them back to verify integrity.
- //
- // for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
- // (leave it running for as long as you want...)
- // kill $pids
- #include <asm/unistd.h>
- #include "assembler.h"
- #include "asm-offsets.h"
- #include "sme-inst.h"
- .arch_extension sve
- #define MAXVL 2048
- #define MAXVL_B (MAXVL / 8)
- // Backing storage, placed in .data:
- // zaref - in-memory shadow of the ZA contents, sized for the largest
- // supported vector length (MAXVL_B * MAXVL_B bytes)
- // scratch - buffer holding a single vector (MAXVL_B bytes)
- .pushsection .data
- .p2align 4
- zaref:
- .skip MAXVL_B * MAXVL_B
- scratch:
- .skip MAXVL_B
- .popsection
- // memcpy: byte-at-a-time copy.
- // x0: destination address
- // x1: source address
- // x2: number of bytes to copy (zero is allowed and copies nothing)
- // Clobbers x0-x3 and the condition flags.
- function memcpy
- cmp x2, #0
- b.eq .Lmemcpy_done // nothing to copy
- .Lmemcpy_byte:
- ldrb w3, [x1], #1 // fetch one byte, advance source
- strb w3, [x0], #1 // store it, advance destination
- subs x2, x2, #1
- b.ne .Lmemcpy_byte // loop until the count reaches zero
- .Lmemcpy_done:
- ret
- endfunction
- // Build the test pattern for one ZA row in the scratch buffer
- // x0: pid
- // x1: row in ZA
- // x2: generation
- // Clobbers x0, x1, w3 and the condition flags.
- //
- // The inputs are packed into a 32-bit seed word which is written to
- // MAXVL_B / 4 consecutive 32-bit lanes of scratch, adding one per lane:
- // bits 31:28 generation number (increments once per test_loop)
- // bits 27:16 pid
- // bits 15: 8 row number
- // bits 7: 0 32-bit lane index
- function pattern
- mov w3, #0
- bfi w3, w2, #28, #4 // generation -> bits 31:28
- bfi w3, w0, #16, #12 // pid -> bits 27:16
- bfi w3, w1, #8, #8 // row -> bits 15:8
- mov w1, #MAXVL_B / 4 // number of 32-bit lanes to write
- ldr x0, =scratch
- .Lpattern_lane:
- str w3, [x0], #4
- add w3, w3, #1 // next lane index
- subs w1, w1, #1
- b.ne .Lpattern_lane
- ret
- endfunction
- // _adrza xd, xn, nrtmp
- // Compute the address of the shadow data for ZA horizontal vector \xn:
- // \xd = zaref + \xn * (result of "rdsvl \nrtmp, 1")
- // \nrtmp is a bare register number. x\nrtmp is overwritten with the rdsvl
- // result, which callers in this file reuse as the byte length of one row.
- .macro _adrza xd, xn, nrtmp
- rdsvl \nrtmp, 1
- ldr \xd, =zaref
- madd \xd, x\nrtmp, \xn, \xd
- .endm
- // Write a fresh test pattern to one ZA horizontal vector and to its
- // shadow copy in zaref
- // x0: pid
- // x1: row number
- // x2: generation
- // The return address is parked in x4 and the shadow pointer in x5, so this
- // relies on pattern and memcpy leaving x4, x5 and x12 untouched.
- function setup_za
- mov x12, x1 // row number -> vector select register
- mov x4, x30 // x30 is overwritten by the calls below
- bl pattern // scratch now holds the pattern for this row
- _adrza x0, x12, 2 // x0 = shadow row address, x2 = row length
- ldr x1, =scratch
- mov x5, x0 // keep the shadow pointer for the ZA load
- bl memcpy // scratch -> shadow, x2 bytes
- _ldr_za 12, 5 // load vector w12 from pointer x5
- ret x4
- endfunction
- // memcmp: byte-wise comparison that aborts on mismatch.
- // x0: first buffer
- // x1: second buffer
- // x2: number of bytes to compare (zero returns immediately)
- // Returns only when every byte matches. On the first difference it
- // branches to barf with the original x0, x1 and x2 restored.
- // Clobbers x0-x5.
- function memcmp
- cbz x2, .Lmemcmp_ret
- sub sp, sp, #0x20 // frame: x0, x1 at [sp], x2 at [sp, #0x10]
- stp x0, x1, [sp]
- str x2, [sp, #0x10]
- mov x5, xzr // byte index
- .Lmemcmp_byte:
- ldrb w3, [x0, x5]
- ldrb w4, [x1, x5]
- add x5, x5, #1
- cmp w3, w4
- b.ne .Lmemcmp_out // mismatch: leave with NE set
- subs x2, x2, #1
- b.ne .Lmemcmp_byte // all compared: falls out with EQ set
- .Lmemcmp_out:
- ldp x0, x1, [sp] // none of the restores below touch the flags
- ldr x2, [sp, #0x10]
- add sp, sp, #0x20
- b.ne barf
- .Lmemcmp_ret:
- ret
- endfunction
- // Compare one ZA horizontal vector against its shadow copy in zaref;
- // a mismatch ends up in barf via memcmp
- // x0: row number
- // Clobbers x0-x7 and x12.
- // The return address is held in x3 across the memfill_ae call, so this
- // relies on memfill_ae preserving x3 and x5-x7.
- function check_za
- mov x12, x0 // row number -> vector select register
- mov x3, x30 // x30 is overwritten by the bl below
- _adrza x5, x0, 6 // x5 = expected data, x6 = row length
- mov x4, x0
- ldr x7, =scratch // x7 = buffer the vector is dumped into
- mov x1, x6
- mov x0, x7
- bl memfill_ae // poison scratch before the store
- _str_za 12, 7 // save vector w12 to pointer x7
- mov x30, x3 // memcmp returns straight to our caller
- mov x2, x6
- mov x1, x7
- mov x0, x5
- b memcmp
- endfunction
- // SIGUSR1 handler (installed by _start).
- // x2: context pointer, indexed with ucontext_regs to reach the saved
- // general-purpose registers
- // Any SME register modified here can cause corruption in the main
- // thread -- but *only* the locations modified here.
- function irritator_handler
- // Bump the interrupted context's saved x23 (the signal count):
- ldr x0, [x2, #(ucontext_regs + 23 * 8)]
- add x0, x0, #1
- str x0, [x2, #(ucontext_regs + 23 * 8)]
- // Disabled register-corruption code; as written it only touches
- // v0, v9 and v31, not ZA.
- #if 0
- adr x0, .text + (irritator_handler - .text) / 16 * 16
- movi v0.8b, #1
- movi v9.16b, #2
- movi v31.8b, #3
- #endif
- ret
- endfunction
- // SIGUSR2 handler (installed by _start): counts the signal and returns.
- // x2: context pointer, indexed with ucontext_regs to reach the saved
- // general-purpose registers
- function tickle_handler
- // Bump the interrupted context's saved x23 (the signal count):
- ldr x0, [x2, #(ucontext_regs + 23 * 8)]
- add x0, x0, #1
- str x0, [x2, #(ucontext_regs + 23 * 8)]
- ret
- endfunction
- // SIGINT/SIGTERM handler (installed by _start): print a summary taken from
- // the interrupted context and exit with status 0. Does not return.
- // w0: signal number
- // x2: context pointer; saved x22 is the iteration count and saved x23 the
- // signal count
- function terminate_handler
- mov x20, x2 // context pointer, kept across the print calls
- mov w21, w0 // signal number, kept across the print calls
- puts "Terminated by signal "
- mov w0, w21
- bl putdec
- puts ", no error, iterations="
- ldr x0, [x20, #(ucontext_regs + 22 * 8)]
- bl putdec
- puts ", signals="
- ldr x0, [x20, #(ucontext_regs + 23 * 8)]
- bl putdecn
- mov x8, #__NR_exit
- mov x0, xzr // exit status 0
- svc #0
- endfunction
- // Install a signal handler with rt_sigaction; abort the program on failure
- // w0: signal number
- // x1: sa_action
- // w2: sa_flags
- // Clobbers x0-x6,x8
- // Stack frame: saved x30 at [sp], zeroed sigaction structure at [sp, #16].
- function setsignal
- sub sp, sp, #((sa_sz + 15) / 16 * 16 + 16)
- str x30, [sp]
- mov w6, w2 // stash the arguments across memclr
- mov x5, x1
- mov w4, w0
- add x0, sp, #16
- mov x1, #sa_sz
- bl memclr // start from an all-zero structure
- add x1, sp, #16 // x1 = new action
- str x5, [x1, #sa_handler]
- str w6, [x1, #sa_flags]
- mov w0, w4 // x0 = signal number
- mov x2, xzr // x2 = no old action wanted
- mov x3, #sa_mask_sz
- mov x8, #__NR_rt_sigaction
- svc #0
- cbz w0, .Lsetsignal_ok
- puts "sigaction failure\n"
- b .Labort
- .Lsetsignal_ok:
- ldr x30, [sp]
- add sp, sp, #((sa_sz + 15) / 16 * 16 + 16)
- ret
- endfunction
- // Main program entry point
- // Installs the signal handlers, turns ZA on, then loops forever: fill every
- // ZA row and its shadow with a fresh pattern, yield the CPU, and verify
- // that SVCR and every row are unchanged.
- // Long-lived registers:
- // x19 vector length in bits, as read at startup
- // x20 pid
- // x21 down-counter for the row loops
- // x22 generation number (iteration count)
- // x23 signal count, incremented by the signal handlers
- // x24 loop count of the verify loop; x24 - x21 is the row being checked
- .globl _start
- function _start
- _start:
- mov x23, #0 // signal count
- mov w0, #SIGINT
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
- mov w0, #SIGTERM
- adr x1, terminate_handler
- mov w2, #SA_SIGINFO
- bl setsignal
- mov w0, #SIGUSR1
- adr x1, irritator_handler
- mov w2, #SA_SIGINFO
- orr w2, w2, #SA_NODEFER
- bl setsignal
- mov w0, #SIGUSR2
- adr x1, tickle_handler
- mov w2, #SA_SIGINFO
- orr w2, w2, #SA_NODEFER
- bl setsignal
- puts "Streaming mode " // completed by "vector length:" below
- smstart_za
- // Sanity-check and report the vector length
- rdsvl 19, 8
- cmp x19, #128
- b.lo 1f
- cmp x19, #2048
- b.hi 1f
- tst x19, #(8 - 1)
- b.eq 2f
- 1: puts "bad vector length: "
- mov x0, x19
- bl putdecn
- b .Labort
- 2: puts "vector length:\t"
- mov x0, x19
- bl putdec
- puts " bits\n"
- // Obtain our PID, to ensure test pattern uniqueness between processes
- mov x8, #__NR_getpid
- svc #0
- mov x20, x0
- puts "PID:\t"
- mov x0, x20
- bl putdecn
- mov x22, #0 // generation number, increments per iteration
- .Ltest_loop:
- rdsvl 0, 8 // the vector length must not change under us
- cmp x0, x19
- b.ne vl_barf
- rdsvl 21, 1 // Set up ZA & shadow with test pattern
- 0: mov x0, x20
- sub x1, x21, #1 // rows are written from the last down to 0
- mov x2, x22
- bl setup_za
- subs x21, x21, #1
- b.ne 0b
- mov x8, #__NR_sched_yield // encourage preemption
- 1:
- svc #0
- mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
- and x1, x0, #3
- cmp x1, #2
- b.ne svcr_barf
- rdsvl 21, 1 // Verify that the data made it through
- rdsvl 24, 1 // fixed base: row checked is x24 - x21
- 0: sub x0, x24, x21
- bl check_za
- subs x21, x21, #1
- bne 0b
- add x22, x22, #1 // Everything still working
- b .Ltest_loop
- // Abort path, also used by setsignal. Signal only this process, as barf
- // does: kill(0, ...) would deliver SIGABRT to the whole process group.
- .Labort:
- mov x8, #__NR_getpid
- svc #0 // x0 = our pid
- mov x1, #SIGABRT
- mov x8, #__NR_kill
- svc #0
- // Only reached if SIGABRT did not terminate us; do not run into the
- // code that follows this function.
- mov x0, #1
- mov x8, #__NR_exit
- svc #0
- endfunction
- // Report a data mismatch and abort the process. Does not return.
- // Reached from memcmp with:
- // x0: expected data
- // x1: actual data
- // x2: data size
- // Also reports x20 (pid), x22 (iteration) and the row under test. The
- // verify loop in _start passes x24 - x21 to check_za as the row number,
- // so that difference is what gets printed (x21 alone is its down-counter).
- function barf
- // fpsimd.c activity log dump hack
- // ldr w0, =0xdeadc0de
- // mov w8, #__NR_exit
- // svc #0
- // end hack
- smstop
- mov x10, x0 // expected data
- mov x11, x1 // actual data
- mov x12, x2 // data size
- puts "Mismatch: PID="
- mov x0, x20
- bl putdec
- puts ", iteration="
- mov x0, x22
- bl putdec
- puts ", row="
- sub x0, x24, x21 // row number that was handed to check_za
- bl putdecn
- puts "\tExpected ["
- mov x0, x10
- mov x1, x12
- bl dumphex
- puts "]\n\tGot ["
- mov x0, x11
- mov x1, x12
- bl dumphex
- puts "]\n"
- mov x8, #__NR_getpid
- svc #0 // x0 = our pid, target of the kill below
- // fpsimd.c activity log dump hack
- // ldr w0, =0xdeadc0de
- // mov w8, #__NR_exit
- // svc #0
- // ^ end of hack
- mov x1, #SIGABRT
- mov x8, #__NR_kill
- svc #0
- // Only reached if SIGABRT did not terminate us; exit with a failure
- // status rather than running into the code that follows.
- mov x0, #1
- mov x8, #__NR_exit
- svc #0
- endfunction
- // Report an unexpected vector length and exit with status 1.
- // Does not return.
- // x0: the vector length value that was read
- function vl_barf
- mov x10, x0 // keep the value across puts
- puts "Bad active VL: "
- mov x0, x10
- bl putdecn
- mov x8, #__NR_exit
- mov x0, #1 // the exit status is the first syscall argument (x0)
- svc #0
- endfunction
- // Report an unexpected SVCR value and exit with status 1.
- // Does not return.
- // x0: the SVCR value that was read
- function svcr_barf
- mov x10, x0 // keep the value across puts
- puts "Bad SVCR: "
- mov x0, x10
- bl putdecn
- mov x8, #__NR_exit
- mov x0, #1 // the exit status is the first syscall argument (x0)
- svc #0
- endfunction
|