powerpc/64s: system call support for scv/rfscv instructions

Add support for the scv instruction on POWER9 and later CPUs.

For now this implements the zeroth scv vector 'scv 0', as identical to
'sc' system calls, with the exception that LR is not preserved, nor
are volatile CR registers, and error is not indicated with CR0[SO],
but by returning a negative errno.

rfscv is implemented to return from scv type system calls. It can not
be used to return from sc system calls because those are defined to
preserve LR.

getpid syscall throughput on POWER9 is improved by 26% (428 to 318
cycles), largely due to reducing mtmsr and mtspr.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Fix ppc64e build]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200611081203.995112-3-npiggin@gmail.com
This commit is contained in:
Nicholas Piggin
2020-06-11 18:12:03 +10:00
committed by Michael Ellerman
parent b2dc2977cb
commit 7fa95f9ada
20 changed files with 422 additions and 45 deletions

View File

@@ -756,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* guarantee they will be delivered virtually. Some conditions (see the ISA)
* cause exceptions to be delivered in real mode.
*
* The scv instructions are a special case. They get a 0x3000 offset applied.
* scv exceptions have unique reentrancy properties, see below.
*
* It's impossible to receive interrupts below 0x300 via AIL.
*
* KVM: None of the virtual exceptions are from the guest. Anything that
@@ -765,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
* 0x1900 - 0x3fff : Real mode trampolines
* 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
* 0x1900 - 0x2fff : Real mode trampolines
* 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
* 0x5900 - 0x6fff : Relon mode trampolines
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - .... : Common interrupt handlers, remaining early
@@ -777,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* vectors there.
*/
OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
#ifdef CONFIG_PPC_POWERNV
@@ -814,6 +817,77 @@ USE_FIXED_SECTION(real_vectors)
.globl __start_interrupts
__start_interrupts:
/**
* Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
* This is a synchronous interrupt invoked with the "scv" instruction. The
* system call does not alter the HV bit, so it is directed to the OS.
*
* Handling:
* scv instructions enter the kernel without changing EE, RI, ME, or HV.
* In particular, this means we can take a maskable interrupt at any point
* in the scv handler, which is unlike any other interrupt. This is solved
* by treating the instruction addresses below __end_interrupts as being
* soft-masked.
*
* AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
* ensure scv is never executed with relocation off, which means AIL-0
* should never happen.
*
* Before leaving the below __end_interrupts text, at least of the following
* must be true:
* - MSR[PR]=1 (i.e., return to userspace)
* - MSR_EE|MSR_RI is set (no reentrant exceptions)
* - Standard kernel environment is set up (stack, paca, etc)
*
* Call convention:
*
* syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*/
EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
/* SCV 0 */
mr r9,r13
GET_PACA(r13)
mflr r11
mfctr r12
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
#ifdef CONFIG_RELOCATABLE
b system_call_vectored_tramp
#else
b system_call_vectored_common
#endif
nop
/* SCV 1 - 127 */
.rept 127
mr r9,r13
GET_PACA(r13)
mflr r11
mfctr r12
li r10,IRQS_ALL_DISABLED
stb r10,PACAIRQSOFTMASK(r13)
li r0,-1 /* cause failure */
#ifdef CONFIG_RELOCATABLE
b system_call_vectored_sigill_tramp
#else
b system_call_vectored_sigill
#endif
.endr
EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
__LOAD_HANDLER(r10, system_call_vectored_common)
mtctr r10
bctr
TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
__LOAD_HANDLER(r10, system_call_vectored_sigill)
mtctr r10
bctr
#endif
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x100)
@@ -2963,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
TRAMP_REAL_BEGIN(rfscv_flush_fallback)
/* system call volatile */
mr r7,r13
GET_PACA(r13);
mr r8,r1
ld r1,PACAKSAVE(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
/*
* The load adresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
1:
ld r11,(0x80 + 8)*0(r10)
ld r11,(0x80 + 8)*1(r10)
ld r11,(0x80 + 8)*2(r10)
ld r11,(0x80 + 8)*3(r10)
ld r11,(0x80 + 8)*4(r10)
ld r11,(0x80 + 8)*5(r10)
ld r11,(0x80 + 8)*6(r10)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
mtctr r9
li r9,0
li r10,0
li r11,0
mr r1,r8
mr r13,r7
RFSCV
USE_TEXT_SECTION()
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1