parisc: Add alternative coding infrastructure

This patch adds the necessary code to patch a running kernel at runtime
to improve performance.

The current implementation offers a few optimizations variants:

- When running a SMP kernel on a single UP processor, unwanted assembler
  statements like locking functions are overwritten with NOPs. When
  multiple instructions shall be skipped, one branch instruction is used
  instead of multiple nop instructions.

- In the UP case, some pdtlb and pitlb instructions are patched to
  become pdtlb,l and pitlb,l which only flushes the CPU-local tlb
  entries instead of broadcasting the flush to other CPUs in the system
  and thus may improve performance.

- fic and fdc instructions are skipped if no I- or D-caches are
  installed.  This should speed up qemu emulation and cacheless systems.

- If no cache coherence is needed for IO operations, the relevant fdc
  and sync instructions in the sba and ccio drivers are replaced by
  nops.

- On systems which share I- and D-TLBs and thus don't have a seperate
  instruction TLB, the pitlb instruction is replaced by a nop.

Live-patching is done early in the boot process, just after having run
the system inventory. No drivers are running and thus no external
interrupts should arrive. So the hope is that no TLB exceptions will
occur during the patching. If this turns out to be wrong we will
probably need to do the patching in real-mode.

Signed-off-by: Helge Deller <deller@gmx.de>
这个提交包含在:
Helge Deller
2018-10-16 22:38:22 +02:00
父节点 34c201ae49
当前提交 3847dab774
修改 14 个文件,包含 233 行新增62 行删除

查看文件

@@ -479,18 +479,6 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
/* Purge TLB entries for small ranges using the pdtlb and
pitlb instructions. These instructions execute locally
but cause a purge request to be broadcast to other TLBs. */
if (likely(!split_tlb)) {
while (start < end) {
purge_tlb_start(flags);
mtsp(sid, 1);
pdtlb(start);
purge_tlb_end(flags);
start += PAGE_SIZE;
}
return 0;
}
/* split TLB case */
while (start < end) {
purge_tlb_start(flags);
mtsp(sid, 1);

查看文件

@@ -38,6 +38,7 @@
#include <asm/ldcw.h>
#include <asm/traps.h>
#include <asm/thread_info.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
@@ -464,7 +465,7 @@
/* Acquire pa_tlb_lock lock and check page is present. */
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
98: cmpib,COND(=),n 0,\spc,2f
load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
@@ -473,6 +474,7 @@
bb,<,n \pte,_PAGE_PRESENT_BIT,3f
b \fault
stw,ma \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
2: LDREG 0(\ptp),\pte
bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault
@@ -482,15 +484,17 @@
/* Release pa_tlb_lock lock without reloading lock address. */
.macro tlb_unlock0 spc,tmp
#ifdef CONFIG_SMP
or,COND(=) %r0,\spc,%r0
98: or,COND(=) %r0,\spc,%r0
stw,ma \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
load_pa_tlb_lock \tmp
98: load_pa_tlb_lock \tmp
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
tlb_unlock0 \spc,\tmp
#endif
.endm

查看文件

@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -190,7 +191,7 @@ ENDPROC_CFI(flush_tlb_all_local)
.import cache_info,data
ENTRY_CFI(flush_instruction_cache_local)
load32 cache_info, %r1
88: load32 cache_info, %r1
/* Flush Instruction Cache */
@@ -243,6 +244,7 @@ fioneloop2:
fisync:
sync
mtsm %r22 /* restore I-bit */
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
bv %r0(%r2)
nop
ENDPROC_CFI(flush_instruction_cache_local)
@@ -250,7 +252,7 @@ ENDPROC_CFI(flush_instruction_cache_local)
.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
load32 cache_info, %r1
88: load32 cache_info, %r1
/* Flush Data Cache */
@@ -304,6 +306,7 @@ fdsync:
syncdma
sync
mtsm %r22 /* restore I-bit */
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
bv %r0(%r2)
nop
ENDPROC_CFI(flush_data_cache_local)
@@ -312,6 +315,7 @@ ENDPROC_CFI(flush_data_cache_local)
.macro tlb_lock la,flags,tmp
#ifdef CONFIG_SMP
98:
#if __PA_LDCW_ALIGNMENT > 4
load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
@@ -326,15 +330,17 @@ ENDPROC_CFI(flush_data_cache_local)
nop
b,n 2b
3:
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
.macro tlb_unlock la,flags,tmp
#ifdef CONFIG_SMP
ldi 1,\tmp
98: ldi 1,\tmp
sync
stw \tmp,0(\la)
mtsm \flags
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
@@ -596,9 +602,11 @@ ENTRY_CFI(copy_user_page_asm)
pdtlb,l %r0(%r29)
#else
tlb_lock %r20,%r21,%r22
pdtlb %r0(%r28)
pdtlb %r0(%r29)
0: pdtlb %r0(%r28)
1: pdtlb %r0(%r29)
tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
#ifdef CONFIG_64BIT
@@ -736,8 +744,9 @@ ENTRY_CFI(clear_user_page_asm)
pdtlb,l %r0(%r28)
#else
tlb_lock %r20,%r21,%r22
pdtlb %r0(%r28)
0: pdtlb %r0(%r28)
tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
#ifdef CONFIG_64BIT
@@ -813,11 +822,12 @@ ENTRY_CFI(flush_dcache_page_asm)
pdtlb,l %r0(%r28)
#else
tlb_lock %r20,%r21,%r22
pdtlb %r0(%r28)
0: pdtlb %r0(%r28)
tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), r31
#ifdef CONFIG_64BIT
@@ -847,6 +857,7 @@ ENTRY_CFI(flush_dcache_page_asm)
cmpb,COND(<<) %r28, %r25,1b
fdc,m r31(%r28)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
@@ -874,15 +885,19 @@ ENTRY_CFI(flush_icache_page_asm)
#ifdef CONFIG_PA20
pdtlb,l %r0(%r28)
pitlb,l %r0(%sr4,%r28)
1: pitlb,l %r0(%sr4,%r28)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
tlb_lock %r20,%r21,%r22
pdtlb %r0(%r28)
pitlb %r0(%sr4,%r28)
0: pdtlb %r0(%r28)
1: pitlb %r0(%sr4,%r28)
tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif
ldil L%icache_stride, %r1
88: ldil L%icache_stride, %r1
ldw R%icache_stride(%r1), %r31
#ifdef CONFIG_64BIT
@@ -914,13 +929,14 @@ ENTRY_CFI(flush_icache_page_asm)
cmpb,COND(<<) %r28, %r25,1b
fic,m %r31(%sr4,%r28)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(flush_icache_page_asm)
ENTRY_CFI(flush_kernel_dcache_page_asm)
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), %r23
#ifdef CONFIG_64BIT
@@ -950,13 +966,14 @@ ENTRY_CFI(flush_kernel_dcache_page_asm)
cmpb,COND(<<) %r26, %r25,1b
fdc,m %r23(%r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
ENTRY_CFI(purge_kernel_dcache_page_asm)
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), %r23
#ifdef CONFIG_64BIT
@@ -985,13 +1002,14 @@ ENTRY_CFI(purge_kernel_dcache_page_asm)
cmpb,COND(<<) %r26, %r25, 1b
pdc,m %r23(%r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
ENTRY_CFI(flush_user_dcache_range_asm)
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), %r23
ldo -1(%r23), %r21
ANDCM %r26, %r21, %r26
@@ -999,13 +1017,14 @@ ENTRY_CFI(flush_user_dcache_range_asm)
1: cmpb,COND(<<),n %r26, %r25, 1b
fdc,m %r23(%sr3, %r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(flush_user_dcache_range_asm)
ENTRY_CFI(flush_kernel_dcache_range_asm)
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), %r23
ldo -1(%r23), %r21
ANDCM %r26, %r21, %r26
@@ -1014,13 +1033,14 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)
fdc,m %r23(%r26)
sync
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
syncdma
bv %r0(%r2)
nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
ENTRY_CFI(purge_kernel_dcache_range_asm)
ldil L%dcache_stride, %r1
88: ldil L%dcache_stride, %r1
ldw R%dcache_stride(%r1), %r23
ldo -1(%r23), %r21
ANDCM %r26, %r21, %r26
@@ -1029,13 +1049,14 @@ ENTRY_CFI(purge_kernel_dcache_range_asm)
pdc,m %r23(%r26)
sync
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
syncdma
bv %r0(%r2)
nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
ENTRY_CFI(flush_user_icache_range_asm)
ldil L%icache_stride, %r1
88: ldil L%icache_stride, %r1
ldw R%icache_stride(%r1), %r23
ldo -1(%r23), %r21
ANDCM %r26, %r21, %r26
@@ -1043,13 +1064,14 @@ ENTRY_CFI(flush_user_icache_range_asm)
1: cmpb,COND(<<),n %r26, %r25,1b
fic,m %r23(%sr3, %r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(flush_user_icache_range_asm)
ENTRY_CFI(flush_kernel_icache_page)
ldil L%icache_stride, %r1
88: ldil L%icache_stride, %r1
ldw R%icache_stride(%r1), %r23
#ifdef CONFIG_64BIT
@@ -1079,13 +1101,14 @@ ENTRY_CFI(flush_kernel_icache_page)
cmpb,COND(<<) %r26, %r25, 1b
fic,m %r23(%sr4, %r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
sync
bv %r0(%r2)
nop
ENDPROC_CFI(flush_kernel_icache_page)
ENTRY_CFI(flush_kernel_icache_range_asm)
ldil L%icache_stride, %r1
88: ldil L%icache_stride, %r1
ldw R%icache_stride(%r1), %r23
ldo -1(%r23), %r21
ANDCM %r26, %r21, %r26
@@ -1093,6 +1116,7 @@ ENTRY_CFI(flush_kernel_icache_range_asm)
1: cmpb,COND(<<),n %r26, %r25, 1b
fic,m %r23(%sr4, %r26)
89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
sync
bv %r0(%r2)
nop

查看文件

@@ -305,6 +305,86 @@ static int __init parisc_init_resources(void)
return 0;
}
static int no_alternatives __initdata;
static int __init setup_no_alternatives(char *str)
{
no_alternatives = 1;
return 1;
}
__setup("no-alternatives", setup_no_alternatives);
static void __init apply_alternatives_all(void)
{
struct alt_instr *entry;
int index = 0, applied = 0;
pr_info("alternatives: %spatching kernel code\n",
no_alternatives ? "NOT " : "");
if (no_alternatives)
return;
set_kernel_text_rw(1);
for (entry = (struct alt_instr *) &__alt_instructions;
entry < (struct alt_instr *) &__alt_instructions_end;
entry++, index++) {
u32 *from, len, cond, replacement;
from = (u32 *)((ulong)&entry->orig_offset + entry->orig_offset);
len = entry->len;
cond = entry->cond;
replacement = entry->replacement;
WARN_ON(!cond);
pr_debug("Check %d: Cond 0x%x, Replace %02d instructions @ 0x%px with 0x%08x\n",
index, cond, len, from, replacement);
if ((cond & ALT_COND_NO_SMP) && (num_online_cpus() != 1))
continue;
if ((cond & ALT_COND_NO_DCACHE) && (cache_info.dc_size != 0))
continue;
if ((cond & ALT_COND_NO_ICACHE) && (cache_info.ic_size != 0))
continue;
/*
* If the PDC_MODEL capabilities has Non-coherent IO-PDIR bit
* set (bit #61, big endian), we have to flush and sync every
* time IO-PDIR is changed in Ike/Astro.
*/
if ((cond & ALT_COND_NO_IOC_FDC) &&
(boot_cpu_data.pdc.capabilities & PDC_MODEL_IOPDIR_FDC))
continue;
/* Want to replace pdtlb by a pdtlb,l instruction? */
if (replacement == INSN_PxTLB) {
replacement = *from;
if (boot_cpu_data.cpu_type >= pcxu) /* >= pa2.0 ? */
replacement |= (1 << 10); /* set el bit */
}
/*
* Replace instruction with NOPs?
* For long distance insert a branch instruction instead.
*/
if (replacement == INSN_NOP && len > 1)
replacement = 0xe8000002 + (len-2)*8; /* "b,n .+8" */
pr_debug("Do %d: Cond 0x%x, Replace %02d instructions @ 0x%px with 0x%08x\n",
index, cond, len, from, replacement);
/* Replace instruction */
*from = replacement;
applied++;
}
pr_info("alternatives: applied %d out of %d patches\n", applied, index);
set_kernel_text_rw(0);
}
extern void gsc_init(void);
extern void processor_init(void);
extern void ccio_init(void);
@@ -346,6 +426,7 @@ static int __init parisc_init(void)
boot_cpu_data.cpu_hz / 1000000,
boot_cpu_data.cpu_hz % 1000000 );
apply_alternatives_all();
parisc_setup_cache_timing();
/* These are in a non-obvious order, will fix when we have an iotree */

查看文件

@@ -65,7 +65,6 @@
#define INSN_LDI_R25_1 0x34190002 /* ldi 1,%r25 (in_syscall=1) */
#define INSN_LDI_R20 0x3414015a /* ldi __NR_rt_sigreturn,%r20 */
#define INSN_BLE_SR2_R0 0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */
#define INSN_NOP 0x08000240 /* nop */
/* For debugging */
#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */

查看文件

@@ -61,6 +61,12 @@ SECTIONS
EXIT_DATA
}
PERCPU_SECTION(8)
. = ALIGN(4);
.altinstructions : {
__alt_instructions = .;
*(.altinstructions)
__alt_instructions_end = .;
}
. = ALIGN(HUGEPAGE_SIZE);
__init_end = .;
/* freed after init ends here */