Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate a decade old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
@@ -13,16 +13,6 @@
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>

.macro SAVE reg
pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm

.macro RESTORE reg
popl_cfi %\reg
CFI_RESTORE \reg
.endm

.macro read64 reg
movl %ebx, %eax
movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
SAVE esi
SAVE edi
pushl_cfi_reg ebp
pushl_cfi_reg ebx
pushl_cfi_reg esi
pushl_cfi_reg edi

movl %eax, %esi
movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE edi
RESTORE esi
RESTORE ebx
RESTORE ebp
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx

read64 %esi
1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb

ENTRY(atomic64_dec_if_positive_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx

read64 %esi
1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
2:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)

ENTRY(atomic64_add_unless_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
pushl_cfi_reg ebp
pushl_cfi_reg ebx
/* these just push these two parameters on the stack */
SAVE edi
SAVE ecx
pushl_cfi_reg edi
pushl_cfi_reg ecx

movl %eax, %ebp
movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
3:
addl $8, %esp
CFI_ADJUST_CFA_OFFSET -8
RESTORE ebx
RESTORE ebp
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
4:
cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)

ENTRY(atomic64_inc_not_zero_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx

read64 %esi
1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)

movl $1, %eax
3:
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)

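The hunks above show the first of the two mechanical conversions in this diff: the file-local SAVE/RESTORE wrappers (a pushl_cfi plus an explicit CFI_REL_OFFSET, and the matching pop) are replaced by the generic pushl_cfi_reg/popl_cfi_reg helpers. A minimal sketch of what such a helper amounts to, inferred from the code being removed here rather than quoted from the patch:

	/* Sketch only: push a register and emit both CFI annotations for it. */
	.macro pushl_cfi_reg reg
	pushl %\reg
	CFI_ADJUST_CFA_OFFSET 4		/* stack pointer dropped by one 32-bit slot */
	CFI_REL_OFFSET \reg, 0		/* \reg is now saved at the top of the stack */
	.endm

	/* Sketch only: pop a register and undo the CFI state. */
	.macro popl_cfi_reg reg
	popl %\reg
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE \reg		/* \reg lives in its register again */
	.endm

Folding the CFI bookkeeping into the push/pop helpers is what lets the checksum, msr-reg, rwsem and thunk hunks below lose roughly half of their prologue/epilogue lines.
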
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
6: addl %ecx,%eax
adcl $0, %eax
7:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 8f
roll $8, %eax
8:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)

ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
addl %ebx,%eax
adcl $0,%eax
80:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 90f
roll $8, %eax
90:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg edi
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )

.previous

popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi_reg ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)

ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi_reg ebx
pushl_cfi_reg edi
pushl_cfi_reg esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous

popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

@@ -1,31 +1,35 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
* Zero a page.
* rdi page
*/
ENTRY(clear_page_c)
* Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
* recommended to use this when possible and we do use them by default.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original.
*/

/*
* Zero a page.
* %rdi - page
*/
ENTRY(clear_page)
CFI_STARTPROC

ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
"jmp clear_page_c_e", X86_FEATURE_ERMS

movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page_c)
ENDPROC(clear_page)

ENTRY(clear_page_c_e)
ENTRY(clear_page_orig)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)

ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
nop
ret
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)
ENDPROC(clear_page_orig)

/*
* Some CPUs support enhanced REP MOVSB/STOSB instructions.
* It is recommended to use this when possible.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original function.
*
*/

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
3:
.previous
.section .altinstructions,"a"
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
.Lclear_page_end-clear_page, 2b-1b
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
ENTRY(clear_page_c_e)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)

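The clear_page rewrite above is the template for the other 64-bit string routines in this diff: the hand-written .altinstr_replacement/.altinstructions sections are dropped and the dispatch is expressed with the ALTERNATIVE/ALTERNATIVE_2 macros, where the default instruction comes first and, per the comments elsewhere in this diff, a later-listed feature takes precedence over an earlier one. A hedged illustration of how that dispatch reads; the example_* symbols are hypothetical and not part of the patch:

	/*
	 * The default "jmp example_orig" is what sits in the code stream.
	 * At patch time it becomes the empty alternative (fall through to
	 * the rep-string body below) on X86_FEATURE_REP_GOOD CPUs, or
	 * "jmp example_erms" on X86_FEATURE_ERMS CPUs.
	 */
	ENTRY(example)
	ALTERNATIVE_2 "jmp example_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp example_erms", X86_FEATURE_ERMS
	movl	$4096/8,%ecx		/* rep-string fall-through body */
	xorl	%eax,%eax
	rep stosq
	ret
	ENDPROC(example)

Keeping the fall-through body on the default path means only a short jump ever needs patching, which is the same layout the copy_page, copy_user, memcpy and memset hunks below adopt.
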
@@ -2,23 +2,26 @@

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
* Some CPUs run faster using the string copy instructions (sane microcode).
* It is also a lot simpler. Use this when possible. But, don't use streaming
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
* prefetch distance based on SMP/UP.
*/
ALIGN
copy_page_rep:
ENTRY(copy_page)
CFI_STARTPROC
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_rep)
ENDPROC(copy_page)

/*
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
* Could vary the prefetch distance based on SMP/UP.
*/

ENTRY(copy_page)
ENTRY(copy_page_regs)
CFI_STARTPROC
subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)

/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
.Lcopy_page_end-copy_page, 2b-1b
.previous
ENDPROC(copy_page_regs)

@@ -8,9 +8,6 @@

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -19,33 +16,7 @@
#include <asm/asm.h>
#include <asm/smap.h>

/*
* By placing feature2 after feature1 in altinstructions section, we logically
* implement:
* If CPU has feature2, jmp to alt2 is used
* else if CPU has feature1, jmp to alt1 is used
* else jmp to orig is used.
*/
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
3: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous

.section .altinstructions,"a"
altinstruction_entry 0b,2b,\feature1,5,5
altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm

.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
@@ -67,7 +38,6 @@

_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
#endif
.endm

/* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_to_user)

@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_from_user)

@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)

/* handle last odd byte */
.Lhandle_1:
testl $1, %r10d
testb $1, %r10b
jz .Lende
xorl %ebx, %ebx
source

@@ -52,6 +52,13 @@
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{
/*
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
* even if the input buffer is long enough to hold them.
*/
if (buf_len > MAX_INSN_SIZE)
buf_len = MAX_INSN_SIZE;

memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
* For VEX2, fake VEX3-like byte#2.
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}

@@ -1,11 +1,19 @@
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>

#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>

/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/

.weak memcpy

/*
* memcpy - Copy a memory block.
*
@@ -17,15 +25,11 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS

/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
@@ -34,29 +38,21 @@
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e:
.previous
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
.previous
ENDPROC(memcpy_erms)

.weak memcpy

ENTRY(__memcpy)
ENTRY(memcpy)
ENTRY(memcpy_orig)
CFI_STARTPROC
movq %rdi, %rax

@@ -183,26 +179,4 @@ ENTRY(memcpy)
.Lend:
retq
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
ENDPROC(memcpy_orig)

@@ -5,7 +5,6 @@
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
jg 2f

.Lmemmove_begin_forward:
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS

/*
* movsq instruction have many startup latency
* so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
13:
retq
CFI_ENDPROC

.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous

.section .altinstructions,"a"
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(__memmove)
ENDPROC(memmove)

@@ -5,19 +5,30 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

.weak memset

/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well.
*
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
*/
ENTRY(memset)
ENTRY(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
*
* Otherwise, use original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS

movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
@@ -31,8 +42,8 @@
rep stosb
movq %r9,%rax
ret
.Lmemset_e:
.previous
ENDPROC(memset)
ENDPROC(__memset)

/*
* ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
ENTRY(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
ENDPROC(memset_erms)

.weak memset

ENTRY(memset)
ENTRY(__memset)
ENTRY(memset_orig)
CFI_STARTPROC
movq %rdi,%r10

@@ -134,23 +140,4 @@ ENTRY(__memset)
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)

/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-__memset,.Lmemset_e-.Lmemset_c
altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
.previous
ENDPROC(memset_orig)

@@ -14,8 +14,8 @@
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushq_cfi %rbx
pushq_cfi %rbp
pushq_cfi_reg rbx
pushq_cfi_reg rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
popq_cfi %rbp
popq_cfi %rbx
popq_cfi_reg rbp
popq_cfi_reg rbx
ret
3:
CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushl_cfi %ebx
pushl_cfi %ebp
pushl_cfi %esi
pushl_cfi %edi
pushl_cfi_reg ebx
pushl_cfi_reg ebp
pushl_cfi_reg esi
pushl_cfi_reg edi
pushl_cfi $0 /* Return value */
pushl_cfi %eax
movl 4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
popl_cfi %eax
popl_cfi %edi
popl_cfi %esi
popl_cfi %ebp
popl_cfi %ebx
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebp
popl_cfi_reg ebx
ret
3:
CFI_RESTORE_STATE

@@ -34,10 +34,10 @@
*/

#define save_common_regs \
pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
pushl_cfi_reg ecx

#define restore_common_regs \
popl_cfi %ecx; CFI_RESTORE ecx
popl_cfi_reg ecx

/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
@@ -64,22 +64,22 @@
*/

#define save_common_regs \
pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
pushq_cfi %r11; CFI_REL_OFFSET r11, 0
pushq_cfi_reg rdi; \
pushq_cfi_reg rsi; \
pushq_cfi_reg rcx; \
pushq_cfi_reg r8; \
pushq_cfi_reg r9; \
pushq_cfi_reg r10; \
pushq_cfi_reg r11

#define restore_common_regs \
popq_cfi %r11; CFI_RESTORE r11; \
popq_cfi %r10; CFI_RESTORE r10; \
popq_cfi %r9; CFI_RESTORE r9; \
popq_cfi %r8; CFI_RESTORE r8; \
popq_cfi %rcx; CFI_RESTORE rcx; \
popq_cfi %rsi; CFI_RESTORE rsi; \
popq_cfi %rdi; CFI_RESTORE rdi
popq_cfi_reg r11; \
popq_cfi_reg r10; \
popq_cfi_reg r9; \
popq_cfi_reg r8; \
popq_cfi_reg rcx; \
popq_cfi_reg rsi; \
popq_cfi_reg rdi

#endif

@@ -87,12 +87,10 @@
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC

@@ -13,12 +13,9 @@
.globl \name
\name:
CFI_STARTPROC
pushl_cfi %eax
CFI_REL_OFFSET eax, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi_reg eax
pushl_cfi_reg ecx
pushl_cfi_reg edx

.if \put_ret_addr_in_eax
/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
.endif

call \func
popl_cfi %edx
CFI_RESTORE edx
popl_cfi %ecx
CFI_RESTORE ecx
popl_cfi %eax
CFI_RESTORE eax
popl_cfi_reg edx
popl_cfi_reg ecx
popl_cfi_reg eax
ret
CFI_ENDPROC
_ASM_NOKPROBE(\name)

@@ -17,9 +17,18 @@
CFI_STARTPROC

/* this one pushes 9 elems, the next one would be %rIP */
SAVE_ARGS
pushq_cfi_reg rdi
pushq_cfi_reg rsi
pushq_cfi_reg rdx
pushq_cfi_reg rcx
pushq_cfi_reg rax
pushq_cfi_reg r8
pushq_cfi_reg r9
pushq_cfi_reg r10
pushq_cfi_reg r11

.if \put_ret_addr_in_rdi
/* 9*8(%rsp) is return addr on stack */
movq_cfi_restore 9*8, rdi
.endif

@@ -45,11 +54,22 @@
#endif
#endif

/* SAVE_ARGS below is used only for the .cfi directives it contains. */
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
CFI_STARTPROC
SAVE_ARGS
CFI_ADJUST_CFA_OFFSET 9*8
restore:
RESTORE_ARGS
popq_cfi_reg r11
popq_cfi_reg r10
popq_cfi_reg r9
popq_cfi_reg r8
popq_cfi_reg rax
popq_cfi_reg rcx
popq_cfi_reg rdx
popq_cfi_reg rsi
popq_cfi_reg rdi
ret
CFI_ENDPROC
_ASM_NOKPROBE(restore)
#endif

@@ -273,6 +273,9 @@ dd: ESC
de: ESC
df: ESC
# 0xe0 - 0xef
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
e5: IN eAX,Ib
e6: OUT Ib,AL
e7: OUT Ib,eAX
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64)
e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
GrpTable: Grp5
0: INC Ev
1: DEC Ev
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)