Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate a decade old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
This commit is contained in:
Linus Torvalds
2015-04-13 13:16:36 -07:00
121 changed files with 3084 additions and 2104 deletions

View File

@@ -13,16 +13,6 @@
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>
.macro SAVE reg
pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
popl_cfi %\reg
CFI_RESTORE \reg
.endm
.macro read64 reg
movl %ebx, %eax
movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
SAVE esi
SAVE edi
pushl_cfi_reg ebp
pushl_cfi_reg ebx
pushl_cfi_reg esi
pushl_cfi_reg edi
movl %eax, %esi
movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE edi
RESTORE esi
RESTORE ebx
RESTORE ebp
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
ENTRY(atomic64_dec_if_positive_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
2:
movl %ebx, %eax
movl %ecx, %edx
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)
ENTRY(atomic64_add_unless_cx8)
CFI_STARTPROC
SAVE ebp
SAVE ebx
pushl_cfi_reg ebp
pushl_cfi_reg ebx
/* these just push these two parameters on the stack */
SAVE edi
SAVE ecx
pushl_cfi_reg edi
pushl_cfi_reg ecx
movl %eax, %ebp
movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
3:
addl $8, %esp
CFI_ADJUST_CFA_OFFSET -8
RESTORE ebx
RESTORE ebp
popl_cfi_reg ebx
popl_cfi_reg ebp
ret
4:
cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
ENTRY(atomic64_inc_not_zero_cx8)
CFI_STARTPROC
SAVE ebx
pushl_cfi_reg ebx
read64 %esi
1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
RESTORE ebx
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)

View File

@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
6: addl %ecx,%eax
adcl $0, %eax
7:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 8f
roll $8, %eax
8:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
addl %ebx,%eax
adcl $0,%eax
80:
testl $1, 12(%esp)
testb $1, 12(%esp)
jz 90f
roll $8, %eax
90:
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi_reg ebx
popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi_reg edi
pushl_cfi_reg esi
pushl_cfi_reg ebx
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
.previous
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi_reg ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
pushl_cfi %edi
CFI_REL_OFFSET edi, 0
pushl_cfi %esi
CFI_REL_OFFSET esi, 0
pushl_cfi_reg ebx
pushl_cfi_reg edi
pushl_cfi_reg esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
popl_cfi %esi
CFI_RESTORE esi
popl_cfi %edi
CFI_RESTORE edi
popl_cfi %ebx
CFI_RESTORE ebx
popl_cfi_reg esi
popl_cfi_reg edi
popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

View File

@@ -1,31 +1,35 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* Zero a page.
* rdi page
*/
ENTRY(clear_page_c)
* Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
* recommended to use this when possible and we do use them by default.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original.
*/
/*
* Zero a page.
* %rdi - page
*/
ENTRY(clear_page)
CFI_STARTPROC
ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
"jmp clear_page_c_e", X86_FEATURE_ERMS
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page_c)
ENDPROC(clear_page)
ENTRY(clear_page_c_e)
ENTRY(clear_page_orig)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
nop
ret
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)
ENDPROC(clear_page_orig)
/*
* Some CPUs support enhanced REP MOVSB/STOSB instructions.
* It is recommended to use this when possible.
* If enhanced REP MOVSB/STOSB is not available, try to use fast string.
* Otherwise, use original function.
*
*/
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
3:
.previous
.section .altinstructions,"a"
altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
.Lclear_page_end-clear_page, 2b-1b
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
ENTRY(clear_page_c_e)
CFI_STARTPROC
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
CFI_ENDPROC
ENDPROC(clear_page_c_e)

View File

@@ -2,23 +2,26 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
* Some CPUs run faster using the string copy instructions (sane microcode).
* It is also a lot simpler. Use this when possible. But, don't use streaming
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
* prefetch distance based on SMP/UP.
*/
ALIGN
copy_page_rep:
ENTRY(copy_page)
CFI_STARTPROC
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_rep)
ENDPROC(copy_page)
/*
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
* Could vary the prefetch distance based on SMP/UP.
*/
ENTRY(copy_page)
ENTRY(copy_page_regs)
CFI_STARTPROC
subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
.Lcopy_page_end-copy_page, 2b-1b
.previous
ENDPROC(copy_page_regs)

View File

@@ -8,9 +8,6 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -19,33 +16,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
/*
* By placing feature2 after feature1 in altinstructions section, we logically
* implement:
* If CPU has feature2, jmp to alt2 is used
* else if CPU has feature1, jmp to alt1 is used
* else jmp to orig is used.
*/
.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt1-1b /* offset */ /* or alternatively to alt1 */
3: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt2-1b /* offset */ /* or alternatively to alt2 */
.previous
.section .altinstructions,"a"
altinstruction_entry 0b,2b,\feature1,5,5
altinstruction_entry 0b,3b,\feature2,5,5
.previous
.endm
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
@@ -67,7 +38,6 @@
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
#endif
.endm
/* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
copy_user_generic_unrolled,copy_user_generic_string, \
copy_user_enhanced_fast_string
ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
"jmp copy_user_generic_string", \
X86_FEATURE_REP_GOOD, \
"jmp copy_user_enhanced_fast_string", \
X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_from_user)

View File

@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
/* handle last odd byte */
.Lhandle_1:
testl $1, %r10d
testb $1, %r10b
jz .Lende
xorl %ebx, %ebx
source

View File

@@ -52,6 +52,13 @@
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{
/*
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
* even if the input buffer is long enough to hold them.
*/
if (buf_len > MAX_INSN_SIZE)
buf_len = MAX_INSN_SIZE;
memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
* For VEX2, fake VEX3-like byte#2.
* Makes it easier to decode vex.W, vex.vvvv,
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
*/
insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}

View File

@@ -1,11 +1,19 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
.weak memcpy
/*
* memcpy - Copy a memory block.
*
@@ -17,15 +25,11 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
@@ -34,29 +38,21 @@
movl %edx, %ecx
rep movsb
ret
.Lmemcpy_e:
.previous
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
* memcpy_c. Use memcpy_c_e when possible.
*
* This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
.previous
ENDPROC(memcpy_erms)
.weak memcpy
ENTRY(__memcpy)
ENTRY(memcpy)
ENTRY(memcpy_orig)
CFI_STARTPROC
movq %rdi, %rax
@@ -183,26 +179,4 @@ ENTRY(memcpy)
.Lend:
retq
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
* If the feature is supported, memcpy_c_e() is the first choice.
* If enhanced rep movsb copy is not available, use fast string copy
* memcpy_c() when possible. This is faster and code is simpler than
* original memcpy().
* Otherwise, original memcpy() is used.
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.section .altinstructions, "a"
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
.previous
ENDPROC(memcpy_orig)

View File

@@ -5,7 +5,6 @@
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
jg 2f
.Lmemmove_begin_forward:
ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
* so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
13:
retq
CFI_ENDPROC
.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
/* Forward moving data. */
movq %rdx, %rcx
rep movsb
retq
.Lmemmove_end_forward_efs:
.previous
.section .altinstructions,"a"
altinstruction_entry .Lmemmove_begin_forward, \
.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
.Lmemmove_end_forward-.Lmemmove_begin_forward, \
.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
.previous
ENDPROC(__memmove)
ENDPROC(memmove)

View File

@@ -5,19 +5,30 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
.weak memset
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well.
*
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
*/
ENTRY(memset)
ENTRY(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
*
* Otherwise, use original memset function.
*/
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
@@ -31,8 +42,8 @@
rep stosb
movq %r9,%rax
ret
.Lmemset_e:
.previous
ENDPROC(memset)
ENDPROC(__memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
*
* rax original destination
*/
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c_e:
ENTRY(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
.Lmemset_e_e:
.previous
ENDPROC(memset_erms)
.weak memset
ENTRY(memset)
ENTRY(__memset)
ENTRY(memset_orig)
CFI_STARTPROC
movq %rdi,%r10
@@ -134,23 +140,4 @@ ENTRY(__memset)
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
* It is recommended to use this when possible.
*
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
* instructions.
*
* Otherwise, use original memset function.
*
* In .altinstructions section, ERMS feature is placed after REG_GOOD
* feature to implement the right patch order.
*/
.section .altinstructions,"a"
altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
.Lfinal-__memset,.Lmemset_e-.Lmemset_c
altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
.Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
.previous
ENDPROC(memset_orig)

View File

@@ -14,8 +14,8 @@
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushq_cfi %rbx
pushq_cfi %rbp
pushq_cfi_reg rbx
pushq_cfi_reg rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
popq_cfi %rbp
popq_cfi %rbx
popq_cfi_reg rbp
popq_cfi_reg rbx
ret
3:
CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
pushl_cfi %ebx
pushl_cfi %ebp
pushl_cfi %esi
pushl_cfi %edi
pushl_cfi_reg ebx
pushl_cfi_reg ebp
pushl_cfi_reg esi
pushl_cfi_reg edi
pushl_cfi $0 /* Return value */
pushl_cfi %eax
movl 4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
popl_cfi %eax
popl_cfi %edi
popl_cfi %esi
popl_cfi %ebp
popl_cfi %ebx
popl_cfi_reg edi
popl_cfi_reg esi
popl_cfi_reg ebp
popl_cfi_reg ebx
ret
3:
CFI_RESTORE_STATE

View File

@@ -34,10 +34,10 @@
*/
#define save_common_regs \
pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
pushl_cfi_reg ecx
#define restore_common_regs \
popl_cfi %ecx; CFI_RESTORE ecx
popl_cfi_reg ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
@@ -64,22 +64,22 @@
*/
#define save_common_regs \
pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
pushq_cfi %r11; CFI_REL_OFFSET r11, 0
pushq_cfi_reg rdi; \
pushq_cfi_reg rsi; \
pushq_cfi_reg rcx; \
pushq_cfi_reg r8; \
pushq_cfi_reg r9; \
pushq_cfi_reg r10; \
pushq_cfi_reg r11
#define restore_common_regs \
popq_cfi %r11; CFI_RESTORE r11; \
popq_cfi %r10; CFI_RESTORE r10; \
popq_cfi %r9; CFI_RESTORE r9; \
popq_cfi %r8; CFI_RESTORE r8; \
popq_cfi %rcx; CFI_RESTORE rcx; \
popq_cfi %rsi; CFI_RESTORE rsi; \
popq_cfi %rdi; CFI_RESTORE rdi
popq_cfi_reg r11; \
popq_cfi_reg r10; \
popq_cfi_reg r9; \
popq_cfi_reg r8; \
popq_cfi_reg rcx; \
popq_cfi_reg rsi; \
popq_cfi_reg rdi
#endif
@@ -87,12 +87,10 @@
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
save_common_regs
__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
CFI_REL_OFFSET __ASM_REG(dx), 0
__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
CFI_RESTORE __ASM_REG(dx)
__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC

View File

@@ -13,12 +13,9 @@
.globl \name
\name:
CFI_STARTPROC
pushl_cfi %eax
CFI_REL_OFFSET eax, 0
pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
pushl_cfi %edx
CFI_REL_OFFSET edx, 0
pushl_cfi_reg eax
pushl_cfi_reg ecx
pushl_cfi_reg edx
.if \put_ret_addr_in_eax
/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
.endif
call \func
popl_cfi %edx
CFI_RESTORE edx
popl_cfi %ecx
CFI_RESTORE ecx
popl_cfi %eax
CFI_RESTORE eax
popl_cfi_reg edx
popl_cfi_reg ecx
popl_cfi_reg eax
ret
CFI_ENDPROC
_ASM_NOKPROBE(\name)

View File

@@ -17,9 +17,18 @@
CFI_STARTPROC
/* this one pushes 9 elems, the next one would be %rIP */
SAVE_ARGS
pushq_cfi_reg rdi
pushq_cfi_reg rsi
pushq_cfi_reg rdx
pushq_cfi_reg rcx
pushq_cfi_reg rax
pushq_cfi_reg r8
pushq_cfi_reg r9
pushq_cfi_reg r10
pushq_cfi_reg r11
.if \put_ret_addr_in_rdi
/* 9*8(%rsp) is return addr on stack */
movq_cfi_restore 9*8, rdi
.endif
@@ -45,11 +54,22 @@
#endif
#endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
CFI_STARTPROC
SAVE_ARGS
CFI_ADJUST_CFA_OFFSET 9*8
restore:
RESTORE_ARGS
popq_cfi_reg r11
popq_cfi_reg r10
popq_cfi_reg r9
popq_cfi_reg r8
popq_cfi_reg rax
popq_cfi_reg rcx
popq_cfi_reg rdx
popq_cfi_reg rsi
popq_cfi_reg rdi
ret
CFI_ENDPROC
_ASM_NOKPROBE(restore)
#endif

View File

@@ -273,6 +273,9 @@ dd: ESC
de: ESC
df: ESC
# 0xe0 - 0xef
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
e5: IN eAX,Ib
e6: OUT Ib,AL
e7: OUT Ib,eAX
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64)
e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
GrpTable: Grp5
0: INC Ev
1: DEC Ev
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)