Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kernel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
This commit is contained in:
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
|
||||
|
||||
ENTRY(copy_page)
|
||||
CFI_STARTPROC
|
||||
subq $3*8,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET 3*8
|
||||
subq $2*8,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET 2*8
|
||||
movq %rbx,(%rsp)
|
||||
CFI_REL_OFFSET rbx, 0
|
||||
movq %r12,1*8(%rsp)
|
||||
CFI_REL_OFFSET r12, 1*8
|
||||
movq %r13,2*8(%rsp)
|
||||
CFI_REL_OFFSET r13, 2*8
|
||||
|
||||
movl $(4096/64)-5,%ecx
|
||||
.p2align 4
|
||||
@@ -91,10 +89,8 @@ ENTRY(copy_page)
|
||||
CFI_RESTORE rbx
|
||||
movq 1*8(%rsp),%r12
|
||||
CFI_RESTORE r12
|
||||
movq 2*8(%rsp),%r13
|
||||
CFI_RESTORE r13
|
||||
addq $3*8,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET -3*8
|
||||
addq $2*8,%rsp
|
||||
CFI_ADJUST_CFA_OFFSET -2*8
|
||||
ret
|
||||
.Lcopy_page_end:
|
||||
CFI_ENDPROC
|
||||
|
@@ -27,9 +27,8 @@
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c:
|
||||
movq %rdi, %rax
|
||||
|
||||
movl %edx, %ecx
|
||||
shrl $3, %ecx
|
||||
movq %rdx, %rcx
|
||||
shrq $3, %rcx
|
||||
andl $7, %edx
|
||||
rep movsq
|
||||
movl %edx, %ecx
|
||||
@@ -48,8 +47,7 @@
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c_e:
|
||||
movq %rdi, %rax
|
||||
|
||||
movl %edx, %ecx
|
||||
movq %rdx, %rcx
|
||||
rep movsb
|
||||
ret
|
||||
.Lmemcpy_e_e:
|
||||
@@ -60,10 +58,7 @@ ENTRY(memcpy)
|
||||
CFI_STARTPROC
|
||||
movq %rdi, %rax
|
||||
|
||||
/*
|
||||
* Use 32bit CMP here to avoid long NOP padding.
|
||||
*/
|
||||
cmp $0x20, %edx
|
||||
cmpq $0x20, %rdx
|
||||
jb .Lhandle_tail
|
||||
|
||||
/*
|
||||
@@ -72,7 +67,7 @@ ENTRY(memcpy)
|
||||
*/
|
||||
cmp %dil, %sil
|
||||
jl .Lcopy_backward
|
||||
subl $0x20, %edx
|
||||
subq $0x20, %rdx
|
||||
.Lcopy_forward_loop:
|
||||
subq $0x20, %rdx
|
||||
|
||||
@@ -91,7 +86,7 @@ ENTRY(memcpy)
|
||||
movq %r11, 3*8(%rdi)
|
||||
leaq 4*8(%rdi), %rdi
|
||||
jae .Lcopy_forward_loop
|
||||
addq $0x20, %rdx
|
||||
addl $0x20, %edx
|
||||
jmp .Lhandle_tail
|
||||
|
||||
.Lcopy_backward:
|
||||
@@ -123,11 +118,11 @@ ENTRY(memcpy)
|
||||
/*
|
||||
* Calculate copy position to head.
|
||||
*/
|
||||
addq $0x20, %rdx
|
||||
addl $0x20, %edx
|
||||
subq %rdx, %rsi
|
||||
subq %rdx, %rdi
|
||||
.Lhandle_tail:
|
||||
cmpq $16, %rdx
|
||||
cmpl $16, %edx
|
||||
jb .Lless_16bytes
|
||||
|
||||
/*
|
||||
@@ -144,7 +139,7 @@ ENTRY(memcpy)
|
||||
retq
|
||||
.p2align 4
|
||||
.Lless_16bytes:
|
||||
cmpq $8, %rdx
|
||||
cmpl $8, %edx
|
||||
jb .Lless_8bytes
|
||||
/*
|
||||
* Move data from 8 bytes to 15 bytes.
|
||||
@@ -156,7 +151,7 @@ ENTRY(memcpy)
|
||||
retq
|
||||
.p2align 4
|
||||
.Lless_8bytes:
|
||||
cmpq $4, %rdx
|
||||
cmpl $4, %edx
|
||||
jb .Lless_3bytes
|
||||
|
||||
/*
|
||||
@@ -169,18 +164,19 @@ ENTRY(memcpy)
|
||||
retq
|
||||
.p2align 4
|
||||
.Lless_3bytes:
|
||||
cmpl $0, %edx
|
||||
je .Lend
|
||||
subl $1, %edx
|
||||
jb .Lend
|
||||
/*
|
||||
* Move data from 1 bytes to 3 bytes.
|
||||
*/
|
||||
.Lloop_1:
|
||||
movb (%rsi), %r8b
|
||||
movb %r8b, (%rdi)
|
||||
incq %rdi
|
||||
incq %rsi
|
||||
decl %edx
|
||||
jnz .Lloop_1
|
||||
movzbl (%rsi), %ecx
|
||||
jz .Lstore_1byte
|
||||
movzbq 1(%rsi), %r8
|
||||
movzbq (%rsi, %rdx), %r9
|
||||
movb %r8b, 1(%rdi)
|
||||
movb %r9b, (%rdi, %rdx)
|
||||
.Lstore_1byte:
|
||||
movb %cl, (%rdi)
|
||||
|
||||
.Lend:
|
||||
retq
|
||||
|
@@ -19,16 +19,15 @@
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemset_c:
|
||||
movq %rdi,%r9
|
||||
movl %edx,%r8d
|
||||
andl $7,%r8d
|
||||
movl %edx,%ecx
|
||||
shrl $3,%ecx
|
||||
movq %rdx,%rcx
|
||||
andl $7,%edx
|
||||
shrq $3,%rcx
|
||||
/* expand byte value */
|
||||
movzbl %sil,%esi
|
||||
movabs $0x0101010101010101,%rax
|
||||
mulq %rsi /* with rax, clobbers rdx */
|
||||
imulq %rsi,%rax
|
||||
rep stosq
|
||||
movl %r8d,%ecx
|
||||
movl %edx,%ecx
|
||||
rep stosb
|
||||
movq %r9,%rax
|
||||
ret
|
||||
@@ -50,7 +49,7 @@
|
||||
.Lmemset_c_e:
|
||||
movq %rdi,%r9
|
||||
movb %sil,%al
|
||||
movl %edx,%ecx
|
||||
movq %rdx,%rcx
|
||||
rep stosb
|
||||
movq %r9,%rax
|
||||
ret
|
||||
@@ -61,12 +60,11 @@ ENTRY(memset)
|
||||
ENTRY(__memset)
|
||||
CFI_STARTPROC
|
||||
movq %rdi,%r10
|
||||
movq %rdx,%r11
|
||||
|
||||
/* expand byte value */
|
||||
movzbl %sil,%ecx
|
||||
movabs $0x0101010101010101,%rax
|
||||
mul %rcx /* with rax, clobbers rdx */
|
||||
imulq %rcx,%rax
|
||||
|
||||
/* align dst */
|
||||
movl %edi,%r9d
|
||||
@@ -75,13 +73,13 @@ ENTRY(__memset)
|
||||
CFI_REMEMBER_STATE
|
||||
.Lafter_bad_alignment:
|
||||
|
||||
movl %r11d,%ecx
|
||||
shrl $6,%ecx
|
||||
movq %rdx,%rcx
|
||||
shrq $6,%rcx
|
||||
jz .Lhandle_tail
|
||||
|
||||
.p2align 4
|
||||
.Lloop_64:
|
||||
decl %ecx
|
||||
decq %rcx
|
||||
movq %rax,(%rdi)
|
||||
movq %rax,8(%rdi)
|
||||
movq %rax,16(%rdi)
|
||||
@@ -97,7 +95,7 @@ ENTRY(__memset)
|
||||
to predict jump tables. */
|
||||
.p2align 4
|
||||
.Lhandle_tail:
|
||||
movl %r11d,%ecx
|
||||
movl %edx,%ecx
|
||||
andl $63&(~7),%ecx
|
||||
jz .Lhandle_7
|
||||
shrl $3,%ecx
|
||||
@@ -109,12 +107,11 @@ ENTRY(__memset)
|
||||
jnz .Lloop_8
|
||||
|
||||
.Lhandle_7:
|
||||
movl %r11d,%ecx
|
||||
andl $7,%ecx
|
||||
andl $7,%edx
|
||||
jz .Lende
|
||||
.p2align 4
|
||||
.Lloop_1:
|
||||
decl %ecx
|
||||
decl %edx
|
||||
movb %al,(%rdi)
|
||||
leaq 1(%rdi),%rdi
|
||||
jnz .Lloop_1
|
||||
@@ -125,13 +122,13 @@ ENTRY(__memset)
|
||||
|
||||
CFI_RESTORE_STATE
|
||||
.Lbad_alignment:
|
||||
cmpq $7,%r11
|
||||
cmpq $7,%rdx
|
||||
jbe .Lhandle_7
|
||||
movq %rax,(%rdi) /* unaligned store */
|
||||
movq $8,%r8
|
||||
subq %r9,%r8
|
||||
addq %r8,%rdi
|
||||
subq %r8,%r11
|
||||
subq %r8,%rdx
|
||||
jmp .Lafter_bad_alignment
|
||||
.Lfinal:
|
||||
CFI_ENDPROC
|
||||
|
Reference in New Issue
Block a user