/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On a faulting access the function returns 0 (see .Lfault); the wrappers
 * are responsible for zeroing the destination and reporting the error.
 *
 * Input
 *   rdi  source
 *   rsi  destination
 *   edx  len (32bit)
 *
 * Output
 *   eax  32bit folded sum; 0 in case of exception.
 *
 * The wrappers should also align source or destination to 8 bytes.
 */
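
/*
 * The source/dest macros register an exception-table fixup for the memory
 * access that immediately follows them, so a faulting load/store branches
 * to .Lfault instead of oopsing.
 */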
.macro source
10:
        _ASM_EXTABLE_UA(10b, .Lfault)
.endm

.macro dest
20:
        _ASM_EXTABLE_UA(20b, .Lfault)
.endm

SYM_FUNC_START(csum_partial_copy_generic)
        subq $5*8, %rsp
        movq %rbx, 0*8(%rsp)
        movq %r12, 1*8(%rsp)
        movq %r14, 2*8(%rsp)
        movq %r13, 3*8(%rsp)
        movq %r15, 4*8(%rsp)
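
        /*
         * Seed the sum with all ones: with end-around-carry addition a
         * successful copy then never produces 0, which leaves 0 free to
         * act as the fault return value (see .Lfault).
         */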
        movl $-1, %eax
        xorl %r9d, %r9d
        movl %edx, %ecx
        cmpl $8, %ecx
        jb .Lshort

        testb $7, %sil
        jne .Lunaligned
.Laligned:
        movl %ecx, %r12d

        shrq $6, %r12
        jz .Lhandle_tail        /* < 64 */

        clc

        /* main loop: checksum and copy in 64-byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12: loopcnt */
        /* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
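        /*
         * Each block is loaded into eight registers, accumulated into rax
         * with an adcq chain (the carry flag threads the carry from one
         * block to the next), then stored to the destination.  After the
         * loop, adcq with r9 (always zero) folds in the final carry.
         */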
        .p2align 4
.Lloop:
        source
        movq (%rdi), %rbx
        source
        movq 8(%rdi), %r8
        source
        movq 16(%rdi), %r11
        source
        movq 24(%rdi), %rdx

        source
        movq 32(%rdi), %r10
        source
        movq 40(%rdi), %r15
        source
        movq 48(%rdi), %r14
        source
        movq 56(%rdi), %r13

30:
        /*
         * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
         * potentially unmapped kernel address.
         */
        _ASM_EXTABLE(30b, 2f)
        prefetcht0 5*64(%rdi)
2:
        adcq %rbx, %rax
        adcq %r8, %rax
        adcq %r11, %rax
        adcq %rdx, %rax
        adcq %r10, %rax
        adcq %r15, %rax
        adcq %r14, %rax
        adcq %r13, %rax

        decl %r12d

        dest
        movq %rbx, (%rsi)
        dest
        movq %r8, 8(%rsi)
        dest
        movq %r11, 16(%rsi)
        dest
        movq %rdx, 24(%rsi)

        dest
        movq %r10, 32(%rsi)
        dest
        movq %r15, 40(%rsi)
        dest
        movq %r14, 48(%rsi)
        dest
        movq %r13, 56(%rsi)

        leaq 64(%rdi), %rdi
        leaq 64(%rsi), %rsi

        jnz .Lloop

        adcq %r9, %rax

        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count, rcx.63: the end result needs to be rol8 */
        movq %rcx, %r10
        andl $63, %ecx
        shrl $3, %ecx
        jz .Lfold
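
        /* copy and checksum the remaining whole 8-byte words */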
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi), %rbx
        adcq %rbx, %rax
        decl %ecx
        dest
        movq %rbx, (%rsi)
        leaq 8(%rsi), %rsi      /* preserve carry */
        leaq 8(%rdi), %rdi
        jnz .Lloop_8
        adcq %r9, %rax          /* add in carry */

.Lfold:
        /* reduce checksum to 32bits */
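        /*
         * Add the upper 32 bits into the lower 32 bits; the adcl with r9
         * (always zero) folds the end-around carry back into the result.
         */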
        movl %eax, %ebx
        shrq $32, %rax
        addl %ebx, %eax
        adcl %r9d, %eax

        /* do last up to 6 bytes */
.Lhandle_7:
        movl %r10d, %ecx
        andl $7, %ecx

.L1:    /* .Lshort rejoins the common path here */
        shrl $1, %ecx
        jz .Lhandle_1
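
        /* copy and checksum the remaining 16-bit words */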
        movl $2, %edx
        xorl %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi), %bx
        adcl %ebx, %eax
        decl %ecx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        leaq 2(%rsi), %rsi
        jnz .Lloop_1
        adcl %r9d, %eax         /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testb $1, %r10b
        jz .Lende
        xorl %ebx, %ebx
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        addl %ebx, %eax
        adcl %r9d, %eax         /* carry */

.Lende:
        testq %r10, %r10
        js .Lwas_odd
.Lout:
        movq 0*8(%rsp), %rbx
        movq 1*8(%rsp), %r12
        movq 2*8(%rsp), %r14
        movq 3*8(%rsp), %r13
        movq 4*8(%rsp), %r15
        addq $5*8, %rsp
        RET
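
        /* total length below 8 bytes: skip alignment and go straight to the tail */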
.Lshort:
        movl %ecx, %r10d
        jmp .L1
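
        /*
         * Destination is not 8-byte aligned: copy (and sum) a leading byte,
         * word and/or dword until it is, then continue on the aligned path
         * with the reduced length in rcx.
         */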
.Lunaligned:
        xorl %ebx, %ebx
        testb $1, %sil
        jne .Lodd
1:      testb $2, %sil
        je 2f
        source
        movw (%rdi), %bx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        subq $2, %rcx
        leaq 2(%rsi), %rsi
        addq %rbx, %rax
2:      testb $4, %sil
        je .Laligned
        source
        movl (%rdi), %ebx
        dest
        movl %ebx, (%rsi)
        leaq 4(%rdi), %rdi
        subq $4, %rcx
        leaq 4(%rsi), %rsi
        addq %rbx, %rax
        jmp .Laligned
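
        /*
         * Odd leading byte: it is accumulated into bits 8-15 of the sum,
         * and bit 63 of rcx is set (while decrementing the length) so that
         * the final result is rotated by 8 at .Lwas_odd to restore the
         * byte order of the 16-bit ones' complement sum.
         */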
.Lodd:
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        leaq 1(%rdi), %rdi
        leaq 1(%rsi), %rsi
        /* decrement, set MSB */
        leaq -1(%rcx, %rcx), %rcx
        rorq $1, %rcx
        shll $8, %ebx
        addq %rbx, %rax
        jmp 1b
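
        /* a leading odd byte was consumed (rcx bit 63); rotate the sum by 8 to compensate */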
.Lwas_odd:
        roll $8, %eax
        jmp .Lout

        /* Exception: just return 0 */
.Lfault:
        xorl %eax, %eax
        jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)