/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 */

.text
- ENTRY(csum_partial)
- /*
- * Experiments with Ethernet and SLIP connections show that buff
- * is aligned on either a 2-byte or 4-byte boundary. We get at
- * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
- * Fortunately, it is easy to convert 2-byte alignment to 4-byte
- * alignment for the unrolled loop.
- */
- mov r4, r0
- tst #3, r0 ! Check alignment.
- bt/s 2f ! Jump if alignment is ok.
- mov r4, r7 ! Keep a copy to check for alignment
- !
- tst #1, r0 ! Check alignment.
- bt 21f ! Jump if alignment is boundary of 2bytes.
- ! buf is odd
- tst r5, r5
- add #-1, r5
- bt 9f
- mov.b @r4+, r0
- extu.b r0, r0
- addc r0, r6 ! t=0 from previous tst
- mov r6, r0
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
- mov r4, r0
- tst #2, r0
- bt 2f
- 21:
- ! buf is 2 byte aligned (len could be 0)
- add #-2, r5 ! Alignment uses up two bytes.
- cmp/pz r5 !
- bt/s 1f ! Jump if we had at least two bytes.
- clrt
- bra 6f
- add #2, r5 ! r5 was < 2. Deal with it.
- 1:
- mov.w @r4+, r0
- extu.w r0, r0
- addc r0, r6
- bf 2f
- add #1, r6
- 2:
- ! buf is 4 byte aligned (len could be 0)
- mov r5, r1
- mov #-5, r0
- shld r0, r1
- tst r1, r1
- bt/s 4f ! if it's =0, go to 4f
- clrt
- .align 2
- 3:
- mov.l @r4+, r0
- mov.l @r4+, r2
- mov.l @r4+, r3
- addc r0, r6
- mov.l @r4+, r0
- addc r2, r6
- mov.l @r4+, r2
- addc r3, r6
- mov.l @r4+, r3
- addc r0, r6
- mov.l @r4+, r0
- addc r2, r6
- mov.l @r4+, r2
- addc r3, r6
- addc r0, r6
- addc r2, r6
- movt r0
- dt r1
- bf/s 3b
- cmp/eq #1, r0
- ! here, we know r1==0
- addc r1, r6 ! add carry to r6
- 4:
- mov r5, r0
- and #0x1c, r0
- tst r0, r0
- bt 6f
- ! 4 bytes or more remaining
- mov r0, r1
- shlr2 r1
- mov #0, r2
- 5:
- addc r2, r6
- mov.l @r4+, r2
- movt r0
- dt r1
- bf/s 5b
- cmp/eq #1, r0
- addc r2, r6
- addc r1, r6 ! r1==0 here, so it means add carry-bit
- 6:
- ! 3 bytes or less remaining
- mov #3, r0
- and r0, r5
- tst r5, r5
- bt 9f ! if it's =0 go to 9f
- mov #2, r1
- cmp/hs r1, r5
- bf 7f
- mov.w @r4+, r0
- extu.w r0, r0
- cmp/eq r1, r5
- bt/s 8f
- clrt
- shll16 r0
- addc r0, r6
- 7:
- mov.b @r4+, r0
- extu.b r0, r0
- #ifndef __LITTLE_ENDIAN__
- shll8 r0
- #endif
- 8:
- addc r0, r6
- mov #0, r0
- addc r0, r6
- 9:
- ! Check if the buffer was misaligned, if so realign sum
- mov r7, r0
- tst #1, r0
- bt 10f
- mov r6, r0
- shll8 r6
- shlr16 r0
- shlr8 r0
- or r0, r6
- 10:
- rts
- mov r6, r0
- /*
- unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
- */
- /*
- * Copy from ds while checksumming, otherwise like csum_partial with initial
- * sum being ~0U
- */
- #define EXC(...) \
- 9999: __VA_ARGS__ ; \
- .section __ex_table, "a"; \
- .long 9999b, 6001f ; \
- .previous
- !
- ! r4: const char *SRC
- ! r5: char *DST
- ! r6: int LEN
- !
- ENTRY(csum_partial_copy_generic)
- mov #-1,r7
- mov #3,r0 ! Check src and dest are equally aligned
- mov r4,r1
- and r0,r1
- and r5,r0
- cmp/eq r1,r0
- bf 3f ! Different alignments, use slow version
- tst #1,r0 ! Check dest word aligned
- bf 3f ! If not, do it the slow way
- mov #2,r0
- tst r0,r5 ! Check dest alignment.
- bt 2f ! Jump if alignment is ok.
- add #-2,r6 ! Alignment uses up two bytes.
- cmp/pz r6 ! Jump if we had at least two bytes.
- bt/s 1f
- clrt
- add #2,r6 ! r6 was < 2. Deal with it.
- bra 4f
- mov r6,r2
- 3: ! Handle different src and dest alignments.
- ! This is not common, so simple byte by byte copy will do.
- mov r6,r2
- shlr r6
- tst r6,r6
- bt 4f
- clrt
- .align 2
- 5:
- EXC( mov.b @r4+,r1 )
- EXC( mov.b @r4+,r0 )
- extu.b r1,r1
- EXC( mov.b r1,@r5 )
- EXC( mov.b r0,@(1,r5) )
- extu.b r0,r0
- add #2,r5
- #ifdef __LITTLE_ENDIAN__
- shll8 r0
- #else
- shll8 r1
- #endif
- or r1,r0
- addc r0,r7
- movt r0
- dt r6
- bf/s 5b
- cmp/eq #1,r0
- mov #0,r0
- addc r0, r7
- mov r2, r0
- tst #1, r0
- bt 7f
- bra 5f
- clrt
- ! src and dest equally aligned, but to a two byte boundary.
- ! Handle first two bytes as a special case
- .align 2
- 1:
- EXC( mov.w @r4+,r0 )
- EXC( mov.w r0,@r5 )
- add #2,r5
- extu.w r0,r0
- addc r0,r7
- mov #0,r0
- addc r0,r7
- 2:
- mov r6,r2
- mov #-5,r0
- shld r0,r6
- tst r6,r6
- bt/s 2f
- clrt
- .align 2
- 1:
- EXC( mov.l @r4+,r0 )
- EXC( mov.l @r4+,r1 )
- addc r0,r7
- EXC( mov.l r0,@r5 )
- EXC( mov.l r1,@(4,r5) )
- addc r1,r7
- EXC( mov.l @r4+,r0 )
- EXC( mov.l @r4+,r1 )
- addc r0,r7
- EXC( mov.l r0,@(8,r5) )
- EXC( mov.l r1,@(12,r5) )
- addc r1,r7
- EXC( mov.l @r4+,r0 )
- EXC( mov.l @r4+,r1 )
- addc r0,r7
- EXC( mov.l r0,@(16,r5) )
- EXC( mov.l r1,@(20,r5) )
- addc r1,r7
- EXC( mov.l @r4+,r0 )
- EXC( mov.l @r4+,r1 )
- addc r0,r7
- EXC( mov.l r0,@(24,r5) )
- EXC( mov.l r1,@(28,r5) )
- addc r1,r7
- add #32,r5
- movt r0
- dt r6
- bf/s 1b
- cmp/eq #1,r0
- mov #0,r0
- addc r0,r7
- 2: mov r2,r6
- mov #0x1c,r0
- and r0,r6
- cmp/pl r6
- bf/s 4f
- clrt
- shlr2 r6
- 3:
- EXC( mov.l @r4+,r0 )
- addc r0,r7
- EXC( mov.l r0,@r5 )
- add #4,r5
- movt r0
- dt r6
- bf/s 3b
- cmp/eq #1,r0
- mov #0,r0
- addc r0,r7
- 4: mov r2,r6
- mov #3,r0
- and r0,r6
- cmp/pl r6
- bf 7f
- mov #2,r1
- cmp/hs r1,r6
- bf 5f
- EXC( mov.w @r4+,r0 )
- EXC( mov.w r0,@r5 )
- extu.w r0,r0
- add #2,r5
- cmp/eq r1,r6
- bt/s 6f
- clrt
- shll16 r0
- addc r0,r7
- 5:
- EXC( mov.b @r4+,r0 )
- EXC( mov.b r0,@r5 )
- extu.b r0,r0
- #ifndef __LITTLE_ENDIAN__
- shll8 r0
- #endif
- 6: addc r0,r7
- mov #0,r0
- addc r0,r7
- 7:
- # Exception handler:
- .section .fixup, "ax"
- 6001:
- rts
- mov #0,r0
- .previous
- rts
- mov r7,r0
|