powerpc/32: Optimise __csum_partial()
Improve __csum_partial by interleaving loads and adds.

On an 8xx, it brings neither improvement nor degradation.
On an 83xx, it brings a 25% improvement.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:

committed by
Michael Ellerman

parent
1128bb7813
commit
373e098e1e
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
|
|||||||
bdnz 2b
|
bdnz 2b
|
||||||
21: srwi. r6,r4,4 /* # blocks of 4 words to do */
|
21: srwi. r6,r4,4 /* # blocks of 4 words to do */
|
||||||
beq 3f
|
beq 3f
|
||||||
|
lwz r0,4(r3)
|
||||||
mtctr r6
|
mtctr r6
|
||||||
22: lwz r0,4(r3)
|
|
||||||
lwz r6,8(r3)
|
lwz r6,8(r3)
|
||||||
lwz r7,12(r3)
|
|
||||||
lwzu r8,16(r3)
|
|
||||||
adde r5,r5,r0
|
adde r5,r5,r0
|
||||||
|
lwz r7,12(r3)
|
||||||
adde r5,r5,r6
|
adde r5,r5,r6
|
||||||
|
lwzu r8,16(r3)
|
||||||
adde r5,r5,r7
|
adde r5,r5,r7
|
||||||
|
bdz 23f
|
||||||
|
22: lwz r0,4(r3)
|
||||||
adde r5,r5,r8
|
adde r5,r5,r8
|
||||||
|
lwz r6,8(r3)
|
||||||
|
adde r5,r5,r0
|
||||||
|
lwz r7,12(r3)
|
||||||
|
adde r5,r5,r6
|
||||||
|
lwzu r8,16(r3)
|
||||||
|
adde r5,r5,r7
|
||||||
bdnz 22b
|
bdnz 22b
|
||||||
|
23: adde r5,r5,r8
|
||||||
3: andi. r0,r4,2
|
3: andi. r0,r4,2
|
||||||
beq+ 4f
|
beq+ 4f
|
||||||
lhz r0,4(r3)
|
lhz r0,4(r3)
|
||||||
|
Reference in New Issue
Block a user