
Commit 87a156fb18
("Align hot loops of some string functions") degraded the performance of string functions by adding useless nops. A simple benchmark on an 8xx calling 100000x a memchr() that matches the first byte runs in 41668 TB ticks before this patch and in 35986 TB ticks after this patch, so this gives an improvement of approx. 10%. Another benchmark doing the same with a memchr() matching the 128th byte runs in 1011365 TB ticks before this patch and 1005682 TB ticks after this patch, so regardless of the number of loops, removing those useless nops improves the test by 5683 TB ticks. Fixes: 87a156fb18
("Align hot loops of some string functions") Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
133 lines
2.2 KiB
ArmAsm
133 lines
2.2 KiB
ArmAsm
/*
|
|
* String handling functions for PowerPC.
|
|
*
|
|
* Copyright (C) 1996 Paul Mackerras.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
#include <asm/processor.h>
|
|
#include <asm/errno.h>
|
|
#include <asm/ppc_asm.h>
|
|
#include <asm/export.h>
|
|
#include <asm/cache.h>
|
|
|
|
.text
|
|
|
|
/*
 * char *strncpy(char *dest, const char *src, size_t n)
 *
 * In:   r3 = dest, r4 = src, r5 = n
 * Out:  r3 = dest (r3 is never written here)
 * Uses: r0, r4, r5, r6, ctr, cr0
 *
 * Copies bytes from src up to and including the first NUL, but never
 * more than n bytes.
 *
 * This clears out any unused part of the destination buffer,
 * just as the libc version does. -- paulus
 */
_GLOBAL(strncpy)
	PPC_LCMPI 0,r5,0
	beqlr				/* n == 0: nothing to do */
	subi	r4,r4,1			/* pre-bias both cursors: lbzu/stbu */
	subi	r6,r3,1			/* step by 1 before each access     */
	mtctr	r5			/* ctr = bytes that may still be written */
	.balign IFETCH_ALIGN_BYTES
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0			/* cr0.eq = just read a NUL */
	stbu	r0,1(r6)
	bdnzf	eq,1b			/* --ctr; loop while ctr != 0 && !cr0.eq */
	bnelr				/* stopped on the count, not on a NUL: done */
	mfctr	r5			/* r5 = room left after the NUL */
	PPC_LCMPI 0,r5,0
	beqlr				/* NUL filled the last slot: no padding */
2:	stbu	r0,1(r6)		/* r0 == 0 here: zero-fill the tail */
	bdnz	2b
	blr
EXPORT_SYMBOL(strncpy)
|
|
|
|
/*
 * int strncmp(const char *s1, const char *s2, size_t n)
 *
 * In:   r3 = s1, r4 = s2, r5 = n
 * Out:  r3 = 0 if n == 0 or no difference was seen before the scan ended,
 *            otherwise (s1 byte) - (s2 byte) at the position where it stopped
 * Uses: r0, r4, r5, ctr, cr0, cr1
 */
_GLOBAL(strncmp)
	PPC_LCMPI 0,r5,0
	beq-	2f			/* n == 0: strings compare equal */
	subi	r4,r4,1			/* pre-bias s2 cursor for lbzu */
	mtctr	r5			/* ctr = n; must happen before r5 is reused */
	subi	r5,r3,1			/* r5 = s1 cursor, freeing r3 for the result */
	.balign IFETCH_ALIGN_BYTES
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0			/* cr1.eq = s1 byte is NUL */
	lbzu	r0,1(r4)
	subf.	r3,r0,r3		/* r3 = s1 byte - s2 byte; cr0 tracks it */
	beqlr	1			/* s1 ended: r3 already holds the answer */
	bdnzt	eq,1b			/* --ctr; loop while ctr != 0 && bytes equal */
	blr
2:	li	r3,0
	blr
EXPORT_SYMBOL(strncmp)
|
|
|
|
#ifdef CONFIG_PPC32
|
|
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:   r3 = s1, r4 = s2, r5 = n
 * Out:  r3 = 0 if n == 0 or all n bytes match, otherwise
 *            (s1 byte) - (s2 byte) at the first mismatch
 * Uses: r0, r4, r6, ctr, cr0
 */
_GLOBAL(memcmp)
	PPC_LCMPI 0,r5,0
	beq-	2f			/* n == 0: buffers compare equal */
	subi	r4,r4,1			/* pre-bias both cursors for lbzu */
	subi	r6,r3,1
	mtctr	r5			/* ctr = bytes left to compare */
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3		/* r3 = s1 byte - s2 byte; cr0 tracks it */
	bdnzt	eq,1b			/* --ctr; loop while ctr != 0 && bytes equal */
	blr				/* r3 = last difference (0 if none) */
2:	li	r3,0
	blr
EXPORT_SYMBOL(memcmp)
|
|
#endif
|
|
|
|
/*
 * void *memchr(const void *s, int c, size_t n)
 *
 * In:   r3 = s, r4 = c, r5 = n
 * Out:  r3 = address of the first byte equal to (unsigned char)c within
 *            the first n bytes of s, or 0 if there is none (or n == 0)
 * Uses: r0, r4, ctr, cr0
 */
_GLOBAL(memchr)
	PPC_LCMPI 0,r5,0
	beq-	2f			/* n == 0: nothing to search */
	mtctr	r5			/* ctr = bytes left to examine */
	/*
	 * Reduce c to its low 8 bits.  lbzu zero-extends each byte, so
	 * without this a c outside 0..255 (e.g. -1) could never match,
	 * whereas memchr is specified to compare against (unsigned char)c.
	 */
	clrlwi	r4,r4,24
	addi	r3,r3,-1		/* pre-bias cursor for lbzu */
	.balign IFETCH_ALIGN_BYTES
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b			/* --ctr; loop while ctr != 0 && !cr0.eq */
	beqlr				/* match: r3 points at the matching byte */
2:	li	r3,0
	blr
EXPORT_SYMBOL(memchr)
|
|
|
|
#ifdef CONFIG_PPC32
|
|
/*
 * __clear_user: zero a range of memory, tolerating faults on the stores.
 *
 * In:   r3 = start address, r4 = number of bytes
 * Out:  r3 = 0 when every store completed; when a store faults, the
 *            matching fixup below returns a remaining-byte count instead
 * Uses: r0, r4, r5, r6, ctr, cr0
 *
 * Strategy: one (possibly unaligned) word store at the start, then
 * word stores from the rounded-down address, then a byte tail.
 */
_GLOBAL(__clear_user)
	subi	r6,r3,4			/* word cursor, pre-biased for stwu */
	cmplwi	0,r4,4			/* cr0 stays live until the beqlr below */
	li	r5,0			/* r5 = the zero being stored */
	li	r3,0			/* default return value */
	blt	7f			/* fewer than 4 bytes: byte loop only */
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr				/* size was exactly 4: done */
	/* clear word sized chunks */
	andi.	r0,r6,3			/* r0 = misalignment of the start address */
	add	r4,r0,r4		/* count from the rounded-down address */
	subf	r6,r0,r6		/* round the cursor down to a word boundary */
	srwi	r0,r4,2			/* r0 = whole words, first one included */
	andi.	r4,r4,3			/* r4 = trailing bytes */
	mtctr	r0
	bdz	7f			/* first word is already stored: skip it */
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr				/* no tail bytes */
	mtctr	r4
	addi	r6,r6,3			/* turn the word cursor into a byte cursor */
8:	stbu	r5,1(r6)
	bdnz	8b
	blr

	/* Fixups: each returns the outstanding byte count in r3. */
90:	mr	r3,r4			/* first store faulted: whole size remains */
	blr
91:	mfctr	r3			/* word loop faulted: words left * 4 + tail */
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3			/* byte loop faulted: bytes left */
	blr

	/* EX_TABLE(insn, fixup): pair each store that may fault with its fixup */
	EX_TABLE(11b, 90b)
	EX_TABLE(1b, 91b)
	EX_TABLE(8b, 92b)

EXPORT_SYMBOL(__clear_user)
|
|
#endif
|