sh: clear/copy_page renames in lib and lib64.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Este cometimento está contido em:
89
arch/sh/lib64/copy_page.S
Ficheiro normal
89
arch/sh/lib64/copy_page.S
Ficheiro normal
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
|
||||
|
||||
This file is subject to the terms and conditions of the GNU General Public
|
||||
License. See the file "COPYING" in the main directory of this archive
|
||||
for more details.
|
||||
|
||||
Tight version of memcpy for the case of just copying a page.
|
||||
Prefetch strategy empirically optimised against RTL simulations
|
||||
of SH5-101 cut2 eval chip with Cayman board DDR memory.
|
||||
|
||||
Parameters:
|
||||
r2 : destination effective address (start of page)
|
||||
r3 : source effective address (start of page)
|
||||
|
||||
Always copies 4096 bytes.
|
||||
|
||||
Points to review.
|
||||
* Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
|
||||
It seems like the prefetch needs to be at least 4 lines ahead to get
|
||||
the data into the cache in time, and the allocos contend with outstanding
|
||||
prefetches for the same cache set, so it's better to have the numbers
|
||||
different.
|
||||
*/
|
||||
|
||||
/*
 * copy_page: copy one 4096-byte page.
 *
 * In:       r2  = destination address (start of page)
 *           r3  = source address (start of page)
 *           r18 = return address (moved into tr0 with ptabs)
 * Out:      no result value; r2 is left at destination + 4096
 * Clobbers: r2, r6-r8, r22, r23, r36-r39, r60-r62, tr0-tr3
 *
 * Each pass of the loop moves 32 bytes (four quadwords), which is one
 * line in the "lines ahead" terminology used below: 0x40 is 2 lines and
 * 0x80 is 4 lines.  The source is never addressed through r3 inside the
 * loop; instead r60 holds (source - destination), so an indexed load
 * from r2 + r60 reads the source byte that corresponds to r2.
 *
 * The /u and /l branch suffixes are prediction hints (unlikely/likely),
 * not signedness markers.
 *
 * The regions under "#if 0" are the disabled source-prefetch code.  r6,
 * r22 and tr2 are only consumed there, but are still set up.
 */
	.section .text..SHmedia32,"ax"
	.little

	.balign 8
	.global copy_page
copy_page:

	/* Copy 4096 bytes worth of data from r3 to r2.
	   Do prefetches 4 lines ahead.
	   Do alloco 2 lines ahead */

	/* Load the branch targets up front: tr1-tr3 are the loop labels,
	   tr0 is the return address taken from r18. */
	pta	1f, tr1
	pta	2f, tr2
	pta	3f, tr3
	ptabs	r18, tr0

#if 0
	/* TAKum03020 */
	ld.q	r3, 0x00, r63
	ld.q	r3, 0x20, r63
	ld.q	r3, 0x40, r63
	ld.q	r3, 0x60, r63
#endif
	/* Allocate the first two destination lines before entering the
	   loop; the loop itself allocates two lines ahead of r2. */
	alloco	r2, 0x00
	synco			! TAKum03020
	alloco	r2, 0x20
	synco			! TAKum03020

	/* Loop bounds, all relative to the destination start in r2. */
	movi	3968, r6
	add	r2, r6, r6	! r6 = dst + 4096 - 128: prefetch limit (disabled code only)
	addi	r6, 64, r7	! r7 = dst + 4096 - 64: alloco limit
	addi	r7, 64, r8	! r8 = dst + 4096: end of page

	/* Source-minus-destination offsets for the indexed loads. */
	sub	r3, r2, r60	! r60 = src - dst
	addi	r60, 8, r61	! r61 = src - dst + 8
	addi	r61, 8, r62	! r62 = src - dst + 16
	addi	r62, 8, r23	! r23 = src - dst + 24
	addi	r60, 0x80, r22	! r22 = src - dst + 128 (disabled prefetch only)

/* Minimal code size.  The extra branches inside the loop don't cost much
   because they overlap with the time spent waiting for prefetches to
   complete. */
1:
#if 0
	/* TAKum03020 */
	bge/u	r2, r6, tr2	! skip prefetch for last 4 lines
	ldx.q	r2, r22, r63	! prefetch 4 lines hence
#endif
2:
	bge/u	r2, r7, tr3	! skip alloco for last 2 lines
	alloco	r2, 0x40	! alloc destination line 2 lines ahead
	synco			! TAKum03020
3:
	/* Issue all four loads before the first store. */
	ldx.q	r2, r60, r36
	ldx.q	r2, r61, r37
	ldx.q	r2, r62, r38
	ldx.q	r2, r23, r39
	st.q	r2,   0, r36
	st.q	r2,   8, r37
	st.q	r2,  16, r38
	st.q	r2,  24, r39
	addi	r2, 32, r2	! advance destination by one line
	bgt/l	r8, r2, tr1	! loop while r2 is below the end of the page

	blink	tr0, r63	! return
|
Criar uma nova questão referindo esta
Bloquear um utilizador