ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+

ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and this sequence is strongly recommended to be used by the ARM
architecture manual (section A.4.1.1).

We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.

Rather than doing this piecemeal, and miss some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.

Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
Russell King
2014-06-30 16:29:12 +01:00
parent af040ffc9b
commit 6ebbf2ce43
106 changed files with 644 additions and 607 deletions

View File

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
ret lr
ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)

View File

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
ret lr
ENDPROC(__ashrdi3)
ENDPROC(__aeabi_lasr)

View File

@@ -25,7 +25,7 @@
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
mov pc, lr
ret lr
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...

View File

@@ -1,3 +1,4 @@
#include <asm/assembler.h>
#include <asm/unwind.h>
#if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND( .fnstart )
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
@@ -98,7 +99,7 @@ UNWIND( .fnstart )
\store r2, [r1]
moveq r0, #0
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm

View File

@@ -1,4 +1,5 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#if __LINUX_ARM_ARCH__ >= 6
ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
mov r3, r3, lsr #8
bic r3, r3, #0xff00
eor r0, r3, r0, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapsi2)
ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
bic r1, r1, #0xff00
eor r1, r1, r0, ror #8
eor r0, r3, ip, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapdi2)
#endif

View File

@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
mov r0, r1
adr lr, BSYM(1f)
mov pc, r2
ret r2
1: ldr lr, [sp]
ldr sp, [sp, #4]
mov pc, lr
ret lr
ENDPROC(call_with_stack)

View File

@@ -97,7 +97,7 @@ td3 .req lr
#endif
#endif
adcnes sum, sum, td0 @ update checksum
mov pc, lr
ret lr
ENTRY(csum_partial)
stmfd sp!, {buf, lr}

View File

@@ -7,6 +7,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/assembler.h>
/*
* unsigned int
@@ -40,7 +41,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
moveq pc, lr @ dst is now 32bit aligned
reteq lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
sub len, len, #2
@@ -48,7 +49,7 @@ sum .req r3
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
mov pc, lr @ dst is now 32bit aligned
ret lr @ dst is now 32bit aligned
/*
* Handle 0 to 7 bytes, with any alignment of source and

View File

@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
moveq pc, lr
reteq lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
#endif
bhi __loop_delay
mov pc, lr
ret lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)

View File

@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
#ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
mov yl, #0
cmpeq xl, r4
movlo xh, xl
movlo pc, lr
retlo lr
@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
mov pc, lr
ret lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
moveq pc, lr
reteq lr
@ We still have remainer bits in the low part. Bring them up.
@@ -144,7 +145,7 @@ UNWIND(.fnstart)
movs ip, ip, lsr #1
mov xh, #1
bne 4b
mov pc, lr
ret lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
ret lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
reteq lr
UNWIND(.fnend)
UNWIND(.fnstart)

View File

@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_le)
/*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_le)
/*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
#endif
cmp r1, r0 @ Clamp to maxbit
movlo r0, r1
mov pc, lr
ret lr

View File

@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
@@ -56,20 +56,20 @@ rb .req r0
orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_4)
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__get_user_bad)
.pushsection __ex_table, "a"

View File

@@ -25,7 +25,7 @@
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Linsb_align

View File

@@ -12,7 +12,7 @@
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f
@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
stmcsia r1!, {r3, ip}
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
ret lr
3: ldr r3, [r0]
cmp ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
strb r3, [r1, #1]
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
mov pc, lr
ret lr
ENDPROC(__raw_readsl)

View File

@@ -27,11 +27,11 @@
strb r3, [r1], #1
subs r2, r2, #1
moveq pc, lr
reteq lr
ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

View File

@@ -26,7 +26,7 @@
ENTRY(__raw_readsw)
teq r2, #0
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

View File

@@ -45,7 +45,7 @@
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Loutsb_align

View File

@@ -12,7 +12,7 @@
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f
@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
ldrne r3, [r1, #0]
strcs ip, [r0, #0]
strne r3, [r0, #0]
mov pc, lr
ret lr
3: bic r1, r1, #3
ldr r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
ret lr
5: mov ip, r3, lspull #8
ldr r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
ret lr
6: mov ip, r3, lspull #24
ldr r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
ret lr
ENDPROC(__raw_writesl)

View File

@@ -28,11 +28,11 @@
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
moveq pc, lr
reteq lr
ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Loutsw_align

View File

@@ -31,7 +31,7 @@
ENTRY(__raw_writesw)
teq r2, #0
moveq pc, lr
reteq lr
ands r3, r1, #3
bne .Loutsw_align
@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
tst r2, #1
3: movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
ret lr
ENDPROC(__raw_writesw)

View File

@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
subs r2, r1, #1
moveq pc, lr
reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
mov pc, lr
ret lr
11: moveq r0, #1
movne r0, #0
mov pc, lr
ret lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
movls pc, lr
retls lr
ARM_MOD_BODY r0, r1, r2, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)
cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
mov pc, lr
ret lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
ret lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)
10: cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

View File

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
ret lr
ENDPROC(__lshrdi3)
ENDPROC(__aeabi_llsr)

View File

@@ -22,5 +22,5 @@ ENTRY(memchr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
mov pc, lr
ret lr
ENDPROC(memchr)

View File

@@ -110,7 +110,7 @@ ENTRY(memset)
strneb r1, [ip], #1
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
ret lr
6: subs r2, r2, #4 @ 1 do we have enough
blt 5b @ 1 bytes to align with?

View File

@@ -121,5 +121,5 @@ ENTRY(__memzero)
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
ret lr @ 1
ENDPROC(__memzero)

View File

@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
adc xh, xh, yh, lsr #16
adds xl, xl, ip, lsl #16
adc xh, xh, ip, lsr #16
mov pc, lr
ret lr
ENDPROC(__muldi3)
ENDPROC(__aeabi_lmul)

View File

@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
6: TUSER(str) r3, [r0]
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_8)
__put_user_bad:
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__put_user_bad)
.pushsection __ex_table, "a"

View File

@@ -23,5 +23,5 @@ ENTRY(strchr)
teq r2, r1
movne r0, #0
subeq r0, r0, #1
mov pc, lr
ret lr
ENDPROC(strchr)

View File

@@ -22,5 +22,5 @@ ENTRY(strrchr)
teq r2, #0
bne 1b
mov r0, r3
mov pc, lr
ret lr
ENDPROC(strrchr)

View File

@@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
movlo r0, #0
moveq r0, #1
movhi r0, #2
mov pc, lr
ret lr
ENDPROC(__ucmpdi2)
@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
movlo r0, #-1
moveq r0, #0
movhi r0, #1
mov pc, lr
ret lr
ENDPROC(__aeabi_ulcmp)