ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+

ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal and missing some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-06-30 16:29:12 +01:00
parent af040ffc9b
commit 6ebbf2ce43
106 changed files with 644 additions and 607 deletions
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */


 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
 THUMB(	lsrmi	r3, al, ip		)
 THUMB(	orrmi	ah, ah, r3		)
 	mov	al, al, lsl r2
-	mov	pc, lr
+	ret	lr

 ENDPROC(__ashldi3)
 ENDPROC(__aeabi_llsl)
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */


 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
 THUMB(	lslmi	r3, ah, ip		)
 THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, asr r2
-	mov	pc, lr
+	ret	lr

 ENDPROC(__ashrdi3)
 ENDPROC(__aeabi_lasr)
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -25,7 +25,7 @@
 ENTRY(c_backtrace)

 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
-		mov	pc, lr
+		ret	lr
 ENDPROC(c_backtrace)
 #else
 		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,4 @@
+#include <asm/assembler.h>
 #include <asm/unwind.h>

 #if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND(	.fnstart	)
 	\instr	r2, r2, r3
 	str	r2, [r1, r0, lsl #2]
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
@@ -98,7 +99,7 @@ UNWIND(	.fnstart	)
 	\store	r2, [r1]
 	moveq	r0, #0
 	restore_irqs ip
-	mov	pc, lr
+	ret	lr
 UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
--- a/arch/arm/lib/bswapsdi2.S
+++ b/arch/arm/lib/bswapsdi2.S
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #if __LINUX_ARM_ARCH__ >= 6
 ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
 	mov r3, r3, lsr #8
 	bic r3, r3, #0xff00
 	eor r0, r3, r0, ror #8
-	mov pc, lr
+	ret lr
 ENDPROC(__bswapsi2)

 ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
 	bic r1, r1, #0xff00
 	eor r1, r1, r0, ror #8
 	eor r0, r3, ip, ror #8
-	mov pc, lr
+	ret lr
 ENDPROC(__bswapdi2)
 #endif
--- a/arch/arm/lib/call_with_stack.S
+++ b/arch/arm/lib/call_with_stack.S
@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
 	mov	r0, r1

 	adr	lr, BSYM(1f)
-	mov	pc, r2
+	ret	r2

 1:	ldr	lr, [sp]
 	ldr	sp, [sp, #4]
-	mov	pc, lr
+	ret	lr
 ENDPROC(call_with_stack)
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -97,7 +97,7 @@ td3	.req	lr
 #endif
 #endif
 		adcnes	sum, sum, td0		@ update checksum
-		mov	pc, lr
+		ret	lr

 ENTRY(csum_partial)
 		stmfd	sp!, {buf, lr}
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -7,6 +7,7 @@
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
+#include <asm/assembler.h>

 /*
 * unsigned int
@@ -40,7 +41,7 @@ sum	.req	r3
 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 		strb	ip, [dst], #1
 		tst	dst, #2
-		moveq	pc, lr			@ dst is now 32bit aligned
+		reteq	lr			@ dst is now 32bit aligned

 .Ldst_16bit:	load2b	r8, ip
 		sub	len, len, #2
@@ -48,7 +49,7 @@ sum	.req	r3
 		strb	r8, [dst], #1
 		adcs	sum, sum, ip, put_byte_1
 		strb	ip, [dst], #1
-		mov	pc, lr			@ dst is now 32bit aligned
+		ret	lr			@ dst is now 32bit aligned

 		/*
 		 * Handle 0 to 7 bytes, with any alignment of source and
--- a/arch/arm/lib/delay-loop.S
+++ b/arch/arm/lib/delay-loop.S
@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 		mul	r0, r2, r0		@ max = 2^32-1
 		add	r0, r0, r1, lsr #32-6
 		movs	r0, r0, lsr #6
-		moveq	pc, lr
+		reteq	lr

 /*
 * loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 ENTRY(__loop_delay)
 		subs	r0, r0, #1
 #if 0
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
-		movls	pc, lr
+		retls	lr
 		subs	r0, r0, #1
 #endif
 		bhi	__loop_delay
-		mov	pc, lr
+		ret	lr
 ENDPROC(__loop_udelay)
 ENDPROC(__loop_const_udelay)
 ENDPROC(__loop_delay)
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
 */

 #include <linux/linkage.h>
+#include <asm/assembler.h>
 #include <asm/unwind.h>

 #ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
 	mov	yl, #0
 	cmpeq	xl, r4
 	movlo	xh, xl
-	movlo	pc, lr
+	retlo	lr

 	@ The division loop for lower bit positions.
 	@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
 	subcs	xh, xh, r4
 	movs	ip, ip, lsr #1
 	bne	4b
-	mov	pc, lr
+	ret	lr

 	@ The top part of remainder became zero.  If carry is set
 	@ (the 33th bit) this is a false positive so resume the loop.
 	@ Otherwise, if lower part is also null then we are done.
 6:	bcs	5b
 	cmp	xl, #0
-	moveq	pc, lr
+	reteq	lr

 	@ We still have remainer bits in the low part.  Bring them up.

@@ -144,7 +145,7 @@ UNWIND(.fnstart)
 	movs	ip, ip, lsr #1
 	mov	xh, #1
 	bne	4b
-	mov	pc, lr
+	ret	lr

 8:	@ Division by a power of 2: determine what that divisor order is
 	@ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
 THUMB(	orr	yl, yl, xh		)
 	mov	xh, xl, lsl ip
 	mov	xh, xh, lsr ip
-	mov	pc, lr
+	ret	lr

 	@ eq -> division by 1: obvious enough...
 9:	moveq	yl, xl
 	moveq	yh, xh
 	moveq	xh, #0
-	moveq	pc, lr
+	reteq	lr
 UNWIND(.fnend)

 UNWIND(.fnstart)
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_zero_bit_le)

 /*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_bit_le)

 /*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_zero_bit_be)

 ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
 2:		cmp	r2, r1			@ any more?
 		blo	1b
 3:		mov	r0, r1			@ no free bits
-		mov	pc, lr
+		ret	lr
 ENDPROC(_find_first_bit_be)

 ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
 #endif
 		cmp	r1, r0			@ Clamp to maxbit
 		movlo	r0, r1
-		mov	pc, lr
+		ret	lr

--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
 	check_uaccess r0, 1, r1, r2, __get_user_bad
 1: TUSER(ldrb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_1)

 ENTRY(__get_user_2)
@@ -56,20 +56,20 @@ rb	.req	r0
 	orr	r2, rb, r2, lsl #8
 #endif
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_2)

 ENTRY(__get_user_4)
 	check_uaccess r0, 4, r1, r2, __get_user_bad
 4: TUSER(ldr)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_4)

 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__get_user_bad)

 .pushsection __ex_table, "a"
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -25,7 +25,7 @@

 ENTRY(__raw_readsb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Linsb_align

--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,7 +12,7 @@

 ENTRY(__raw_readsl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f

@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
 		stmcsia	r1!, {r3, ip}
 		ldrne	r3, [r0, #0]
 		strne	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr

 3:		ldr	r3, [r0]
 		cmp	ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
 		strb	r3, [r1, #1]
 8:		mov	r3, ip, get_byte_0
 		strb	r3, [r1, #0]
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_readsl)
--- a/arch/arm/lib/io-readsw-armv3.S
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -27,11 +27,11 @@
 		strb	r3, [r1], #1

 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr

 ENTRY(__raw_readsw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align

--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,7 +26,7 @@

 ENTRY(__raw_readsw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Linsw_align

--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -45,7 +45,7 @@

 ENTRY(__raw_writesb)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	.Loutsb_align

--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,7 +12,7 @@

 ENTRY(__raw_writesl)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		ands	ip, r1, #3
 		bne	3f

@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
 		ldrne	r3, [r1, #0]
 		strcs	ip, [r0, #0]
 		strne	r3, [r0, #0]
-		mov	pc, lr
+		ret	lr

 3:		bic	r1, r1, #3
 		ldr	r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
-		mov	pc, lr
+		ret	lr

 5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
-		mov	pc, lr
+		ret	lr

 6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
 		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_writesl)
--- a/arch/arm/lib/io-writesw-armv3.S
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -28,11 +28,11 @@
 		orr	r3, r3, r3, lsl #16
 		str	r3, [r0]
 		subs	r2, r2, #1
-		moveq	pc, lr
+		reteq	lr

 ENTRY(__raw_writesw)
 		teq	r2, #0		@ do we have to check for the zero len?
-		moveq	pc, lr
+		reteq	lr
 		tst	r1, #3
 		bne	.Loutsw_align

--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,7 +31,7 @@

 ENTRY(__raw_writesw)
 		teq	r2, #0
-		moveq	pc, lr
+		reteq	lr
 		ands	r3, r1, #3
 		bne	.Loutsw_align

@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
 		tst	r2, #1
 3:		movne	ip, r3, lsr #8
 		strneh	ip, [r0]
-		mov	pc, lr
+		ret	lr
 ENDPROC(__raw_writesw)
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
 UNWIND(.fnstart)

 	subs	r2, r1, #1
-	moveq	pc, lr
+	reteq	lr
 	bcc	Ldiv0
 	cmp	r0, r1
 	bls	11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
 	ARM_DIV_BODY r0, r1, r2, r3

 	mov	r0, r2
-	mov	pc, lr
+	ret	lr

 11:	moveq	r0, #1
 	movne	r0, #0
-	mov	pc, lr
+	ret	lr

 12:	ARM_DIV2_ORDER r1, r2

 	mov	r0, r0, lsr r2
-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
 	moveq   r0, #0
 	tsthi	r1, r2				@ see if divisor is power of 2
 	andeq	r0, r0, r2
-	movls	pc, lr
+	retls	lr

 	ARM_MOD_BODY r0, r1, r2, r3

-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)

 	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr

 10:	teq	ip, r0				@ same sign ?
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr

 11:	movlo	r0, #0
 	moveq	r0, ip, asr #31
 	orreq	r0, r0, #1
-	mov	pc, lr
+	ret	lr

 12:	ARM_DIV2_ORDER r1, r2

 	cmp	ip, #0
 	mov	r0, r3, lsr r2
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)

 10:	cmp	ip, #0
 	rsbmi	r0, r0, #0
-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr}	)
 	ldmfd	sp!, {r1, r2, ip, lr}
 	mul	r3, r0, r2
 	sub	r1, r1, r3
-	mov	pc, lr
+	ret	lr

 UNWIND(.fnend)
 ENDPROC(__aeabi_idivmod)
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA.  */


 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #ifdef __ARMEB__
 #define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
 THUMB(	lslmi	r3, ah, ip		)
 THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, lsr r2
-	mov	pc, lr
+	ret	lr

 ENDPROC(__lshrdi3)
 ENDPROC(__aeabi_llsr)
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -22,5 +22,5 @@ ENTRY(memchr)
 	bne	1b
 	sub	r0, r0, #1
 2:	movne	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(memchr)
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -110,7 +110,7 @@ ENTRY(memset)
 	strneb	r1, [ip], #1
 	tst	r2, #1
 	strneb	r1, [ip], #1
-	mov	pc, lr
+	ret	lr

 6:	subs	r2, r2, #4		@ 1 do we have enough
 	blt	5b			@ 1 bytes to align with?
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -121,5 +121,5 @@ ENTRY(__memzero)
 	strneb	r2, [r0], #1		@ 1
 	tst	r1, #1			@ 1 a byte left over
 	strneb	r2, [r0], #1		@ 1
-	mov	pc, lr			@ 1
+	ret	lr			@ 1
 ENDPROC(__memzero)
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -11,6 +11,7 @@
 */

 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #ifdef __ARMEB__
 #define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
 	adc	xh, xh, yh, lsr #16
 	adds	xl, xl, ip, lsl #16
 	adc	xh, xh, ip, lsr #16
-	mov	pc, lr
+	ret	lr

 ENDPROC(__muldi3)
 ENDPROC(__aeabi_lmul)
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
 	check_uaccess r0, 1, r1, ip, __put_user_bad
 1: TUSER(strb)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_1)

 ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
 #endif
 #endif	/* CONFIG_THUMB2_KERNEL */
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_2)

 ENTRY(__put_user_4)
 	check_uaccess r0, 4, r1, ip, __put_user_bad
 4: TUSER(str)	r2, [r0]
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_4)

 ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
 6: TUSER(str)	r3, [r0]
 #endif
 	mov	r0, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_8)

 __put_user_bad:
 	mov	r0, #-EFAULT
-	mov	pc, lr
+	ret	lr
 ENDPROC(__put_user_bad)

 .pushsection __ex_table, "a"
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -23,5 +23,5 @@ ENTRY(strchr)
 		teq	r2, r1
 		movne	r0, #0
 		subeq	r0, r0, #1
-		mov	pc, lr
+		ret	lr
 ENDPROC(strchr)
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -22,5 +22,5 @@ ENTRY(strrchr)
 		teq	r2, #0
 		bne	1b
 		mov	r0, r3
-		mov	pc, lr
+		ret	lr
 ENDPROC(strrchr)
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -11,6 +11,7 @@
 */

 #include <linux/linkage.h>
+#include <asm/assembler.h>

 #ifdef __ARMEB__
 #define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
 	movlo	r0, #0
 	moveq	r0, #1
 	movhi	r0, #2
-	mov	pc, lr
+	ret	lr

 ENDPROC(__ucmpdi2)

@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
 	movlo	r0, #-1
 	moveq	r0, #0
 	movhi	r0, #1
-	mov	pc, lr
+	ret	lr

 ENDPROC(__aeabi_ulcmp)