memmove.S 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c. We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
  10. #include <asm/export.h>
  11. .set noat
  12. .set noreorder
  13. .text
  14. .align 4
  15. .globl memmove
  16. .ent memmove
  17. memmove:
  18. ldgp $29, 0($27)
  19. unop
  20. nop
  21. .prologue 1
  22. addq $16,$18,$4
  23. addq $17,$18,$5
  24. cmpule $4,$17,$1 /* dest + n <= src */
  25. cmpule $5,$16,$2 /* dest >= src + n */
  26. bis $1,$2,$1
  27. mov $16,$0
  28. xor $16,$17,$2
  29. bne $1,memcpy !samegp
  30. and $2,7,$2 /* Test for src/dest co-alignment. */
  31. and $16,7,$1
  32. cmpule $16,$17,$3
  33. bne $3,$memmove_up /* dest < src */
  34. and $4,7,$1
  35. bne $2,$misaligned_dn
  36. unop
  37. beq $1,$skip_aligned_byte_loop_head_dn
  38. $aligned_byte_loop_head_dn:
  39. lda $4,-1($4)
  40. lda $5,-1($5)
  41. unop
  42. ble $18,$egress
  43. ldq_u $3,0($5)
  44. ldq_u $2,0($4)
  45. lda $18,-1($18)
  46. extbl $3,$5,$1
  47. insbl $1,$4,$1
  48. mskbl $2,$4,$2
  49. bis $1,$2,$1
  50. and $4,7,$6
  51. stq_u $1,0($4)
  52. bne $6,$aligned_byte_loop_head_dn
  53. $skip_aligned_byte_loop_head_dn:
  54. lda $18,-8($18)
  55. blt $18,$skip_aligned_word_loop_dn
  56. $aligned_word_loop_dn:
  57. ldq $1,-8($5)
  58. nop
  59. lda $5,-8($5)
  60. lda $18,-8($18)
  61. stq $1,-8($4)
  62. nop
  63. lda $4,-8($4)
  64. bge $18,$aligned_word_loop_dn
  65. $skip_aligned_word_loop_dn:
  66. lda $18,8($18)
  67. bgt $18,$byte_loop_tail_dn
  68. unop
  69. ret $31,($26),1
  70. .align 4
  71. $misaligned_dn:
  72. nop
  73. fnop
  74. unop
  75. beq $18,$egress
  76. $byte_loop_tail_dn:
  77. ldq_u $3,-1($5)
  78. ldq_u $2,-1($4)
  79. lda $5,-1($5)
  80. lda $4,-1($4)
  81. lda $18,-1($18)
  82. extbl $3,$5,$1
  83. insbl $1,$4,$1
  84. mskbl $2,$4,$2
  85. bis $1,$2,$1
  86. stq_u $1,0($4)
  87. bgt $18,$byte_loop_tail_dn
  88. br $egress
  89. $memmove_up:
  90. mov $16,$4
  91. mov $17,$5
  92. bne $2,$misaligned_up
  93. beq $1,$skip_aligned_byte_loop_head_up
  94. $aligned_byte_loop_head_up:
  95. unop
  96. ble $18,$egress
  97. ldq_u $3,0($5)
  98. ldq_u $2,0($4)
  99. lda $18,-1($18)
  100. extbl $3,$5,$1
  101. insbl $1,$4,$1
  102. mskbl $2,$4,$2
  103. bis $1,$2,$1
  104. lda $5,1($5)
  105. stq_u $1,0($4)
  106. lda $4,1($4)
  107. and $4,7,$6
  108. bne $6,$aligned_byte_loop_head_up
  109. $skip_aligned_byte_loop_head_up:
  110. lda $18,-8($18)
  111. blt $18,$skip_aligned_word_loop_up
  112. $aligned_word_loop_up:
  113. ldq $1,0($5)
  114. nop
  115. lda $5,8($5)
  116. lda $18,-8($18)
  117. stq $1,0($4)
  118. nop
  119. lda $4,8($4)
  120. bge $18,$aligned_word_loop_up
  121. $skip_aligned_word_loop_up:
  122. lda $18,8($18)
  123. bgt $18,$byte_loop_tail_up
  124. unop
  125. ret $31,($26),1
  126. .align 4
  127. $misaligned_up:
  128. nop
  129. fnop
  130. unop
  131. beq $18,$egress
  132. $byte_loop_tail_up:
  133. ldq_u $3,0($5)
  134. ldq_u $2,0($4)
  135. lda $18,-1($18)
  136. extbl $3,$5,$1
  137. insbl $1,$4,$1
  138. mskbl $2,$4,$2
  139. bis $1,$2,$1
  140. stq_u $1,0($4)
  141. lda $5,1($5)
  142. lda $4,1($4)
  143. nop
  144. bgt $18,$byte_loop_tail_up
  145. $egress:
  146. ret $31,($26),1
  147. nop
  148. nop
  149. nop
  150. .end memmove
  151. EXPORT_SYMBOL(memmove)