/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

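/*
 * memset is declared WEAK so that an instrumented implementation
 * (e.g. KASAN's) can override it; __memset and mmioset always
 * resolve to this copy.
 */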
ENTRY(__memset)
ENTRY(mmioset)
WEAK(memset)
UNWIND( .fnstart )
	and	r1, r1, #255		@ cast to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1

/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
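/*
 * Replicate the fill byte across all four byte lanes of r1 and
 * mirror it into r3, so each word store below writes four copies.
 */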
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
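/*
 * 7: doubles as the entry point for __memset32/__memset64 below,
 * which arrive with the replicated pattern already in r1 and r3.
 */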
7:	cmp	r2, #16
	blt	4f
UNWIND( .fnend )

#if ! CALGN(1)+0
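/*
 * CALGN() expands to its argument only on CPUs where cache-line
 * aligning the destination pays off (see asm/assembler.h); the
 * "+0" keeps the expression well formed when it expands to nothing.
 */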

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r3

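/* r1, r3, r8 and lr all hold the pattern: each stmia writes 16 bytes */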
2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
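	@ fall through to the small-count tail at 4: below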
UNWIND( .fnend )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f
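
/*
 * More than 96 bytes and the destination is not yet 32-byte
 * aligned: store up to 28 bytes by hand to reach a cache-line
 * boundary before entering the main loop.
 */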
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
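/*
 * r8 = bytes needed to align, a multiple of 4 since ip is already
 * word aligned.  Shifting it left by 28 puts bit 4 in C and bit 3
 * in N, so the conditional stores below cover 16, 8 and 4 bytes.
 */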
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}	@ 16 bytes if bit 4 was set (C)
	stmiami	ip!, {r4, r5}		@ 8 bytes if bit 3 was set (N)
	tst	r8, #(1 << 30)		@ bit 2, now shifted up to bit 30
	mov	r8, r1			@ restore the pattern to r8
	strne	r1, [ip], #4		@ 4 bytes if bit 2 was set

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )

#endif

UNWIND( .fnstart )
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4

/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1		@ both byte stores use the same
	strbne	r1, [ip], #1		@ condition: 2 bytes if bit 1 set
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr
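
/*
 * 6: the destination is not word aligned: store the 4 - r3 bytes
 * needed to align it (r3 = ip & 3), then rejoin the aligned path
 * at 1b above.
 */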
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
ENDPROC(__memset)
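
/*
 * __memset32 takes its 32-bit pattern in r1 and falls through into
 * __memset64, which expects the two halves of its 64-bit pattern in
 * r1 and r3 and the count in r2: the state that label 7 expects.
 */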
ENTRY(__memset32)
UNWIND( .fnstart )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend )
ENDPROC(__memset32)

ENTRY(__memset64)
UNWIND( .fnstart )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend )
ENDPROC(__memset64)