memset.S 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
  3. * Copyright (C) 1991,1996 Free Software Foundation
  4. * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  5. * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  6. *
  7. * Calls to memset return the initial %o0. Calls to bzero return 0 if ok, or
  8. * the number of bytes not yet set if an exception occurs and we were called
  9. * as clear_user.
  10. */
  11. #include <asm/ptrace.h>
  12. #include <asm/export.h>
  13. /* Work around cpp -rob */
  /* ALLOC and EXECINSTR stand in for the section flags #alloc and #execinstr
   * in the .section directives inside the EX and STORE macro bodies below.
   * Outside macro bodies this file writes the flags literally.
   * NOTE(review): presumably needed because cpp treats '#' inside a
   * function-like macro body as the stringify operator - confirm. */
  14. #define ALLOC #alloc
  15. #define EXECINSTR #execinstr
  /* EX(x,y,a,b): emit the instruction "x,y" (at local label 98) together with
   * an exception-table entry and a private fixup stub for it.
   *   x,y - the two comma-separated halves of the guarded instruction
   *   a,b - opcode plus first operand, and second operand, of the fixup
   *         instruction; %o0 is appended as its destination
   * The stub (local label 99, placed in .fixup) is "retl" with "a, b, %o0"
   * in its delay slot, so it returns to the caller with %o0 set by that
   * instruction.  __ex_table receives the address pair (98b, 99b).
   * NOTE(review): presumably the fault handler looks the faulting address up
   * in __ex_table and resumes at the stub - that code is not in this file.
   * The expansion always ends by switching back to .text, so the macro is
   * only usable from code that lives in .text. */
  16. #define EX(x,y,a,b) \
  17. 98: x,y; \
  18. .section .fixup,ALLOC,EXECINSTR; \
  19. .align 4; \
  20. 99: retl; \
  21. a, b, %o0; \
  22. .section __ex_table,ALLOC; \
  23. .align 4; \
  24. .word 98b, 99b; \
  25. .text; \
  26. .align 4
  /* STORE(source, base, offset, n): one guarded doubleword store (std) of
   * SOURCE to [base + offset + n]; ZERO_BIG_BLOCK is built from it.
   * Its fixup stub (local label 99, placed in .fixup) branches to the shared
   * handler at label 30 and, in the branch delay slot, subtracts (n - offset)
   * from %o3.  Label 30 then returns %o3 + (%o1 & 0x7f) in %o0.
   * The (98b, 99b) address pair goes into __ex_table and the expansion ends
   * back in .text. */
  27. #define STORE(source, base, offset, n) \
  28. 98: std source, [base + offset + n]; \
  29. .section .fixup,ALLOC,EXECINSTR; \
  30. .align 4; \
  31. 99: ba 30f; \
  32. sub %o3, n - offset, %o3; \
  33. .section __ex_table,ALLOC; \
  34. .align 4; \
  35. .word 98b, 99b; \
  36. .text; \
  37. .align 4;
  /* STORE_LAST(source, base, offset, n): one guarded doubleword store (std)
   * of SOURCE to [base - offset - n], built on EX.
   * The arguments handed to EX are split at the commas: "std source" and
   * "[base - offset - n]" form the store, "add %o1" and "offset + n" form the
   * fixup instruction, so the fixup returns %o1 + offset + n in %o0
   * (offset + n is the distance from the store address up to BASE). */
  38. #define STORE_LAST(source, base, offset, n) \
  39. EX(std source, [base - offset - n], \
  40. add %o1, offset + n);
  41. /* Please don't change these macros, unless you change the logic
  42. * in the .fixup section below as well.
  43. * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
  /* ZERO_BIG_BLOCK expands to eight STOREs at ascending displacements
   * 0x00..0x38 from (base + offset), one doubleword each.
   * The loop at label 10 uses it twice per pass, with offsets 0x00 and 0x40,
   * and decrements %o3 by 128 between the two; the (n - offset) term in the
   * STORE fixup accounts for that. */
  44. #define ZERO_BIG_BLOCK(base, offset, source) \
  45. STORE(source, base, offset, 0x00); \
  46. STORE(source, base, offset, 0x08); \
  47. STORE(source, base, offset, 0x10); \
  48. STORE(source, base, offset, 0x18); \
  49. STORE(source, base, offset, 0x20); \
  50. STORE(source, base, offset, 0x28); \
  51. STORE(source, base, offset, 0x30); \
  52. STORE(source, base, offset, 0x38);
  /* ZERO_LAST_BLOCKS(base, offset, source): eight STORE_LASTs, in ascending
   * address order, covering the 64 bytes that start at (base - offset - 0x38).
   * The code before label 13 places two expansions back to back and jumps
   * into the middle of them at a computed address (label 13 minus %o2 / 2),
   * so every STORE_LAST must contribute exactly one instruction to .text. */
  53. #define ZERO_LAST_BLOCKS(base, offset, source) \
  54. STORE_LAST(source, base, offset, 0x38); \
  55. STORE_LAST(source, base, offset, 0x30); \
  56. STORE_LAST(source, base, offset, 0x28); \
  57. STORE_LAST(source, base, offset, 0x20); \
  58. STORE_LAST(source, base, offset, 0x18); \
  59. STORE_LAST(source, base, offset, 0x10); \
  60. STORE_LAST(source, base, offset, 0x08); \
  61. STORE_LAST(source, base, offset, 0x00);
  /* Section setup, symbol declarations, the memset entry point and the
   * unaligned-head code (label 3) that the common body in __bzero branches
   * back to.
   *
   * memset
   *   In:  %o0 = destination, %o1 = fill value (only the low 8 bits are
   *        used), %o2 = length in bytes
   *   Out: on the normal path %o0 = the destination pointer that was passed
   *        in (saved in %g1 here, restored at label 0).  A faulting store
   *        returns through its EX/STORE fixup with a byte count in %o0.
   *   Written in this part: %g1, %g2, %g3, %g4, %o0, %o1, %o2
   */
  62. .text
  63. .align 4
  64. .globl __bzero_begin
  65. __bzero_begin:
  66. .globl __bzero
  67. .type __bzero,#function
  68. .globl memset
  69. EXPORT_SYMBOL(__bzero)
  70. EXPORT_SYMBOL(memset)
  71. memset:
  72. mov %o0, %g1 /* keep the original destination for the return value */
  73. mov 1, %g4 /* %g4 = 1: entered via memset (tested at label 0) */
  74. and %o1, 0xff, %g3 /* %g3 = fill byte */
  75. sll %g3, 8, %g2
  76. or %g3, %g2, %g3 /* fill byte now in the low two bytes */
  77. sll %g3, 16, %g2
  78. or %g3, %g2, %g3 /* fill byte replicated into all four bytes */
  79. b 1f /* join the common code at label 1 in __bzero */
  80. mov %o2, %o1 /* (delay slot) length moves to %o1, where the common code reads it */
  /* Label 3: unaligned head.  Reached from the common code via "bne 3b" with
   * %o2 = %o0 & 3 (1, 2 or 3) and a length above 7.  Stores 4 - %o2 bytes so
   * that %o0 becomes 4-byte aligned. */
  81. 3:
  82. cmp %o2, 3
  83. be 2f /* %o2 == 3: one byte is enough */
  84. EX(stb %g3, [%o0], sub %o1, 0) /* (delay slot, always runs) on fault return %o1 */
  85. cmp %o2, 2
  86. be 2f /* %o2 == 2: two bytes */
  87. EX(stb %g3, [%o0 + 0x01], sub %o1, 1) /* (delay slot, always runs) on fault return %o1 - 1 */
  88. EX(stb %g3, [%o0 + 0x02], sub %o1, 2) /* %o2 == 1: third byte; on fault return %o1 - 2 */
  89. 2:
  90. sub %o2, 4, %o2 /* %o2 = -(number of bytes just stored) */
  91. add %o1, %o2, %o1 /* shrink the remaining length */
  92. b 4f /* resume at the 8-byte alignment test (label 4) */
  93. sub %o0, %o2, %o0 /* (delay slot) advance the pointer past the head bytes */
  /* __bzero - zero a buffer.  Label 1 is also where memset joins, with its
   * fill pattern already in %g3 and %g4 = 1.
   *   In:  %o0 = destination, %o1 = length in bytes
   *   Out: on the normal path, label 0 returns 0 when %g4 is 0 (entered
   *        here) or %g1 when %g4 is 1 (entered via memset).  Every guarded
   *        store has a fixup that instead returns a byte count in %o0; per
   *        the file header that is the number of bytes not yet set.
   *   Registers: %g3 = 32-bit fill pattern, %g2 = copy of %g3 (std stores the
   *        %g2/%g3 pair), %o2, %o3, %o4 = scratch.
   *   The instruction following each branch is its delay slot; several of
   *   them set the condition codes that a later branch consumes.
   */
  94. __bzero:
  95. clr %g4 /* %g4 = 0: entered via __bzero (tested at label 0) */
  96. mov %g0, %g3 /* fill pattern = 0 */
  97. 1:
  98. cmp %o1, 7
  99. bleu 7f /* length <= 7 (unsigned): short path at label 7 */
  100. andcc %o0, 3, %o2 /* (delay slot) %o2 = %o0 & 3; cc used by the next branch and at label 7 */
  101. bne 3b /* not 4-byte aligned: store the head bytes at label 3 */
  102. 4:
  103. andcc %o0, 4, %g0 /* (delay slot of the bne, and re-entry point from label 3) 8-byte aligned? */
  104. be 2f
  105. mov %g3, %g2 /* (delay slot) %g2 = %g3 so std writes the pattern twice */
  106. EX(st %g3, [%o0], sub %o1, 0) /* one word to reach 8-byte alignment; on fault return %o1 */
  107. sub %o1, 4, %o1
  108. add %o0, 4, %o0
  109. 2:
  /* %o3 = remaining length with the low 7 bits cleared (bytes handled by the
   * 128-byte loop). */
  110. andcc %o1, 0xffffff80, %o3 ! Now everything is 8 aligned and o1 is len to run
  111. be 9f /* fewer than 128 bytes: skip the loop */
  112. andcc %o1, 0x78, %o2 /* (delay slot) %o2 = bytes for the doubleword tail; cc used at label 9 */
  /* Main loop: 128 bytes per pass.  %o3 counts down in steps of 128. */
  113. 10:
  114. ZERO_BIG_BLOCK(%o0, 0x00, %g2)
  115. subcc %o3, 128, %o3
  116. ZERO_BIG_BLOCK(%o0, 0x40, %g2)
  117. bne 10b
  118. add %o0, 128, %o0 /* (delay slot) advance the pointer */
  119. orcc %o2, %g0, %g0 /* re-test %o2: the subcc above overwrote the condition codes */
  120. 9:
  121. be 13f /* %o2 == 0: no whole doublewords left */
  122. andcc %o1, 7, %o1 /* (delay slot) %o1 = length & 7; cc consumed by "be 8f" at label 13 */
  /* Computed jump into the tail of the sixteen std instructions below so that
   * exactly %o2 / 8 of them run.  None of the instructions between here and
   * label 13 is a cc-setting form, so the cc from the andcc above survive. */
  123. srl %o2, 1, %o3 /* code bytes needed: 4 bytes of instruction per 8 bytes stored */
  124. set 13f, %o4
  125. sub %o4, %o3, %o4 /* entry point = label 13 minus that amount */
  126. jmp %o4
  127. add %o0, %o2, %o0 /* (delay slot) %o0 now points just past the doublewords */
  128. ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
  129. ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
  /* Label 13: final 0..7 bytes, %o0 at least 4-byte aligned.  Also entered
   * from label 7 with cc reflecting %o1. */
  130. 13:
  131. be 8f /* nothing left */
  132. andcc %o1, 4, %g0 /* (delay slot) word needed? */
  133. be 1f
  134. andcc %o1, 2, %g0 /* (delay slot) halfword needed? */
  135. EX(st %g3, [%o0], and %o1, 7) /* on fault return %o1 & 7 */
  136. add %o0, 4, %o0
  137. 1:
  138. be 1f /* tests the result of "andcc %o1, 2" */
  139. andcc %o1, 1, %g0 /* (delay slot) byte needed? */
  140. EX(sth %g3, [%o0], and %o1, 3) /* on fault return %o1 & 3 */
  141. add %o0, 2, %o0
  142. 1:
  143. bne,a 8f /* annulling branch: the stb below runs only when bit 0 of %o1 is set */
  144. EX(stb %g3, [%o0], and %o1, 1) /* (delay slot) on fault return %o1 & 1 */
  145. 8:
  146. b 0f
  147. nop
  /* Label 7: length <= 7.  cc still reflect "andcc %o0, 3" from label 1. */
  148. 7:
  149. be 13b /* 4-byte aligned: reuse the tail code at label 13 */
  150. orcc %o1, 0, %g0 /* (delay slot) test length against zero */
  151. be 0f /* zero length: done */
  /* Byte loop for a short, unaligned buffer. */
  152. 8:
  153. add %o0, 1, %o0 /* also executes as the delay slot of the "be 0f" above */
  154. subcc %o1, 1, %o1
  155. bne 8b
  156. EX(stb %g3, [%o0 - 1], add %o1, 1) /* (delay slot) on fault return %o1 + 1 */
  /* Label 0: normal return. */
  157. 0:
  158. andcc %g4, 1, %g0
  159. be 5f /* %g4 == 0: entered via __bzero */
  160. nop
  161. retl
  162. mov %g1, %o0 /* (delay slot) memset: return the original destination */
  163. 5:
  164. retl
  165. clr %o0 /* (delay slot) __bzero: return 0 */
  /* Shared fixup tail for the STORE macro.  Each STORE stub branches here
   * after adjusting %o3; the handler returns %o3 + (%o1 & 0x7f) in %o0. */
  166. .section .fixup,#alloc,#execinstr
  167. .align 4
  168. 30:
  169. and %o1, 0x7f, %o1 /* part of the length not covered by the 128-byte loop */
  170. retl
  171. add %o3, %o1, %o0 /* (delay slot) return value */
  /* NOTE(review): __bzero_end is defined while .fixup is the current section,
   * so the label is placed in .fixup rather than after the code in .text -
   * confirm this is what users of __bzero_begin/__bzero_end expect. */
  172. .globl __bzero_end
  173. __bzero_end: