/*
 *  arch/xtensa/lib/usercopy.S
 *
 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 *  It needs to remain separate and distinct.  The hal files are part
 *  of the Xtensa link-time HAL, and those files may differ per
 *  processor configuration.  Patching the kernel for another
 *  processor configuration includes replacing the hal files, and we
 *  could lose the special functionality for accessing user-space
 *  memory during such a patch.  We sacrifice a little code space here
 *  in favor of simpler code maintenance.
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */
/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The return value is the number of bytes NOT copied, so a return
 * value of zero means the whole buffer was copied successfully.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If the destination is aligned and the source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If the destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment labels).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
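
/*
 * Illustrative C-level sketch of the calling contract (not part of the
 * build): in-kernel wrappers such as raw_copy_{to,from}_user() are
 * expected to use the return value roughly like this:
 *
 *	size_t left = __xtensa_copy_user(dst, src, len);
 *	if (left != 0)
 *		;	// faulted in user space; only (len - left)
 *			// bytes were actually copied
 */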

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

	.text
ENTRY(__xtensa_copy_user)

#if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__)
#define STACK_SIZE 4
#else
#define STACK_SIZE 0
#endif
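/*
 * Note (derived from the code below): stack space is only needed for
 * the call0 ABI without hardware loops, because .Lsrcunaligned then
 * reuses a10 as the loop bound and must spill the saved source
 * misalignment to the stack; the windowed ABI can use a12 as an extra
 * temporary instead, while a12 is callee-saved under the call0 ABI.
 */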

	abi_entry(STACK_SIZE)
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from the unaligned-dst fixups once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	__ssa8	a3		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	abi_ret(STACK_SIZE)
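
/*
 * Control-flow note: from .Ldstaligned we either branched to .Laligned
 * (source also word-aligned), branched to .Lsrcunaligned (source
 * unaligned, len != 0), or returned 0 for len == 0.  The .Ldst1mod2 and
 * .Ldst2mod4 code below aligns an unaligned destination, falling back
 * to .Lbytecopy for short copies.
 */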

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3, 1
EX(10f)	s8i	a6, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
EX(10f)	l8ui	a6, a3, 0
EX(10f)	l8ui	a7, a3, 1
	addi	a3, a3, 2
EX(10f)	s8i	a6, a5, 0
EX(10f)	s8i	a7, a5, 1
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
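	# The extra .byte after .align 4 places the following code at an
	# address that is 1 mod 4, so the 3-byte LOOPNEZ opcode ends on a
	# 4-byte boundary and the loop body (the LBEG target) starts
	# 0 mod 4 aligned; presumably this keeps instruction fetch of the
	# zero-overhead loop body aligned.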
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
EX(10f)	l8ui	a6, a3, 0
	addi	a3, a3, 1
EX(10f)	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	abi_ret(STACK_SIZE)

/*
 * Destination and source are word-aligned.
 */
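/*
 * Scheduling note: loads and stores in .Loop1 are interleaved so that
 * each s32i consumes a value loaded two instructions earlier while the
 * next l32i has already been issued, which helps hide load-to-use
 * latency on pipelined cores.
 */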
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f)	l32i	a6, a3, 0
EX(10f)	l32i	a7, a3, 4
EX(10f)	s32i	a6, a5, 0
EX(10f)	l32i	a6, a3, 8
EX(10f)	s32i	a7, a5, 4
EX(10f)	l32i	a7, a3, 12
EX(10f)	s32i	a6, a5, 8
	addi	a3, a3, 16
EX(10f)	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
EX(10f)	l32i	a6, a3, 0
EX(10f)	l32i	a7, a3, 4
	addi	a3, a3, 8
EX(10f)	s32i	a6, a5, 0
EX(10f)	s32i	a7, a5, 4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
EX(10f)	l32i	a6, a3, 0
	addi	a3, a3, 4
EX(10f)	s32i	a6, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
EX(10f)	l16ui	a6, a3, 0
	addi	a3, a3, 2
EX(10f)	s16i	a6, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
EX(10f)	l8ui	a6, a3, 0
EX(10f)	s8i	a6, a5, 0
.L5:
	movi	a2, 0		# return success for len bytes copied
	abi_ret(STACK_SIZE)

/*
 * Destination is aligned, source is unaligned
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
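/*
 * Technique note: the loop below reads only whole, aligned words from
 * the source and assembles each destination word with __src_b, a macro
 * around the SRC funnel-shift instruction.  The shift amount was put in
 * SAR by the __ssa8 at .Ldstaligned, taken from the low two bits of the
 * original source address; e.g. for a source that is 1 mod 4, each SRC
 * combines three bytes of one aligned word with one byte of the next
 * (which end of each word gets used is hidden by the endian-aware
 * __ssa8/__src_b macros).
 */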
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
EX(10f)	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
#if defined(__XTENSA_CALL0_ABI__)
	s32i	a10, a1, 0
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#else
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
EX(10f)	l32i	a7, a3, 4
EX(10f)	l32i	a8, a3, 8
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
EX(10f)	l32i	a9, a3, 12
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5, 4
EX(10f)	l32i	a6, a3, 16
	__src_b	a8, a8, a9
EX(10f)	s32i	a8, a5, 8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
EX(10f)	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
#if defined(__XTENSA_CALL0_ABI__)
	blt	a3, a10, .Loop2
	l32i	a10, a1, 0
#else
	blt	a3, a12, .Loop2
#endif
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
EX(10f)	l32i	a7, a3, 4
EX(10f)	l32i	a8, a3, 8
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
	addi	a3, a3, 8
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5, 4
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
EX(10f)	l32i	a7, a3, 4
	addi	a3, a3, 4
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
EX(10f)	l8ui	a6, a3, 0
EX(10f)	l8ui	a7, a3, 1
	addi	a3, a3, 2
EX(10f)	s8i	a6, a5, 0
EX(10f)	s8i	a7, a5, 1
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
EX(10f)	l8ui	a6, a3, 0
EX(10f)	s8i	a6, a5, 0
.L15:
	movi	a2, 0		# return success for len bytes copied
	abi_ret(STACK_SIZE)

ENDPROC(__xtensa_copy_user)
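
/*
 * Each EX(10f) annotation above is expected to emit an exception-table
 * entry for the user-space access that follows it, so that a fault on a
 * bad user address transfers control to the fixup code at label 10
 * below instead of killing the kernel.
 */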

	.section .fixup, "ax"
	.align	4

/*
 * a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */
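/*
 * Worked example (using the formula above): if the fault hits after
 * 12 of 100 requested bytes have been stored, a5 - a2 == 12 and the
 * function returns a11 - 12 == 88 bytes not copied.
 */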
10:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	abi_ret(STACK_SIZE)