/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csum_partial_copy_generic.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>
/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
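/*
 * Rough C-level sketch of the computation (illustration only; the byte
 * lanes shown assume a little-endian build, and the helper name is made
 * up for this comment):
 *
 *      u32 csum_copy_sketch(const u8 *src, u8 *dst, int len, u32 sum)
 *      {
 *              while (len >= 2) {
 *                      u16 w = src[0] | (src[1] << 8);
 *                      dst[0] = src[0];
 *                      dst[1] = src[1];
 *                      sum += w;
 *                      if (sum < w)            /* end-around carry */
 *                              sum++;
 *                      src += 2; dst += 2; len -= 2;
 *              }
 *              if (len) {                      /* trailing odd byte */
 *                      *dst = *src;
 *                      sum += *src;
 *                      if (sum < *src)
 *                              sum++;
 *              }
 *              return sum;
 *      }
 *
 * The code below performs the same one's-complement accumulation a
 * whole 32-bit word at a time, letting adcs feed each carry straight
 * back into the running sum.
 */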
src             .req    r0
dst             .req    r1
len             .req    r2
sum             .req    r3

.Lzero:         mov     r0, sum
                load_regs

                /*
                 * Align an unaligned destination pointer.  We know that
                 * we have >= 8 bytes here, so we don't need to check
                 * the length.  Note that the source pointer hasn't been
                 * aligned yet.
                 */
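                /*
                 * For illustration: a dst ending in binary ..01 takes the
                 * single byte copy (dst then ends in ..10) and falls
                 * through to .Ldst_16bit for one more halfword; ..10 skips
                 * straight to the halfword copy; ..11 needs only the single
                 * byte and returns via 'reteq'.  At most 3 bytes are
                 * consumed, so the >= 8 byte guarantee always leaves data
                 * for the word loops.
                 */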
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                reteq   lr                      @ dst is now 32bit aligned

.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                ret     lr                      @ dst is now 32bit aligned
                /*
                 * Handle 0 to 7 bytes, with any alignment of source and
                 * destination pointers.  Note that when we get here, C = 0
                 */
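                /*
                 * Example trace (illustration only): len == 5 with a
                 * 16-bit aligned dst goes straight to .Lless8_aligned,
                 * takes the halfword loop at 1b twice (len 5 -> 3 -> 1)
                 * and copies the final byte in .Lless8_byteonly; len == 3
                 * with an odd dst copies one byte to align, then one
                 * halfword, and is done.
                 */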
.Lless8:        teq     len, #0                 @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                 @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly

1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone
FN_ENTRY
                save_regs
                mov     sum, #-1

                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .Ldst_unaligned         @ align destination, return here

                /*
                 * Ok, the dst pointer is now 32bit aligned, and we know
                 * that we must have more than 4 bytes to copy.  Note
                 * that C contains the carry from the dst alignment above.
                 */
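                /*
                 * Worked example of the adcs chains used below (numbers
                 * are illustrative): with C clear, sum = 0xffffffff and
                 * r4 = 0x00000002, 'adcs sum, sum, r4' leaves
                 * sum = 0x00000001 with C set; the following adcs (or the
                 * final 'adc r0, sum, #0' in .Ldone) absorbs that carry,
                 * which is exactly the end-around carry of one's-complement
                 * addition.
                 */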
                tst     src, #3                 @ Test source alignment
                bne     .Lsrc_not_aligned

                /* Routine for src & dst aligned */

                bics    ip, len, #15
                beq     2f

1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

4:              ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
.Lexit:         tst     len, #1
                strbne  r5, [dst], #1
                andne   r5, r5, #255
                adcsne  sum, sum, r5, put_byte_0
                /*
                 * If the dst pointer was not 16-bit aligned, we
                 * need to rotate the checksum here to get around
                 * the inefficient byte manipulations in the
                 * architecture independent code.
                 */
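                /*
                 * Worked example (illustrative): with an odd dst every
                 * byte was accumulated one lane away from where the
                 * generic code expects it, so the two byte lanes of each
                 * 16-bit half of the sum are swapped.  Rotating the 32-bit
                 * sum right by 8 swaps the lanes back, so once the caller
                 * folds the result it matches a buffer-offset-based sum
                 * with byte 0 in the low lane.
                 */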
.Ldone:         adc     r0, sum, #0
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs
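                /*
                 * The three misaligned-source paths below all use the same
                 * trick: src is rounded down to a word boundary, whole
                 * words are loaded, and lspull/lspush (lsr/lsl on
                 * little-endian builds, reversed on big-endian) combine the
                 * leftover bytes of one word with the leading bytes of the
                 * next, so that dst is always written with aligned 32-bit
                 * stores.  r4 carries the leftover bytes between iterations.
                 */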
.Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned
                mov     r4, r5, lspull #8       @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                mov     r6, r6, lspull #8
                orr     r6, r6, r7, lspush #24
                mov     r7, r7, lspull #8
                orr     r7, r7, r8, lspush #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #8
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit
.Lsrc2_aligned: mov     r4, r5, lspull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                mov     r6, r6, lspull #16
                orr     r6, r6, r7, lspush #16
                mov     r7, r7, lspull #16
                orr     r7, r7, r8, lspush #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #16
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit
.Lsrc3_aligned: mov     r4, r5, lspull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                mov     r6, r6, lspull #24
                orr     r6, r6, r7, lspush #8
                mov     r7, r7, lspull #24
                orr     r7, r7, r8, lspush #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #24
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, lspush #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT