memcpy_32.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/string.h>
  3. #include <linux/export.h>
  4. #undef memcpy
  5. #undef memset
  6. #undef memmove
  7. __visible void *memcpy(void *to, const void *from, size_t n)
  8. {
  9. return __memcpy(to, from, n);
  10. }
  11. EXPORT_SYMBOL(memcpy);
  12. __visible void *memset(void *s, int c, size_t count)
  13. {
  14. return __memset(s, c, count);
  15. }
  16. EXPORT_SYMBOL(memset);
  17. __visible void *memmove(void *dest, const void *src, size_t n)
  18. {
  19. int d0,d1,d2,d3,d4,d5;
  20. char *ret = dest;
  21. __asm__ __volatile__(
  22. /* Handle more 16 bytes in loop */
  23. "cmp $0x10, %0\n\t"
  24. "jb 1f\n\t"
  25. /* Decide forward/backward copy mode */
  26. "cmp %2, %1\n\t"
  27. "jb 2f\n\t"
  28. /*
  29. * movs instruction have many startup latency
  30. * so we handle small size by general register.
  31. */
  32. "cmp $680, %0\n\t"
  33. "jb 3f\n\t"
  34. /*
  35. * movs instruction is only good for aligned case.
  36. */
  37. "mov %1, %3\n\t"
  38. "xor %2, %3\n\t"
  39. "and $0xff, %3\n\t"
  40. "jz 4f\n\t"
  41. "3:\n\t"
  42. "sub $0x10, %0\n\t"
  43. /*
  44. * We gobble 16 bytes forward in each loop.
  45. */
  46. "3:\n\t"
  47. "sub $0x10, %0\n\t"
  48. "mov 0*4(%1), %3\n\t"
  49. "mov 1*4(%1), %4\n\t"
  50. "mov %3, 0*4(%2)\n\t"
  51. "mov %4, 1*4(%2)\n\t"
  52. "mov 2*4(%1), %3\n\t"
  53. "mov 3*4(%1), %4\n\t"
  54. "mov %3, 2*4(%2)\n\t"
  55. "mov %4, 3*4(%2)\n\t"
  56. "lea 0x10(%1), %1\n\t"
  57. "lea 0x10(%2), %2\n\t"
  58. "jae 3b\n\t"
  59. "add $0x10, %0\n\t"
  60. "jmp 1f\n\t"
  61. /*
  62. * Handle data forward by movs.
  63. */
  64. ".p2align 4\n\t"
  65. "4:\n\t"
  66. "mov -4(%1, %0), %3\n\t"
  67. "lea -4(%2, %0), %4\n\t"
  68. "shr $2, %0\n\t"
  69. "rep movsl\n\t"
  70. "mov %3, (%4)\n\t"
  71. "jmp 11f\n\t"
  72. /*
  73. * Handle data backward by movs.
  74. */
  75. ".p2align 4\n\t"
  76. "6:\n\t"
  77. "mov (%1), %3\n\t"
  78. "mov %2, %4\n\t"
  79. "lea -4(%1, %0), %1\n\t"
  80. "lea -4(%2, %0), %2\n\t"
  81. "shr $2, %0\n\t"
  82. "std\n\t"
  83. "rep movsl\n\t"
  84. "mov %3,(%4)\n\t"
  85. "cld\n\t"
  86. "jmp 11f\n\t"
  87. /*
  88. * Start to prepare for backward copy.
  89. */
  90. ".p2align 4\n\t"
  91. "2:\n\t"
  92. "cmp $680, %0\n\t"
  93. "jb 5f\n\t"
  94. "mov %1, %3\n\t"
  95. "xor %2, %3\n\t"
  96. "and $0xff, %3\n\t"
  97. "jz 6b\n\t"
  98. /*
  99. * Calculate copy position to tail.
  100. */
  101. "5:\n\t"
  102. "add %0, %1\n\t"
  103. "add %0, %2\n\t"
  104. "sub $0x10, %0\n\t"
  105. /*
  106. * We gobble 16 bytes backward in each loop.
  107. */
  108. "7:\n\t"
  109. "sub $0x10, %0\n\t"
  110. "mov -1*4(%1), %3\n\t"
  111. "mov -2*4(%1), %4\n\t"
  112. "mov %3, -1*4(%2)\n\t"
  113. "mov %4, -2*4(%2)\n\t"
  114. "mov -3*4(%1), %3\n\t"
  115. "mov -4*4(%1), %4\n\t"
  116. "mov %3, -3*4(%2)\n\t"
  117. "mov %4, -4*4(%2)\n\t"
  118. "lea -0x10(%1), %1\n\t"
  119. "lea -0x10(%2), %2\n\t"
  120. "jae 7b\n\t"
  121. /*
  122. * Calculate copy position to head.
  123. */
  124. "add $0x10, %0\n\t"
  125. "sub %0, %1\n\t"
  126. "sub %0, %2\n\t"
  127. /*
  128. * Move data from 8 bytes to 15 bytes.
  129. */
  130. ".p2align 4\n\t"
  131. "1:\n\t"
  132. "cmp $8, %0\n\t"
  133. "jb 8f\n\t"
  134. "mov 0*4(%1), %3\n\t"
  135. "mov 1*4(%1), %4\n\t"
  136. "mov -2*4(%1, %0), %5\n\t"
  137. "mov -1*4(%1, %0), %1\n\t"
  138. "mov %3, 0*4(%2)\n\t"
  139. "mov %4, 1*4(%2)\n\t"
  140. "mov %5, -2*4(%2, %0)\n\t"
  141. "mov %1, -1*4(%2, %0)\n\t"
  142. "jmp 11f\n\t"
  143. /*
  144. * Move data from 4 bytes to 7 bytes.
  145. */
  146. ".p2align 4\n\t"
  147. "8:\n\t"
  148. "cmp $4, %0\n\t"
  149. "jb 9f\n\t"
  150. "mov 0*4(%1), %3\n\t"
  151. "mov -1*4(%1, %0), %4\n\t"
  152. "mov %3, 0*4(%2)\n\t"
  153. "mov %4, -1*4(%2, %0)\n\t"
  154. "jmp 11f\n\t"
  155. /*
  156. * Move data from 2 bytes to 3 bytes.
  157. */
  158. ".p2align 4\n\t"
  159. "9:\n\t"
  160. "cmp $2, %0\n\t"
  161. "jb 10f\n\t"
  162. "movw 0*2(%1), %%dx\n\t"
  163. "movw -1*2(%1, %0), %%bx\n\t"
  164. "movw %%dx, 0*2(%2)\n\t"
  165. "movw %%bx, -1*2(%2, %0)\n\t"
  166. "jmp 11f\n\t"
  167. /*
  168. * Move data for 1 byte.
  169. */
  170. ".p2align 4\n\t"
  171. "10:\n\t"
  172. "cmp $1, %0\n\t"
  173. "jb 11f\n\t"
  174. "movb (%1), %%cl\n\t"
  175. "movb %%cl, (%2)\n\t"
  176. ".p2align 4\n\t"
  177. "11:"
  178. : "=&c" (d0), "=&S" (d1), "=&D" (d2),
  179. "=r" (d3),"=r" (d4), "=r"(d5)
  180. :"0" (n),
  181. "1" (src),
  182. "2" (dest)
  183. :"memory");
  184. return ret;
  185. }
  186. EXPORT_SYMBOL(memmove);