xor.S 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */
  7. #include <asm/asmmacro.h>
  8. #include <asm/export.h>
  9. GLOBAL_ENTRY(xor_ia64_2)
  10. .prologue
  11. .fframe 0
  12. .save ar.pfs, r31
  13. alloc r31 = ar.pfs, 3, 0, 13, 16
  14. .save ar.lc, r30
  15. mov r30 = ar.lc
  16. .save pr, r29
  17. mov r29 = pr
  18. ;;
  19. .body
  20. mov r8 = in1
  21. mov ar.ec = 6 + 2
  22. shr in0 = in0, 3
  23. ;;
  24. adds in0 = -1, in0
  25. mov r16 = in1
  26. mov r17 = in2
  27. ;;
  28. mov ar.lc = in0
  29. mov pr.rot = 1 << 16
  30. ;;
  31. .rotr s1[6+1], s2[6+1], d[2]
  32. .rotp p[6+2]
  33. 0:
  34. (p[0]) ld8.nta s1[0] = [r16], 8
  35. (p[0]) ld8.nta s2[0] = [r17], 8
  36. (p[6]) xor d[0] = s1[6], s2[6]
  37. (p[6+1])st8.nta [r8] = d[1], 8
  38. nop.f 0
  39. br.ctop.dptk.few 0b
  40. ;;
  41. mov ar.lc = r30
  42. mov pr = r29, -1
  43. br.ret.sptk.few rp
  44. END(xor_ia64_2)
  45. EXPORT_SYMBOL(xor_ia64_2)
  46. GLOBAL_ENTRY(xor_ia64_3)
  47. .prologue
  48. .fframe 0
  49. .save ar.pfs, r31
  50. alloc r31 = ar.pfs, 4, 0, 20, 24
  51. .save ar.lc, r30
  52. mov r30 = ar.lc
  53. .save pr, r29
  54. mov r29 = pr
  55. ;;
  56. .body
  57. mov r8 = in1
  58. mov ar.ec = 6 + 2
  59. shr in0 = in0, 3
  60. ;;
  61. adds in0 = -1, in0
  62. mov r16 = in1
  63. mov r17 = in2
  64. ;;
  65. mov r18 = in3
  66. mov ar.lc = in0
  67. mov pr.rot = 1 << 16
  68. ;;
  69. .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
  70. .rotp p[6+2]
  71. 0:
  72. (p[0]) ld8.nta s1[0] = [r16], 8
  73. (p[0]) ld8.nta s2[0] = [r17], 8
  74. (p[6]) xor d[0] = s1[6], s2[6]
  75. ;;
  76. (p[0]) ld8.nta s3[0] = [r18], 8
  77. (p[6+1])st8.nta [r8] = d[1], 8
  78. (p[6]) xor d[0] = d[0], s3[6]
  79. br.ctop.dptk.few 0b
  80. ;;
  81. mov ar.lc = r30
  82. mov pr = r29, -1
  83. br.ret.sptk.few rp
  84. END(xor_ia64_3)
  85. EXPORT_SYMBOL(xor_ia64_3)
  86. GLOBAL_ENTRY(xor_ia64_4)
  87. .prologue
  88. .fframe 0
  89. .save ar.pfs, r31
  90. alloc r31 = ar.pfs, 5, 0, 27, 32
  91. .save ar.lc, r30
  92. mov r30 = ar.lc
  93. .save pr, r29
  94. mov r29 = pr
  95. ;;
  96. .body
  97. mov r8 = in1
  98. mov ar.ec = 6 + 2
  99. shr in0 = in0, 3
  100. ;;
  101. adds in0 = -1, in0
  102. mov r16 = in1
  103. mov r17 = in2
  104. ;;
  105. mov r18 = in3
  106. mov ar.lc = in0
  107. mov pr.rot = 1 << 16
  108. mov r19 = in4
  109. ;;
  110. .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
  111. .rotp p[6+2]
  112. 0:
  113. (p[0]) ld8.nta s1[0] = [r16], 8
  114. (p[0]) ld8.nta s2[0] = [r17], 8
  115. (p[6]) xor d[0] = s1[6], s2[6]
  116. (p[0]) ld8.nta s3[0] = [r18], 8
  117. (p[0]) ld8.nta s4[0] = [r19], 8
  118. (p[6]) xor r20 = s3[6], s4[6]
  119. ;;
  120. (p[6+1])st8.nta [r8] = d[1], 8
  121. (p[6]) xor d[0] = d[0], r20
  122. br.ctop.dptk.few 0b
  123. ;;
  124. mov ar.lc = r30
  125. mov pr = r29, -1
  126. br.ret.sptk.few rp
  127. END(xor_ia64_4)
  128. EXPORT_SYMBOL(xor_ia64_4)
  129. GLOBAL_ENTRY(xor_ia64_5)
  130. .prologue
  131. .fframe 0
  132. .save ar.pfs, r31
  133. alloc r31 = ar.pfs, 6, 0, 34, 40
  134. .save ar.lc, r30
  135. mov r30 = ar.lc
  136. .save pr, r29
  137. mov r29 = pr
  138. ;;
  139. .body
  140. mov r8 = in1
  141. mov ar.ec = 6 + 2
  142. shr in0 = in0, 3
  143. ;;
  144. adds in0 = -1, in0
  145. mov r16 = in1
  146. mov r17 = in2
  147. ;;
  148. mov r18 = in3
  149. mov ar.lc = in0
  150. mov pr.rot = 1 << 16
  151. mov r19 = in4
  152. mov r20 = in5
  153. ;;
  154. .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
  155. .rotp p[6+2]
  156. 0:
  157. (p[0]) ld8.nta s1[0] = [r16], 8
  158. (p[0]) ld8.nta s2[0] = [r17], 8
  159. (p[6]) xor d[0] = s1[6], s2[6]
  160. (p[0]) ld8.nta s3[0] = [r18], 8
  161. (p[0]) ld8.nta s4[0] = [r19], 8
  162. (p[6]) xor r21 = s3[6], s4[6]
  163. ;;
  164. (p[0]) ld8.nta s5[0] = [r20], 8
  165. (p[6+1])st8.nta [r8] = d[1], 8
  166. (p[6]) xor d[0] = d[0], r21
  167. ;;
  168. (p[6]) xor d[0] = d[0], s5[6]
  169. nop.f 0
  170. br.ctop.dptk.few 0b
  171. ;;
  172. mov ar.lc = r30
  173. mov pr = r29, -1
  174. br.ret.sptk.few rp
  175. END(xor_ia64_5)
  176. EXPORT_SYMBOL(xor_ia64_5)