xor.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Optimized xor_block operation for RAID4/5
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <[email protected]>
 */

#include <linux/types.h>
#include <linux/export.h>
#include <linux/raid/xor.h>
#include <asm/xor.h>
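
/*
 * Each helper below follows the same pattern: the XC (exclusive-or
 * character) instruction XORs up to 256 bytes storage to storage, with
 * the operand length encoded as length - 1 in an 8-bit field (hence the
 * initial "aghi %0,-1"; a negative result means bytes == 0 and there is
 * nothing to do). Full 256-byte blocks are handled in the loop at
 * label 0; the trailing 1..256 bytes are handled by EX (execute), which
 * runs the XC template at label 2 with the length field taken from the
 * low byte of the remaining count. Register 1 holds the template
 * address loaded by "larl 1,2f".
 */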
static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
		     const unsigned long * __restrict p2)
{
	asm volatile(
		"	larl	1,2f\n"
		"	aghi	%0,-1\n"
		"	jm	3f\n"
		"	srlg	0,%0,8\n"
		"	ltgr	0,0\n"
		"	jz	1f\n"
		"0:	xc	0(256,%1),0(%2)\n"
		"	la	%1,256(%1)\n"
		"	la	%2,256(%2)\n"
		"	brctg	0,0b\n"
		"1:	ex	%0,0(1)\n"
		"	j	3f\n"
		"2:	xc	0(1,%1),0(%2)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2)
		: : "0", "1", "cc", "memory");
}
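
/*
 * Two source buffers: the remainder template at label 2 now holds two
 * XCs. Each XC is a 6-byte SS-format instruction, so the EX offsets
 * step by 6 (0(1) and 6(1)) to execute one XC per source buffer.
 */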
static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
		     const unsigned long * __restrict p2,
		     const unsigned long * __restrict p3)
{
	asm volatile(
		"	larl	1,2f\n"
		"	aghi	%0,-1\n"
		"	jm	3f\n"
		"	srlg	0,%0,8\n"
		"	ltgr	0,0\n"
		"	jz	1f\n"
		"0:	xc	0(256,%1),0(%2)\n"
		"	xc	0(256,%1),0(%3)\n"
		"	la	%1,256(%1)\n"
		"	la	%2,256(%2)\n"
		"	la	%3,256(%3)\n"
		"	brctg	0,0b\n"
		"1:	ex	%0,0(1)\n"
		"	ex	%0,6(1)\n"
		"	j	3f\n"
		"2:	xc	0(1,%1),0(%2)\n"
		"	xc	0(1,%1),0(%3)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
		: : "0", "1", "cc", "memory");
}
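
/*
 * Three source buffers: three XCs in the remainder template at label 2,
 * executed via EX at offsets 0, 6 and 12.
 */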
static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
		     const unsigned long * __restrict p2,
		     const unsigned long * __restrict p3,
		     const unsigned long * __restrict p4)
{
	asm volatile(
		"	larl	1,2f\n"
		"	aghi	%0,-1\n"
		"	jm	3f\n"
		"	srlg	0,%0,8\n"
		"	ltgr	0,0\n"
		"	jz	1f\n"
		"0:	xc	0(256,%1),0(%2)\n"
		"	xc	0(256,%1),0(%3)\n"
		"	xc	0(256,%1),0(%4)\n"
		"	la	%1,256(%1)\n"
		"	la	%2,256(%2)\n"
		"	la	%3,256(%3)\n"
		"	la	%4,256(%4)\n"
		"	brctg	0,0b\n"
		"1:	ex	%0,0(1)\n"
		"	ex	%0,6(1)\n"
		"	ex	%0,12(1)\n"
		"	j	3f\n"
		"2:	xc	0(1,%1),0(%2)\n"
		"	xc	0(1,%1),0(%3)\n"
		"	xc	0(1,%1),0(%4)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
		: : "0", "1", "cc", "memory");
}
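
/*
 * Four source buffers: four XCs in the remainder template at label 2,
 * executed via EX at offsets 0, 6, 12 and 18.
 */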
static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
		     const unsigned long * __restrict p2,
		     const unsigned long * __restrict p3,
		     const unsigned long * __restrict p4,
		     const unsigned long * __restrict p5)
{
	asm volatile(
		"	larl	1,2f\n"
		"	aghi	%0,-1\n"
		"	jm	3f\n"
		"	srlg	0,%0,8\n"
		"	ltgr	0,0\n"
		"	jz	1f\n"
		"0:	xc	0(256,%1),0(%2)\n"
		"	xc	0(256,%1),0(%3)\n"
		"	xc	0(256,%1),0(%4)\n"
		"	xc	0(256,%1),0(%5)\n"
		"	la	%1,256(%1)\n"
		"	la	%2,256(%2)\n"
		"	la	%3,256(%3)\n"
		"	la	%4,256(%4)\n"
		"	la	%5,256(%5)\n"
		"	brctg	0,0b\n"
		"1:	ex	%0,0(1)\n"
		"	ex	%0,6(1)\n"
		"	ex	%0,12(1)\n"
		"	ex	%0,18(1)\n"
		"	j	3f\n"
		"2:	xc	0(1,%1),0(%2)\n"
		"	xc	0(1,%1),0(%3)\n"
		"	xc	0(1,%1),0(%4)\n"
		"	xc	0(1,%1),0(%5)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
		  "+a" (p5)
		: : "0", "1", "cc", "memory");
}
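
/*
 * Template published to the generic RAID xor code. The arch's asm/xor.h
 * is expected to feed it to xor_speed() via the XOR_TRY_TEMPLATES hook,
 * so that the boot-time benchmark in crypto/xor.c can pick the fastest
 * available template.
 */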
struct xor_block_template xor_block_xc = {
	.name = "xc",
	.do_2 = xor_xc_2,
	.do_3 = xor_xc_3,
	.do_4 = xor_xc_4,
	.do_5 = xor_xc_5,
};
EXPORT_SYMBOL(xor_block_xc);
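
/*
 * Minimal usage sketch (illustrative only; "dst" and "src" are
 * hypothetical page-sized buffers). Callers normally go through the
 * generic helper xor_blocks() from crypto/xor.c rather than invoking a
 * template directly:
 *
 *	void *srcs[1] = { src };
 *	xor_blocks(1, PAGE_SIZE, dst, srcs);
 *
 * xor_blocks() dispatches to the do_N hook of whichever template won
 * the calibration run, e.g. xor_xc_2() above for a single source.
 */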