/* xor_vmx.c */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. *
  4. * Copyright (C) IBM Corporation, 2012
  5. *
  6. * Author: Anton Blanchard <[email protected]>
  7. */
  8. /*
  9. * Sparse (as at v0.5.0) gets very, very confused by this file.
  10. * Make it a bit simpler for it.
  11. */
  12. #if !defined(__CHECKER__)
  13. #include <altivec.h>
  14. #else
  15. #define vec_xor(a, b) a ^ b
  16. #define vector __attribute__((vector_size(16)))
  17. #endif
  18. #include "xor_vmx.h"
/* One AltiVec register's worth of data: 16 bytes. */
typedef vector signed char unative_t;

/*
 * DEFINE(V): reinterpret the incoming unsigned long buffer V##_in as an
 * array of 16-byte vectors, and declare four vector temporaries V_0..V_3
 * so each loop iteration works on 64 bytes at a time.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3

/* LOAD(V): fill the four temporaries from the next four vectors of V. */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)

/* STORE(V): write the four temporaries back to the next four vectors of V. */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)

/* XOR(V1, V2): V1 ^= V2, element-wise, across all four temporaries. */
#define XOR(V1, V2)				\
	do {					\
		V1##_0 = vec_xor(V1##_0, V2##_0); \
		V1##_1 = vec_xor(V1##_1, V2##_1); \
		V1##_2 = vec_xor(V1##_2, V2##_2); \
		V1##_3 = vec_xor(V1##_3, V2##_3); \
	} while (0)
  44. void __xor_altivec_2(unsigned long bytes,
  45. unsigned long * __restrict v1_in,
  46. const unsigned long * __restrict v2_in)
  47. {
  48. DEFINE(v1);
  49. DEFINE(v2);
  50. unsigned long lines = bytes / (sizeof(unative_t)) / 4;
  51. do {
  52. LOAD(v1);
  53. LOAD(v2);
  54. XOR(v1, v2);
  55. STORE(v1);
  56. v1 += 4;
  57. v2 += 4;
  58. } while (--lines > 0);
  59. }
  60. void __xor_altivec_3(unsigned long bytes,
  61. unsigned long * __restrict v1_in,
  62. const unsigned long * __restrict v2_in,
  63. const unsigned long * __restrict v3_in)
  64. {
  65. DEFINE(v1);
  66. DEFINE(v2);
  67. DEFINE(v3);
  68. unsigned long lines = bytes / (sizeof(unative_t)) / 4;
  69. do {
  70. LOAD(v1);
  71. LOAD(v2);
  72. LOAD(v3);
  73. XOR(v1, v2);
  74. XOR(v1, v3);
  75. STORE(v1);
  76. v1 += 4;
  77. v2 += 4;
  78. v3 += 4;
  79. } while (--lines > 0);
  80. }
  81. void __xor_altivec_4(unsigned long bytes,
  82. unsigned long * __restrict v1_in,
  83. const unsigned long * __restrict v2_in,
  84. const unsigned long * __restrict v3_in,
  85. const unsigned long * __restrict v4_in)
  86. {
  87. DEFINE(v1);
  88. DEFINE(v2);
  89. DEFINE(v3);
  90. DEFINE(v4);
  91. unsigned long lines = bytes / (sizeof(unative_t)) / 4;
  92. do {
  93. LOAD(v1);
  94. LOAD(v2);
  95. LOAD(v3);
  96. LOAD(v4);
  97. XOR(v1, v2);
  98. XOR(v3, v4);
  99. XOR(v1, v3);
  100. STORE(v1);
  101. v1 += 4;
  102. v2 += 4;
  103. v3 += 4;
  104. v4 += 4;
  105. } while (--lines > 0);
  106. }
  107. void __xor_altivec_5(unsigned long bytes,
  108. unsigned long * __restrict v1_in,
  109. const unsigned long * __restrict v2_in,
  110. const unsigned long * __restrict v3_in,
  111. const unsigned long * __restrict v4_in,
  112. const unsigned long * __restrict v5_in)
  113. {
  114. DEFINE(v1);
  115. DEFINE(v2);
  116. DEFINE(v3);
  117. DEFINE(v4);
  118. DEFINE(v5);
  119. unsigned long lines = bytes / (sizeof(unative_t)) / 4;
  120. do {
  121. LOAD(v1);
  122. LOAD(v2);
  123. LOAD(v3);
  124. LOAD(v4);
  125. LOAD(v5);
  126. XOR(v1, v2);
  127. XOR(v3, v4);
  128. XOR(v1, v5);
  129. XOR(v1, v3);
  130. STORE(v1);
  131. v1 += 4;
  132. v2 += 4;
  133. v3 += 4;
  134. v4 += 4;
  135. v5 += 4;
  136. } while (--lines > 0);
  137. }