crc32.S

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated CRC32(C) using AArch64 CRC instructions
 *
 * Copyright (C) 2016 - 2018 Linaro Ltd <[email protected]>
 */

#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>
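
/*
 * Calling convention (AAPCS64 mapping of u32 f(u32 crc, unsigned char const *p,
 * size_t len)):
 *   w0 - CRC accumulator (seed on entry, result on return)
 *   x1 - pointer to the data
 *   x2 - length in bytes
 */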
        .arch           armv8-a+crc
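
/*
 * The CRC32{B,H,W,X} and CRC32C{B,H,W,X} instructions compute the
 * bit-reflected ("little-endian") CRC and expect the first data byte in
 * the least significant byte of the source register. The macros below fix
 * up each loaded quantity accordingly:
 *  - for the little-endian CRCs, loads only need a byte swap on BE kernels;
 *  - for crc32_be, the CRC and every data byte are bit-reversed (rbit, plus
 *    a byte swap where needed) so the same instructions can be reused, and
 *    the result is bit-reversed back before returning.
 */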
        .macro          byteorder, reg, be
        .if             \be
CPU_LE( rev             \reg, \reg      )
        .else
CPU_BE( rev             \reg, \reg      )
        .endif
        .endm

        .macro          byteorder16, reg, be
        .if             \be
CPU_LE( rev16           \reg, \reg      )
        .else
CPU_BE( rev16           \reg, \reg      )
        .endif
        .endm

        .macro          bitorder, reg, be
        .if             \be
        rbit            \reg, \reg
        .endif
        .endm

        .macro          bitorder16, reg, be
        .if             \be
        rbit            \reg, \reg
        lsr             \reg, \reg, #16
        .endif
        .endm

        .macro          bitorder8, reg, be
        .if             \be
        rbit            \reg, \reg
        lsr             \reg, \reg, #24
        .endif
        .endm
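
/*
 * __crc32 generates the body of one CRC routine:
 *  - buffers shorter than 16 bytes take the doubleword/word/halfword/byte
 *    tail path at labels 8/4/2/1;
 *  - otherwise the leading (len % 32) bytes are folded in first, using
 *    conditional selects instead of branches, and the rest is consumed by
 *    the 32-bytes-per-iteration loop at label 32.
 */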
        .macro          __crc32, c, be=0
        bitorder        w0, \be
        cmp             x2, #16
        b.lt            8f              // less than 16 bytes

        and             x7, x2, #0x1f   // x7 = len % 32
        and             x2, x2, #~0x1f  // x2 = len rounded down to 32 bytes
        cbz             x7, 32f         // multiple of 32 bytes

        // fold in the 1..31 byte head so the loop sees whole 32-byte blocks:
        // x3/x4 supply the first (len % 16) bytes, x5/x6 the last 16 bytes
        and             x8, x7, #0xf
        ldp             x3, x4, [x1]
        add             x8, x8, x1
        add             x1, x1, x7
        ldp             x5, x6, [x8]
        byteorder       x3, \be
        byteorder       x4, \be
        byteorder       x5, \be
        byteorder       x6, \be
        bitorder        x3, \be
        bitorder        x4, \be
        bitorder        x5, \be
        bitorder        x6, \be

        tst             x7, #8
        crc32\c\()x     w8, w0, x3
        csel            x3, x3, x4, eq
        csel            w0, w0, w8, eq
        tst             x7, #4
        lsr             x4, x3, #32
        crc32\c\()w     w8, w0, w3
        csel            x3, x3, x4, eq
        csel            w0, w0, w8, eq
        tst             x7, #2
        lsr             w4, w3, #16
        crc32\c\()h     w8, w0, w3
        csel            w3, w3, w4, eq
        csel            w0, w0, w8, eq
        tst             x7, #1
        crc32\c\()b     w8, w0, w3
        csel            w0, w0, w8, eq
        tst             x7, #16
        crc32\c\()x     w8, w0, x5
        crc32\c\()x     w8, w8, x6
        csel            w0, w0, w8, eq
        cbz             x2, 0f
        // main loop: process 32 bytes per iteration
32:     ldp             x3, x4, [x1], #32
        sub             x2, x2, #32
        ldp             x5, x6, [x1, #-16]
        byteorder       x3, \be
        byteorder       x4, \be
        byteorder       x5, \be
        byteorder       x6, \be
        bitorder        x3, \be
        bitorder        x4, \be
        bitorder        x5, \be
        bitorder        x6, \be
        crc32\c\()x     w0, w0, x3
        crc32\c\()x     w0, w0, x4
        crc32\c\()x     w0, w0, x5
        crc32\c\()x     w0, w0, x6
        cbnz            x2, 32b

0:      bitorder        w0, \be
        ret
        // tail for buffers shorter than 16 bytes: fold in 8/4/2/1 byte pieces
8:      tbz             x2, #3, 4f
        ldr             x3, [x1], #8
        byteorder       x3, \be
        bitorder        x3, \be
        crc32\c\()x     w0, w0, x3
4:      tbz             x2, #2, 2f
        ldr             w3, [x1], #4
        byteorder       w3, \be
        bitorder        w3, \be
        crc32\c\()w     w0, w0, w3
2:      tbz             x2, #1, 1f
        ldrh            w3, [x1], #2
        byteorder16     w3, \be
        bitorder16      w3, \be
        crc32\c\()h     w0, w0, w3
1:      tbz             x2, #0, 0f
        ldrb            w3, [x1]
        bitorder8       w3, \be
        crc32\c\()b     w0, w0, w3
0:      bitorder        w0, \be
        ret
        .endm
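
/*
 * Each entry point starts with a branch to the generic C implementation
 * (crc32_le_base etc.); on CPUs that advertise ARM64_HAS_CRC32 the
 * alternatives framework patches that branch into a NOP at boot, so the
 * accelerated body below it runs instead.
 */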
        .align          5
SYM_FUNC_START(crc32_le)
alternative_if_not ARM64_HAS_CRC32
        b               crc32_le_base
alternative_else_nop_endif
        __crc32
SYM_FUNC_END(crc32_le)

        .align          5
SYM_FUNC_START(__crc32c_le)
alternative_if_not ARM64_HAS_CRC32
        b               __crc32c_le_base
alternative_else_nop_endif
        __crc32         c
SYM_FUNC_END(__crc32c_le)

        .align          5
SYM_FUNC_START(crc32_be)
alternative_if_not ARM64_HAS_CRC32
        b               crc32_be_base
alternative_else_nop_endif
        __crc32         be=1
SYM_FUNC_END(crc32_be)
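
/*
 * Illustrative use from C (a sketch, not part of this file): crc32_le()
 * returns the raw reflected CRC, so a caller wanting the conventional
 * CRC-32 value (all-ones seed, inverted result) would typically write:
 *
 *     u32 crc = crc32_le(~0U, buf, len) ^ ~0U;
 */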