crc32c-intel_glue.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Use the hardware-provided CRC32 instruction to accelerate the CRC32C
 * computation.
 * CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE)
 * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2A: Instruction Set Reference, A-M
 *
 * Copyright (C) 2008 Intel Corporation
 * Authors: Austin Zhang <[email protected]>
 *          Kent Liu <[email protected]>
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>

#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>

#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

#define SCALE_F	sizeof(unsigned long)

#ifdef CONFIG_X86_64
#define CRC32_INST "crc32q %1, %q0"
#else
#define CRC32_INST "crc32l %1, %0"
#endif
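
/*
 * CRC32_INST selects the widest CRC32 form available for this build:
 * the 64-bit crc32q on x86_64 and the 32-bit crc32l on 32-bit kernels,
 * so the word-at-a-time loop below consumes one unsigned long per
 * instruction.
 */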

#ifdef CONFIG_X86_64
/*
 * Use the carryless-multiply version of crc32c when the buffer size is
 * >= 512 bytes, to account for the FPU state save/restore overhead.
 */
#define CRC32C_PCL_BREAKEVEN	512

asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
				unsigned int crc_init);
#endif /* CONFIG_X86_64 */
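
/*
 * Process the input one byte at a time with crc32b; used for the tail
 * that is smaller than one machine word.
 */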
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	while (length--) {
		asm("crc32b %1, %0"
		    : "+r" (crc) : "rm" (*data));
		data++;
	}

	return crc;
}
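
/*
 * Main software path: consume the buffer one machine word at a time with
 * the widest available crc32 instruction, then hand any remaining tail
 * bytes to crc32c_intel_le_hw_byte().
 */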
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
	unsigned int iquotient = len / SCALE_F;
	unsigned int iremainder = len % SCALE_F;
	unsigned long *ptmp = (unsigned long *)p;

	while (iquotient--) {
		asm(CRC32_INST
		    : "+r" (crc) : "rm" (*ptmp));
		ptmp++;
	}

	if (iremainder)
		crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
					      iremainder);

	return crc;
}

/*
 * Setting the seed allows arbitrary accumulators and flexible XOR policy.
 * If your algorithm starts with ~0, then XOR with ~0 before you set the
 * seed (see the usage sketch after this function).
 */
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
			       unsigned int keylen)
{
	u32 *mctx = crypto_shash_ctx(hash);

	if (keylen != sizeof(u32))
		return -EINVAL;
	*mctx = le32_to_cpup((__le32 *)key);
	return 0;
}
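
/*
 * Usage sketch (not part of this driver; caller-side names are
 * illustrative): because ->final() inverts the accumulator and the key is
 * loaded verbatim as the starting state, a caller resuming from a previous
 * standard CRC32C result "prev" would seed with prev XOR ~0:
 *
 *	__le32 seed = cpu_to_le32(prev ^ 0xffffffff); // undo final inversion
 *	err = crypto_shash_setkey(tfm, (const u8 *)&seed, sizeof(seed));
 *
 * "tfm", "prev" and "err" are hypothetical names on the caller's side;
 * crypto_shash_setkey() is the standard shash API call.
 */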

static int crc32c_intel_init(struct shash_desc *desc)
{
	u32 *mctx = crypto_shash_ctx(desc->tfm);
	u32 *crcp = shash_desc_ctx(desc);

	*crcp = *mctx;

	return 0;
}

static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	u32 *crcp = shash_desc_ctx(desc);

	*crcp = crc32c_intel_le_hw(*crcp, data, len);
	return 0;
}
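
/*
 * Common tail for finup() and digest(): fold in the remaining data, then
 * store the bit-inverted CRC as a little-endian 32-bit digest.
 */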
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	return 0;
}

static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}

static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	u32 *crcp = shash_desc_ctx(desc);

	*(__le32 *)out = ~cpu_to_le32p(crcp);
	return 0;
}

static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
				    out);
}
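
/*
 * Default seed: ~0 is the conventional CRC32C starting value, so an
 * unkeyed tfm computes the standard CRC32C.
 */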
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
	u32 *key = crypto_tfm_ctx(tfm);

	*key = ~0;

	return 0;
}

#ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
				   unsigned int len)
{
	u32 *crcp = shash_desc_ctx(desc);

	/*
	 * Use the faster PCL version if the data size is large enough to
	 * overcome the kernel FPU state save/restore overhead.
	 */
	if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		*crcp = crc_pcl(data, len, *crcp);
		kernel_fpu_end();
	} else
		*crcp = crc32c_intel_le_hw(*crcp, data, len);
	return 0;
}

static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				    u8 *out)
{
	if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
		kernel_fpu_end();
	} else
		*(__le32 *)out =
			~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	return 0;
}

static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
				  unsigned int len, u8 *out)
{
	return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
}

static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
				   unsigned int len, u8 *out)
{
	return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
					out);
}
#endif /* CONFIG_X86_64 */
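
/*
 * Registered at priority 200 so this driver is preferred over the generic
 * table-driven "crc32c" implementation (priority 100) when SSE4.2 is
 * available.
 */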
static struct shash_alg alg = {
	.setkey			= crc32c_intel_setkey,
	.init			= crc32c_intel_init,
	.update			= crc32c_intel_update,
	.final			= crc32c_intel_final,
	.finup			= crc32c_intel_finup,
	.digest			= crc32c_intel_digest,
	.descsize		= sizeof(u32),
	.digestsize		= CHKSUM_DIGEST_SIZE,
	.base			= {
		.cra_name		= "crc32c",
		.cra_driver_name	= "crc32c-intel",
		.cra_priority		= 200,
		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(u32),
		.cra_module		= THIS_MODULE,
		.cra_init		= crc32c_intel_cra_init,
	}
};

static const struct x86_cpu_id crc32c_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
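
/*
 * Bail out unless the CPU advertises SSE4.2, which carries the crc32
 * instruction. On x86_64 parts that also have PCLMULQDQ, swap in the
 * carryless-multiply fast paths before registering.
 */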
static int __init crc32c_intel_mod_init(void)
{
	if (!x86_match_cpu(crc32c_cpu_id))
		return -ENODEV;
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
		alg.update = crc32c_pcl_intel_update;
		alg.finup = crc32c_pcl_intel_finup;
		alg.digest = crc32c_pcl_intel_digest;
	}
#endif
	return crypto_register_shash(&alg);
}

static void __exit crc32c_intel_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);

MODULE_AUTHOR("Austin Zhang <[email protected]>, Kent Liu <[email protected]>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");

MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-intel");
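
/*
 * Caller-side sketch (not part of this driver): a one-shot CRC32C through
 * the kernel shash API. The names "buf", "len", "result" and "err" are
 * illustrative; the crypto API calls are the standard ones from
 * <crypto/hash.h>, and error checking is elided for brevity.
 *
 *	struct crypto_shash *tfm = crypto_alloc_shash("crc32c", 0, 0);
 *	SHASH_DESC_ON_STACK(desc, tfm);
 *	__le32 result;
 *	int err;
 *
 *	desc->tfm = tfm;
 *	err = crypto_shash_digest(desc, buf, len, (u8 *)&result);
 *	crypto_free_shash(tfm);
 *
 * The 4-byte digest is the little-endian, bit-inverted CRC, i.e. the usual
 * CRC32C value as produced by ->final() above.
 */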