poly1305-glue.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
  4. *
  5. * Copyright (C) 2019 Linaro Ltd. <[email protected]>
  6. */
  7. #include <asm/hwcap.h>
  8. #include <asm/neon.h>
  9. #include <asm/simd.h>
  10. #include <asm/unaligned.h>
  11. #include <crypto/algapi.h>
  12. #include <crypto/internal/hash.h>
  13. #include <crypto/internal/poly1305.h>
  14. #include <crypto/internal/simd.h>
  15. #include <linux/cpufeature.h>
  16. #include <linux/crypto.h>
  17. #include <linux/jump_label.h>
  18. #include <linux/module.h>
/*
 * Core transform routines implemented in assembly.
 * NOTE(review): presumably generated from the OpenSSL/CRYPTOGAMS Perl
 * source mentioned in the file header — confirm against the build rules.
 * 'state' points at the accumulator/key state embedded in the desc ctx;
 * 'hibit' selects whether the implicit 2^128 bit is added per block
 * (1 for full blocks, 0 for the padded final block).
 */
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);

/* Flipped on once at module init when the CPU implements ASIMD (NEON). */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
  24. void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
  25. {
  26. poly1305_init_arm64(&dctx->h, key);
  27. dctx->s[0] = get_unaligned_le32(key + 16);
  28. dctx->s[1] = get_unaligned_le32(key + 20);
  29. dctx->s[2] = get_unaligned_le32(key + 24);
  30. dctx->s[3] = get_unaligned_le32(key + 28);
  31. dctx->buflen = 0;
  32. }
  33. EXPORT_SYMBOL(poly1305_init_arch);
  34. static int neon_poly1305_init(struct shash_desc *desc)
  35. {
  36. struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
  37. dctx->buflen = 0;
  38. dctx->rset = 0;
  39. dctx->sset = false;
  40. return 0;
  41. }
/*
 * Consume whole blocks of input for the shash interface.  The key is
 * absorbed from the data stream itself: the first 16-byte block carries
 * r (handed to the asm init routine), the next carries s (the nonce
 * addend).  Only after both are captured does real block processing
 * begin.  'hibit' is forwarded to the asm back end; 'do_neon' selects
 * the NEON routine when the caller has entered a kernel-mode NEON
 * section.
 */
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/* First block: the r half of the one-time key. */
			poly1305_init_arm64(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			/* Second block: the s half (nonce addend). */
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		/* Not enough data left for a full message block yet. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* Round down to whole blocks; the caller buffers any remainder. */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks(&dctx->h, src, len, hibit);
}
/*
 * Feed arbitrary-length input to the transform in three phases:
 * top up and flush a previously buffered partial block, process the
 * bulk of the data as whole blocks, then stash any trailing partial
 * block in the context buffer for the next call.
 */
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/*
			 * A single buffered block is not worth a NEON
			 * round-trip, hence do_neon == false here.
			 */
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		/* The callee itself rounds len down to whole blocks. */
		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* Buffer the tail fragment until more data (or final). */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}
  95. static int neon_poly1305_update(struct shash_desc *desc,
  96. const u8 *src, unsigned int srclen)
  97. {
  98. bool do_neon = crypto_simd_usable() && srclen > 128;
  99. struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
  100. if (static_branch_likely(&have_neon) && do_neon)
  101. kernel_neon_begin();
  102. neon_poly1305_do_update(dctx, src, srclen, do_neon);
  103. if (static_branch_likely(&have_neon) && do_neon)
  104. kernel_neon_end();
  105. return 0;
  106. }
/*
 * Library interface: absorb 'nbytes' of message data.  Partial blocks
 * are staged in dctx->buf; whole blocks go straight to the asm back
 * end.  NEON processing is broken into 4 KiB chunks so kernel-mode
 * NEON sections stay short — presumably to bound preemption-off
 * latency; confirm against the SIMD chunking convention used by the
 * other arch lib glue code.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	if (unlikely(dctx->buflen)) {
		/* Top up the buffered partial block first. */
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
			do {
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		/* Keep the tail fragment for the next update/final call. */
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);
  144. void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
  145. {
  146. if (unlikely(dctx->buflen)) {
  147. dctx->buf[dctx->buflen++] = 1;
  148. memset(dctx->buf + dctx->buflen, 0,
  149. POLY1305_BLOCK_SIZE - dctx->buflen);
  150. poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
  151. }
  152. poly1305_emit(&dctx->h, dst, dctx->s);
  153. memzero_explicit(dctx, sizeof(*dctx));
  154. }
  155. EXPORT_SYMBOL(poly1305_final_arch);
  156. static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
  157. {
  158. struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
  159. if (unlikely(!dctx->sset))
  160. return -ENOKEY;
  161. poly1305_final_arch(dctx, dst);
  162. return 0;
  163. }
/* shash descriptor for the NEON-accelerated "poly1305" implementation. */
static struct shash_alg neon_poly1305_alg = {
	.init			= neon_poly1305_init,
	.update			= neon_poly1305_update,
	.final			= neon_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	/* Outranks the generic implementation (typically priority 100). */
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
};
  176. static int __init neon_poly1305_mod_init(void)
  177. {
  178. if (!cpu_have_named_feature(ASIMD))
  179. return 0;
  180. static_branch_enable(&have_neon);
  181. return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
  182. crypto_register_shash(&neon_poly1305_alg) : 0;
  183. }
  184. static void __exit neon_poly1305_mod_exit(void)
  185. {
  186. if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
  187. crypto_unregister_shash(&neon_poly1305_alg);
  188. }
module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
/* Allow auto-loading by both the generic and the driver-specific name. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");