// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <[email protected]>
 * Copyright (C) 2015 Martin Willi
 */
#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>

#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
  20. asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
  21. int nrounds);
  22. asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
  23. int nrounds, unsigned int nbytes);
  24. asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
  25. asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
  26. asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
  27. const u32 *state, int nrounds);
  28. static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
  29. static inline bool neon_usable(void)
  30. {
  31. return static_branch_likely(&use_neon) && crypto_simd_usable();
  32. }
  33. static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
  34. unsigned int bytes, int nrounds)
  35. {
  36. u8 buf[CHACHA_BLOCK_SIZE];
  37. while (bytes > CHACHA_BLOCK_SIZE) {
  38. unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
  39. chacha_4block_xor_neon(state, dst, src, nrounds, l);
  40. bytes -= l;
  41. src += l;
  42. dst += l;
  43. state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
  44. }
  45. if (bytes) {
  46. const u8 *s = src;
  47. u8 *d = dst;
  48. if (bytes != CHACHA_BLOCK_SIZE)
  49. s = d = memcpy(buf, src, bytes);
  50. chacha_block_xor_neon(state, d, s, nrounds);
  51. if (d != dst)
  52. memcpy(dst, buf, bytes);
  53. state[12]++;
  54. }
  55. }
  56. void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
  57. {
  58. if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
  59. hchacha_block_arm(state, stream, nrounds);
  60. } else {
  61. kernel_neon_begin();
  62. hchacha_block_neon(state, stream, nrounds);
  63. kernel_neon_end();
  64. }
  65. }
  66. EXPORT_SYMBOL(hchacha_block_arch);
  67. void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
  68. {
  69. chacha_init_generic(state, key, iv);
  70. }
  71. EXPORT_SYMBOL(chacha_init_arch);
  72. void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
  73. int nrounds)
  74. {
  75. if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
  76. bytes <= CHACHA_BLOCK_SIZE) {
  77. chacha_doarm(dst, src, bytes, state, nrounds);
  78. state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
  79. return;
  80. }
  81. do {
  82. unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
  83. kernel_neon_begin();
  84. chacha_doneon(state, dst, src, todo, nrounds);
  85. kernel_neon_end();
  86. bytes -= todo;
  87. src += todo;
  88. dst += todo;
  89. } while (bytes);
  90. }
  91. EXPORT_SYMBOL(chacha_crypt_arch);
  92. static int chacha_stream_xor(struct skcipher_request *req,
  93. const struct chacha_ctx *ctx, const u8 *iv,
  94. bool neon)
  95. {
  96. struct skcipher_walk walk;
  97. u32 state[16];
  98. int err;
  99. err = skcipher_walk_virt(&walk, req, false);
  100. chacha_init_generic(state, ctx->key, iv);
  101. while (walk.nbytes > 0) {
  102. unsigned int nbytes = walk.nbytes;
  103. if (nbytes < walk.total)
  104. nbytes = round_down(nbytes, walk.stride);
  105. if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
  106. chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
  107. nbytes, state, ctx->nrounds);
  108. state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
  109. } else {
  110. kernel_neon_begin();
  111. chacha_doneon(state, walk.dst.virt.addr,
  112. walk.src.virt.addr, nbytes, ctx->nrounds);
  113. kernel_neon_end();
  114. }
  115. err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
  116. }
  117. return err;
  118. }
  119. static int do_chacha(struct skcipher_request *req, bool neon)
  120. {
  121. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  122. struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
  123. return chacha_stream_xor(req, ctx, req->iv, neon);
  124. }
  125. static int chacha_arm(struct skcipher_request *req)
  126. {
  127. return do_chacha(req, false);
  128. }
  129. static int chacha_neon(struct skcipher_request *req)
  130. {
  131. return do_chacha(req, neon_usable());
  132. }
  133. static int do_xchacha(struct skcipher_request *req, bool neon)
  134. {
  135. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  136. struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
  137. struct chacha_ctx subctx;
  138. u32 state[16];
  139. u8 real_iv[16];
  140. chacha_init_generic(state, ctx->key, req->iv);
  141. if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
  142. hchacha_block_arm(state, subctx.key, ctx->nrounds);
  143. } else {
  144. kernel_neon_begin();
  145. hchacha_block_neon(state, subctx.key, ctx->nrounds);
  146. kernel_neon_end();
  147. }
  148. subctx.nrounds = ctx->nrounds;
  149. memcpy(&real_iv[0], req->iv + 24, 8);
  150. memcpy(&real_iv[8], req->iv + 16, 8);
  151. return chacha_stream_xor(req, &subctx, real_iv, neon);
  152. }
  153. static int xchacha_arm(struct skcipher_request *req)
  154. {
  155. return do_xchacha(req, false);
  156. }
  157. static int xchacha_neon(struct skcipher_request *req)
  158. {
  159. return do_xchacha(req, neon_usable());
  160. }
  161. static struct skcipher_alg arm_algs[] = {
  162. {
  163. .base.cra_name = "chacha20",
  164. .base.cra_driver_name = "chacha20-arm",
  165. .base.cra_priority = 200,
  166. .base.cra_blocksize = 1,
  167. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  168. .base.cra_module = THIS_MODULE,
  169. .min_keysize = CHACHA_KEY_SIZE,
  170. .max_keysize = CHACHA_KEY_SIZE,
  171. .ivsize = CHACHA_IV_SIZE,
  172. .chunksize = CHACHA_BLOCK_SIZE,
  173. .setkey = chacha20_setkey,
  174. .encrypt = chacha_arm,
  175. .decrypt = chacha_arm,
  176. }, {
  177. .base.cra_name = "xchacha20",
  178. .base.cra_driver_name = "xchacha20-arm",
  179. .base.cra_priority = 200,
  180. .base.cra_blocksize = 1,
  181. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  182. .base.cra_module = THIS_MODULE,
  183. .min_keysize = CHACHA_KEY_SIZE,
  184. .max_keysize = CHACHA_KEY_SIZE,
  185. .ivsize = XCHACHA_IV_SIZE,
  186. .chunksize = CHACHA_BLOCK_SIZE,
  187. .setkey = chacha20_setkey,
  188. .encrypt = xchacha_arm,
  189. .decrypt = xchacha_arm,
  190. }, {
  191. .base.cra_name = "xchacha12",
  192. .base.cra_driver_name = "xchacha12-arm",
  193. .base.cra_priority = 200,
  194. .base.cra_blocksize = 1,
  195. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  196. .base.cra_module = THIS_MODULE,
  197. .min_keysize = CHACHA_KEY_SIZE,
  198. .max_keysize = CHACHA_KEY_SIZE,
  199. .ivsize = XCHACHA_IV_SIZE,
  200. .chunksize = CHACHA_BLOCK_SIZE,
  201. .setkey = chacha12_setkey,
  202. .encrypt = xchacha_arm,
  203. .decrypt = xchacha_arm,
  204. },
  205. };
  206. static struct skcipher_alg neon_algs[] = {
  207. {
  208. .base.cra_name = "chacha20",
  209. .base.cra_driver_name = "chacha20-neon",
  210. .base.cra_priority = 300,
  211. .base.cra_blocksize = 1,
  212. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  213. .base.cra_module = THIS_MODULE,
  214. .min_keysize = CHACHA_KEY_SIZE,
  215. .max_keysize = CHACHA_KEY_SIZE,
  216. .ivsize = CHACHA_IV_SIZE,
  217. .chunksize = CHACHA_BLOCK_SIZE,
  218. .walksize = 4 * CHACHA_BLOCK_SIZE,
  219. .setkey = chacha20_setkey,
  220. .encrypt = chacha_neon,
  221. .decrypt = chacha_neon,
  222. }, {
  223. .base.cra_name = "xchacha20",
  224. .base.cra_driver_name = "xchacha20-neon",
  225. .base.cra_priority = 300,
  226. .base.cra_blocksize = 1,
  227. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  228. .base.cra_module = THIS_MODULE,
  229. .min_keysize = CHACHA_KEY_SIZE,
  230. .max_keysize = CHACHA_KEY_SIZE,
  231. .ivsize = XCHACHA_IV_SIZE,
  232. .chunksize = CHACHA_BLOCK_SIZE,
  233. .walksize = 4 * CHACHA_BLOCK_SIZE,
  234. .setkey = chacha20_setkey,
  235. .encrypt = xchacha_neon,
  236. .decrypt = xchacha_neon,
  237. }, {
  238. .base.cra_name = "xchacha12",
  239. .base.cra_driver_name = "xchacha12-neon",
  240. .base.cra_priority = 300,
  241. .base.cra_blocksize = 1,
  242. .base.cra_ctxsize = sizeof(struct chacha_ctx),
  243. .base.cra_module = THIS_MODULE,
  244. .min_keysize = CHACHA_KEY_SIZE,
  245. .max_keysize = CHACHA_KEY_SIZE,
  246. .ivsize = XCHACHA_IV_SIZE,
  247. .chunksize = CHACHA_BLOCK_SIZE,
  248. .walksize = 4 * CHACHA_BLOCK_SIZE,
  249. .setkey = chacha12_setkey,
  250. .encrypt = xchacha_neon,
  251. .decrypt = xchacha_neon,
  252. }
  253. };
  254. static int __init chacha_simd_mod_init(void)
  255. {
  256. int err = 0;
  257. if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
  258. err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
  259. if (err)
  260. return err;
  261. }
  262. if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
  263. int i;
  264. switch (read_cpuid_part()) {
  265. case ARM_CPU_PART_CORTEX_A7:
  266. case ARM_CPU_PART_CORTEX_A5:
  267. /*
  268. * The Cortex-A7 and Cortex-A5 do not perform well with
  269. * the NEON implementation but do incredibly with the
  270. * scalar one and use less power.
  271. */
  272. for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
  273. neon_algs[i].base.cra_priority = 0;
  274. break;
  275. default:
  276. static_branch_enable(&use_neon);
  277. }
  278. if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
  279. err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
  280. if (err)
  281. crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
  282. }
  283. }
  284. return err;
  285. }
  286. static void __exit chacha_simd_mod_fini(void)
  287. {
  288. if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
  289. crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
  290. if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
  291. crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
  292. }
  293. }
  294. module_init(chacha_simd_mod_init);
  295. module_exit(chacha_simd_mod_fini);
  296. MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
  297. MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
  298. MODULE_LICENSE("GPL v2");
  299. MODULE_ALIAS_CRYPTO("chacha20");
  300. MODULE_ALIAS_CRYPTO("chacha20-arm");
  301. MODULE_ALIAS_CRYPTO("xchacha20");
  302. MODULE_ALIAS_CRYPTO("xchacha20-arm");
  303. MODULE_ALIAS_CRYPTO("xchacha12");
  304. MODULE_ALIAS_CRYPTO("xchacha12-arm");
  305. #ifdef CONFIG_KERNEL_MODE_NEON
  306. MODULE_ALIAS_CRYPTO("chacha20-neon");
  307. MODULE_ALIAS_CRYPTO("xchacha20-neon");
  308. MODULE_ALIAS_CRYPTO("xchacha12-neon");
  309. #endif