aes-spe-glue.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue code for AES implementation for SPE instructions (PPC)
 *
 * Based on the generic implementation. The assembler module takes care
 * of the SPE registers so it can run from interrupt context.
 *
 * Copyright (c) 2015 Markus Stockhausen <[email protected]>
 */
  10. #include <crypto/aes.h>
  11. #include <linux/module.h>
  12. #include <linux/init.h>
  13. #include <linux/types.h>
  14. #include <linux/errno.h>
  15. #include <linux/crypto.h>
  16. #include <asm/byteorder.h>
  17. #include <asm/switch_to.h>
  18. #include <crypto/algapi.h>
  19. #include <crypto/internal/skcipher.h>
  20. #include <crypto/xts.h>
  21. #include <crypto/gf128mul.h>
  22. #include <crypto/scatterwalk.h>
  23. /*
  24. * MAX_BYTES defines the number of bytes that are allowed to be processed
  25. * between preempt_disable() and preempt_enable(). e500 cores can issue two
  26. * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
  27. * bit unit (SU2). One of these can be a memory access that is executed via
  28. * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
  29. * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
  30. * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
  31. * included. Even with the low end model clocked at 667 MHz this equals to a
  32. * critical time window of less than 30us. The value has been chosen to
  33. * process a 512 byte disk block in one or a large 1400 bytes IPsec network
  34. * packet in two runs.
  35. *
  36. */
  37. #define MAX_BYTES 768
  38. struct ppc_aes_ctx {
  39. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  40. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  41. u32 rounds;
  42. };
  43. struct ppc_xts_ctx {
  44. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  45. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  46. u32 key_twk[AES_MAX_KEYLENGTH_U32];
  47. u32 rounds;
  48. };
  49. extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
  50. extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
  51. extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  52. u32 bytes);
  53. extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  54. u32 bytes);
  55. extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  56. u32 bytes, u8 *iv);
  57. extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  58. u32 bytes, u8 *iv);
  59. extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  60. u32 bytes, u8 *iv);
  61. extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  62. u32 bytes, u8 *iv, u32 *key_twk);
  63. extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  64. u32 bytes, u8 *iv, u32 *key_twk);
  65. extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
  66. extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
  67. extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
  68. extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
  69. unsigned int key_len);
  70. static void spe_begin(void)
  71. {
  72. /* disable preemption and save users SPE registers if required */
  73. preempt_disable();
  74. enable_kernel_spe();
  75. }
  76. static void spe_end(void)
  77. {
  78. disable_kernel_spe();
  79. /* reenable preemption */
  80. preempt_enable();
  81. }
  82. static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
  83. unsigned int key_len)
  84. {
  85. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  86. switch (key_len) {
  87. case AES_KEYSIZE_128:
  88. ctx->rounds = 4;
  89. ppc_expand_key_128(ctx->key_enc, in_key);
  90. break;
  91. case AES_KEYSIZE_192:
  92. ctx->rounds = 5;
  93. ppc_expand_key_192(ctx->key_enc, in_key);
  94. break;
  95. case AES_KEYSIZE_256:
  96. ctx->rounds = 6;
  97. ppc_expand_key_256(ctx->key_enc, in_key);
  98. break;
  99. default:
  100. return -EINVAL;
  101. }
  102. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  103. return 0;
  104. }
  105. static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
  106. const u8 *in_key, unsigned int key_len)
  107. {
  108. return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
  109. }
  110. static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
  111. unsigned int key_len)
  112. {
  113. struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
  114. int err;
  115. err = xts_verify_key(tfm, in_key, key_len);
  116. if (err)
  117. return err;
  118. key_len >>= 1;
  119. switch (key_len) {
  120. case AES_KEYSIZE_128:
  121. ctx->rounds = 4;
  122. ppc_expand_key_128(ctx->key_enc, in_key);
  123. ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
  124. break;
  125. case AES_KEYSIZE_192:
  126. ctx->rounds = 5;
  127. ppc_expand_key_192(ctx->key_enc, in_key);
  128. ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
  129. break;
  130. case AES_KEYSIZE_256:
  131. ctx->rounds = 6;
  132. ppc_expand_key_256(ctx->key_enc, in_key);
  133. ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
  134. break;
  135. default:
  136. return -EINVAL;
  137. }
  138. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  139. return 0;
  140. }
  141. static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  142. {
  143. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  144. spe_begin();
  145. ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
  146. spe_end();
  147. }
  148. static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  149. {
  150. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  151. spe_begin();
  152. ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
  153. spe_end();
  154. }
  155. static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
  156. {
  157. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  158. struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
  159. struct skcipher_walk walk;
  160. unsigned int nbytes;
  161. int err;
  162. err = skcipher_walk_virt(&walk, req, false);
  163. while ((nbytes = walk.nbytes) != 0) {
  164. nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
  165. nbytes = round_down(nbytes, AES_BLOCK_SIZE);
  166. spe_begin();
  167. if (enc)
  168. ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  169. ctx->key_enc, ctx->rounds, nbytes);
  170. else
  171. ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  172. ctx->key_dec, ctx->rounds, nbytes);
  173. spe_end();
  174. err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
  175. }
  176. return err;
  177. }
  178. static int ppc_ecb_encrypt(struct skcipher_request *req)
  179. {
  180. return ppc_ecb_crypt(req, true);
  181. }
  182. static int ppc_ecb_decrypt(struct skcipher_request *req)
  183. {
  184. return ppc_ecb_crypt(req, false);
  185. }
  186. static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
  187. {
  188. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  189. struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
  190. struct skcipher_walk walk;
  191. unsigned int nbytes;
  192. int err;
  193. err = skcipher_walk_virt(&walk, req, false);
  194. while ((nbytes = walk.nbytes) != 0) {
  195. nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
  196. nbytes = round_down(nbytes, AES_BLOCK_SIZE);
  197. spe_begin();
  198. if (enc)
  199. ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  200. ctx->key_enc, ctx->rounds, nbytes,
  201. walk.iv);
  202. else
  203. ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  204. ctx->key_dec, ctx->rounds, nbytes,
  205. walk.iv);
  206. spe_end();
  207. err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
  208. }
  209. return err;
  210. }
  211. static int ppc_cbc_encrypt(struct skcipher_request *req)
  212. {
  213. return ppc_cbc_crypt(req, true);
  214. }
  215. static int ppc_cbc_decrypt(struct skcipher_request *req)
  216. {
  217. return ppc_cbc_crypt(req, false);
  218. }
  219. static int ppc_ctr_crypt(struct skcipher_request *req)
  220. {
  221. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  222. struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
  223. struct skcipher_walk walk;
  224. unsigned int nbytes;
  225. int err;
  226. err = skcipher_walk_virt(&walk, req, false);
  227. while ((nbytes = walk.nbytes) != 0) {
  228. nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
  229. if (nbytes < walk.total)
  230. nbytes = round_down(nbytes, AES_BLOCK_SIZE);
  231. spe_begin();
  232. ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
  233. ctx->key_enc, ctx->rounds, nbytes, walk.iv);
  234. spe_end();
  235. err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
  236. }
  237. return err;
  238. }
  239. static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
  240. {
  241. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  242. struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
  243. struct skcipher_walk walk;
  244. unsigned int nbytes;
  245. int err;
  246. u32 *twk;
  247. err = skcipher_walk_virt(&walk, req, false);
  248. twk = ctx->key_twk;
  249. while ((nbytes = walk.nbytes) != 0) {
  250. nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
  251. nbytes = round_down(nbytes, AES_BLOCK_SIZE);
  252. spe_begin();
  253. if (enc)
  254. ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  255. ctx->key_enc, ctx->rounds, nbytes,
  256. walk.iv, twk);
  257. else
  258. ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  259. ctx->key_dec, ctx->rounds, nbytes,
  260. walk.iv, twk);
  261. spe_end();
  262. twk = NULL;
  263. err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
  264. }
  265. return err;
  266. }
  267. static int ppc_xts_encrypt(struct skcipher_request *req)
  268. {
  269. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  270. struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
  271. int tail = req->cryptlen % AES_BLOCK_SIZE;
  272. int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
  273. struct skcipher_request subreq;
  274. u8 b[2][AES_BLOCK_SIZE];
  275. int err;
  276. if (req->cryptlen < AES_BLOCK_SIZE)
  277. return -EINVAL;
  278. if (tail) {
  279. subreq = *req;
  280. skcipher_request_set_crypt(&subreq, req->src, req->dst,
  281. req->cryptlen - tail, req->iv);
  282. req = &subreq;
  283. }
  284. err = ppc_xts_crypt(req, true);
  285. if (err || !tail)
  286. return err;
  287. scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
  288. memcpy(b[1], b[0], tail);
  289. scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
  290. spe_begin();
  291. ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
  292. req->iv, NULL);
  293. spe_end();
  294. scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
  295. return 0;
  296. }
  297. static int ppc_xts_decrypt(struct skcipher_request *req)
  298. {
  299. struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
  300. struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
  301. int tail = req->cryptlen % AES_BLOCK_SIZE;
  302. int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
  303. struct skcipher_request subreq;
  304. u8 b[3][AES_BLOCK_SIZE];
  305. le128 twk;
  306. int err;
  307. if (req->cryptlen < AES_BLOCK_SIZE)
  308. return -EINVAL;
  309. if (tail) {
  310. subreq = *req;
  311. skcipher_request_set_crypt(&subreq, req->src, req->dst,
  312. offset, req->iv);
  313. req = &subreq;
  314. }
  315. err = ppc_xts_crypt(req, false);
  316. if (err || !tail)
  317. return err;
  318. scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
  319. spe_begin();
  320. if (!offset)
  321. ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
  322. AES_BLOCK_SIZE);
  323. gf128mul_x_ble(&twk, (le128 *)req->iv);
  324. ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
  325. (u8 *)&twk, NULL);
  326. memcpy(b[0], b[2], tail);
  327. memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
  328. ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
  329. req->iv, NULL);
  330. spe_end();
  331. scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
  332. return 0;
  333. }
  334. /*
  335. * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
  336. * because the e500 platform can handle unaligned reads/writes very efficiently.
  337. * This improves IPsec thoughput by another few percent. Additionally we assume
  338. * that AES context is always aligned to at least 8 bytes because it is created
  339. * with kmalloc() in the crypto infrastructure
  340. */
  341. static struct crypto_alg aes_cipher_alg = {
  342. .cra_name = "aes",
  343. .cra_driver_name = "aes-ppc-spe",
  344. .cra_priority = 300,
  345. .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
  346. .cra_blocksize = AES_BLOCK_SIZE,
  347. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  348. .cra_alignmask = 0,
  349. .cra_module = THIS_MODULE,
  350. .cra_u = {
  351. .cipher = {
  352. .cia_min_keysize = AES_MIN_KEY_SIZE,
  353. .cia_max_keysize = AES_MAX_KEY_SIZE,
  354. .cia_setkey = ppc_aes_setkey,
  355. .cia_encrypt = ppc_aes_encrypt,
  356. .cia_decrypt = ppc_aes_decrypt
  357. }
  358. }
  359. };
  360. static struct skcipher_alg aes_skcipher_algs[] = {
  361. {
  362. .base.cra_name = "ecb(aes)",
  363. .base.cra_driver_name = "ecb-ppc-spe",
  364. .base.cra_priority = 300,
  365. .base.cra_blocksize = AES_BLOCK_SIZE,
  366. .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
  367. .base.cra_module = THIS_MODULE,
  368. .min_keysize = AES_MIN_KEY_SIZE,
  369. .max_keysize = AES_MAX_KEY_SIZE,
  370. .setkey = ppc_aes_setkey_skcipher,
  371. .encrypt = ppc_ecb_encrypt,
  372. .decrypt = ppc_ecb_decrypt,
  373. }, {
  374. .base.cra_name = "cbc(aes)",
  375. .base.cra_driver_name = "cbc-ppc-spe",
  376. .base.cra_priority = 300,
  377. .base.cra_blocksize = AES_BLOCK_SIZE,
  378. .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
  379. .base.cra_module = THIS_MODULE,
  380. .min_keysize = AES_MIN_KEY_SIZE,
  381. .max_keysize = AES_MAX_KEY_SIZE,
  382. .ivsize = AES_BLOCK_SIZE,
  383. .setkey = ppc_aes_setkey_skcipher,
  384. .encrypt = ppc_cbc_encrypt,
  385. .decrypt = ppc_cbc_decrypt,
  386. }, {
  387. .base.cra_name = "ctr(aes)",
  388. .base.cra_driver_name = "ctr-ppc-spe",
  389. .base.cra_priority = 300,
  390. .base.cra_blocksize = 1,
  391. .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
  392. .base.cra_module = THIS_MODULE,
  393. .min_keysize = AES_MIN_KEY_SIZE,
  394. .max_keysize = AES_MAX_KEY_SIZE,
  395. .ivsize = AES_BLOCK_SIZE,
  396. .setkey = ppc_aes_setkey_skcipher,
  397. .encrypt = ppc_ctr_crypt,
  398. .decrypt = ppc_ctr_crypt,
  399. .chunksize = AES_BLOCK_SIZE,
  400. }, {
  401. .base.cra_name = "xts(aes)",
  402. .base.cra_driver_name = "xts-ppc-spe",
  403. .base.cra_priority = 300,
  404. .base.cra_blocksize = AES_BLOCK_SIZE,
  405. .base.cra_ctxsize = sizeof(struct ppc_xts_ctx),
  406. .base.cra_module = THIS_MODULE,
  407. .min_keysize = AES_MIN_KEY_SIZE * 2,
  408. .max_keysize = AES_MAX_KEY_SIZE * 2,
  409. .ivsize = AES_BLOCK_SIZE,
  410. .setkey = ppc_xts_setkey,
  411. .encrypt = ppc_xts_encrypt,
  412. .decrypt = ppc_xts_decrypt,
  413. }
  414. };
  415. static int __init ppc_aes_mod_init(void)
  416. {
  417. int err;
  418. err = crypto_register_alg(&aes_cipher_alg);
  419. if (err)
  420. return err;
  421. err = crypto_register_skciphers(aes_skcipher_algs,
  422. ARRAY_SIZE(aes_skcipher_algs));
  423. if (err)
  424. crypto_unregister_alg(&aes_cipher_alg);
  425. return err;
  426. }
  427. static void __exit ppc_aes_mod_fini(void)
  428. {
  429. crypto_unregister_alg(&aes_cipher_alg);
  430. crypto_unregister_skciphers(aes_skcipher_algs,
  431. ARRAY_SIZE(aes_skcipher_algs));
  432. }
  433. module_init(ppc_aes_mod_init);
  434. module_exit(ppc_aes_mod_fini);
  435. MODULE_LICENSE("GPL");
  436. MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
  437. MODULE_ALIAS_CRYPTO("aes");
  438. MODULE_ALIAS_CRYPTO("ecb(aes)");
  439. MODULE_ALIAS_CRYPTO("cbc(aes)");
  440. MODULE_ALIAS_CRYPTO("ctr(aes)");
  441. MODULE_ALIAS_CRYPTO("xts(aes)");
  442. MODULE_ALIAS_CRYPTO("aes-ppc-spe");