// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
 *
 * Copyright (C) 2013-2015 Corentin LABBE <[email protected]>
 *
 * This file adds support for MD5 and SHA1.
 *
 * You can find the datasheet in Documentation/arm/sunxi.rst
 */
#include "sun4i-ss.h"
#include <asm/unaligned.h>
#include <crypto/md5.h>
#include <linux/scatterlist.h>
  14. /* This is a totally arbitrary value */
  15. #define SS_TIMEOUT 100
  16. int sun4i_hash_crainit(struct crypto_tfm *tfm)
  17. {
  18. struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
  19. struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
  20. struct sun4i_ss_alg_template *algt;
  21. int err;
  22. memset(op, 0, sizeof(struct sun4i_tfm_ctx));
  23. algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  24. op->ss = algt->ss;
  25. err = pm_runtime_resume_and_get(op->ss->dev);
  26. if (err < 0)
  27. return err;
  28. crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
  29. sizeof(struct sun4i_req_ctx));
  30. return 0;
  31. }
  32. void sun4i_hash_craexit(struct crypto_tfm *tfm)
  33. {
  34. struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
  35. pm_runtime_put(op->ss->dev);
  36. }
  37. /* sun4i_hash_init: initialize request context */
  38. int sun4i_hash_init(struct ahash_request *areq)
  39. {
  40. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  41. struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
  42. struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
  43. struct sun4i_ss_alg_template *algt;
  44. memset(op, 0, sizeof(struct sun4i_req_ctx));
  45. algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  46. op->mode = algt->mode;
  47. return 0;
  48. }
  49. int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
  50. {
  51. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  52. struct md5_state *octx = out;
  53. int i;
  54. octx->byte_count = op->byte_count + op->len;
  55. memcpy(octx->block, op->buf, op->len);
  56. if (op->byte_count) {
  57. for (i = 0; i < 4; i++)
  58. octx->hash[i] = op->hash[i];
  59. } else {
  60. octx->hash[0] = SHA1_H0;
  61. octx->hash[1] = SHA1_H1;
  62. octx->hash[2] = SHA1_H2;
  63. octx->hash[3] = SHA1_H3;
  64. }
  65. return 0;
  66. }
  67. int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
  68. {
  69. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  70. const struct md5_state *ictx = in;
  71. int i;
  72. sun4i_hash_init(areq);
  73. op->byte_count = ictx->byte_count & ~0x3F;
  74. op->len = ictx->byte_count & 0x3F;
  75. memcpy(op->buf, ictx->block, op->len);
  76. for (i = 0; i < 4; i++)
  77. op->hash[i] = ictx->hash[i];
  78. return 0;
  79. }
  80. int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
  81. {
  82. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  83. struct sha1_state *octx = out;
  84. int i;
  85. octx->count = op->byte_count + op->len;
  86. memcpy(octx->buffer, op->buf, op->len);
  87. if (op->byte_count) {
  88. for (i = 0; i < 5; i++)
  89. octx->state[i] = op->hash[i];
  90. } else {
  91. octx->state[0] = SHA1_H0;
  92. octx->state[1] = SHA1_H1;
  93. octx->state[2] = SHA1_H2;
  94. octx->state[3] = SHA1_H3;
  95. octx->state[4] = SHA1_H4;
  96. }
  97. return 0;
  98. }
  99. int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
  100. {
  101. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  102. const struct sha1_state *ictx = in;
  103. int i;
  104. sun4i_hash_init(areq);
  105. op->byte_count = ictx->count & ~0x3F;
  106. op->len = ictx->count & 0x3F;
  107. memcpy(op->buf, ictx->buffer, op->len);
  108. for (i = 0; i < 5; i++)
  109. op->hash[i] = ictx->state[i];
  110. return 0;
  111. }
  112. #define SS_HASH_UPDATE 1
  113. #define SS_HASH_FINAL 2
/*
 * sun4i_hash_update: update hash engine
 *
 * Could be used for both SHA1 and MD5.
 * Writes data in steps of 32 bits and puts them in the SS.
 *
 * Since we cannot leave partial data and hash state in the engine,
 * we need to get the hash state at the end of this function.
 * We can get the hash state every 64 bytes.
 *
 * So the first task is to get the number of bytes to write to the SS
 * modulo 64. The extra bytes go to a temporary buffer op->buf storing
 * op->len bytes.
 *
 * So at the beginning of update():
 * if op->len + areq->nbytes < 64
 * => all data will be written to the wait buffer (op->buf) and end=0
 * if not, write all data from op->buf to the device and set end to
 * complete to 64 bytes
 *
 * example 1:
 * update1 60 bytes => op->len=60
 * update2 60 bytes => need one more word to have 64 bytes
 * end=4
 * so write all data from op->buf and one word of the SGs,
 * write the remaining data to op->buf
 * final state: op->len=56
 */
/*
 * sun4i_hash - feed the SS engine and/or finalize a hash request
 *
 * Single worker behind update/final/finup/digest; op->flags selects which
 * phases (SS_HASH_UPDATE, SS_HASH_FINAL) run. Returns 0 on success,
 * -EINVAL on bound/overflow errors, -EIO on engine timeout.
 */
static int sun4i_hash(struct ahash_request *areq)
{
	/*
	 * i is the total bytes read from SGs, to be compared to areq->nbytes
	 * i is important because we cannot rely on SG length since the sum of
	 * SG->length could be greater than areq->nbytes
	 *
	 * end is the position when we need to stop writing to the device,
	 * to be compared to i
	 *
	 * in_i: advancement in the current SG
	 */
	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
	unsigned int in_i = 0;
	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
	struct sun4i_ss_ctx *ss = tfmctx->ss;
	struct sun4i_ss_alg_template *algt;
	struct scatterlist *in_sg = areq->src;
	struct sg_mapping_iter mi;
	int in_r, err = 0;
	size_t copied = 0;
	u32 wb = 0;

	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
		__func__, crypto_tfm_alg_name(areq->base.tfm),
		op->byte_count, areq->nbytes, op->mode,
		op->len, op->hash[0]);

	/* An empty update with nothing to finalize is a no-op */
	if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
		return 0;

	/* protect against overflow */
	if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
		dev_err(ss->dev, "Cannot process too large request\n");
		return -EINVAL;
	}

	/*
	 * Not enough data to fill a 64-byte block and no finalization
	 * requested: just stash the bytes in the wait buffer and return.
	 */
	if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
		/* linearize data to op->buf */
		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
					    op->buf + op->len, areq->nbytes, 0);
		op->len += copied;
		return 0;
	}

	/* The SS is a single shared engine: serialize all hardware access */
	spin_lock_bh(&ss->slock);

	/*
	 * if some data have been processed before,
	 * we need to restore the partial hash state
	 */
	if (op->byte_count) {
		ivmode = SS_IV_ARBITRARY;
		for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
	}
	/* Enable the device */
	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);

	if (!(op->flags & SS_HASH_UPDATE))
		goto hash_final;

	/* start of handling data */
	if (!(op->flags & SS_HASH_FINAL)) {
		/* Without FINAL we may only feed whole 64-byte blocks */
		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;

		if (end > areq->nbytes || areq->nbytes - end > 63) {
			dev_err(ss->dev, "ERROR: Bound error %u %u\n",
				end, areq->nbytes);
			err = -EINVAL;
			goto release_ss;
		}
	} else {
		/* Since we have the flag final, we can go up to modulo 4 */
		if (areq->nbytes < 4)
			end = 0;
		else
			end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
	}

	/* TODO if SGlen % 4 and !op->len then DMA */
	i = 1;
	while (in_sg && i == 1) {
		if (in_sg->length % 4)
			i = 0;
		in_sg = sg_next(in_sg);
	}
	if (i == 1 && !op->len && areq->nbytes)
		dev_dbg(ss->dev, "We can DMA\n");

	i = 0;
	sg_miter_start(&mi, areq->src, sg_nents(areq->src),
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	sg_miter_next(&mi);
	in_i = 0;

	/* Main PIO loop: push up to 'end' bytes into the RX FIFO */
	do {
		/*
		 * we need to linearize in two cases:
		 * - the buffer is already used
		 * - the SG does not have enough bytes remaining ( < 4)
		 */
		if (op->len || (mi.length - in_i) < 4) {
			/*
			 * if we have entered here we have two reasons to stop
			 * - the buffer is full
			 * - we reached the end
			 */
			while (op->len < 64 && i < end) {
				/* how many bytes we can read from current SG */
				in_r = min(end - i, 64 - op->len);
				in_r = min_t(size_t, mi.length - in_i, in_r);
				memcpy(op->buf + op->len, mi.addr + in_i, in_r);
				op->len += in_r;
				i += in_r;
				in_i += in_r;
				if (in_i == mi.length) {
					sg_miter_next(&mi);
					in_i = 0;
				}
			}
			if (op->len > 3 && !(op->len % 4)) {
				/* write buf to the device */
				writesl(ss->base + SS_RXFIFO, op->buf,
					op->len / 4);
				op->byte_count += op->len;
				op->len = 0;
			}
		}
		/* Fast path: the SG chunk is word-aligned, feed it directly */
		if (mi.length - in_i > 3 && i < end) {
			/* how many bytes we can read from current SG */
			in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i);
			in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r);
			/* how many bytes we can write in the device*/
			todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
			writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
			op->byte_count += todo * 4;
			i += todo * 4;
			in_i += todo * 4;
			rx_cnt -= todo;
			/* FIFO credit exhausted: re-read the free space count */
			if (!rx_cnt) {
				spaces = readl(ss->base + SS_FCSR);
				rx_cnt = SS_RXFIFO_SPACES(spaces);
			}
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	} while (i < end);
	/*
	 * Now we have written to the device all that we can,
	 * store the remaining bytes in op->buf
	 */
	if ((areq->nbytes - i) < 64) {
		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
			/* how many bytes we can read from current SG */
			in_r = min(areq->nbytes - i, 64 - op->len);
			in_r = min_t(size_t, mi.length - in_i, in_r);
			memcpy(op->buf + op->len, mi.addr + in_i, in_r);
			op->len += in_r;
			i += in_r;
			in_i += in_r;
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	}

	sg_miter_stop(&mi);

	/*
	 * End of data process
	 * Now if we have the flag final go to finalize part
	 * If not, store the partial hash
	 */
	if (op->flags & SS_HASH_FINAL)
		goto hash_final;

	/* Ask the engine to emit the intermediate digest */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	/* Save the partial hash so the next update can restore it as IV */
	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);

	goto release_ss;

/*
 * hash_final: finalize hashing operation
 *
 * If we have some remaining bytes, we write them.
 * Then ask the SS for finalizing the hashing operation
 *
 * I do not check RX FIFO size in this function since the size is 32
 * after each enabling and this function neither write more than 32 words.
 * If we come from the update part, we cannot have more than
 * 3 remaining bytes to write and SS is fast enough to not care about it.
 */

hash_final:
	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG)) {
		algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
		algt->stat_req++;
	}

	/* write the remaining words of the wait buffer */
	if (op->len) {
		nwait = op->len / 4;
		if (nwait) {
			writesl(ss->base + SS_RXFIFO, op->buf, nwait);
			op->byte_count += 4 * nwait;
		}

		/* nbw = number of trailing bytes (< 4) left in the buffer */
		nbw = op->len - 4 * nwait;
		if (nbw) {
			wb = le32_to_cpup((__le32 *)(op->buf + nwait * 4));
			wb &= GENMASK((nbw * 8) - 1, 0);

			op->byte_count += nbw;
		}
	}

	/* write the remaining bytes of the nbw buffer */
	/* Append the mandatory padding bit (0x80) right after the data */
	wb |= ((1 << 7) << (nbw * 8));
	((__le32 *)bf)[j++] = cpu_to_le32(wb);

	/*
	 * number of spaces to pad to obtain 64 bytes minus 8 (size) minus 4
	 * (final 1). I take the operations from other MD5/SHA1 implementations
	 */

	/* last block size */
	fill = 64 - (op->byte_count % 64);
	min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));

	/* if we can't fill all data, jump to the next 64 block */
	if (fill < min_fill)
		fill += 64;

	j += (fill - min_fill) / sizeof(u32);

	/* write the length of data */
	/* SHA1 wants the bit length big-endian, MD5 little-endian */
	if (op->mode == SS_OP_SHA1) {
		__be64 *bits = (__be64 *)&bf[j];
		*bits = cpu_to_be64(op->byte_count << 3);
		j += 2;
	} else {
		__le64 *bits = (__le64 *)&bf[j];
		*bits = cpu_to_le64(op->byte_count << 3);
		j += 2;
	}
	writesl(ss->base + SS_RXFIFO, bf, j);

	/* Tell the SS to stop the hashing */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);

	/*
	 * Wait for SS to finish the hash.
	 * The timeout could happen only in case of bad overclocking
	 * or driver bug.
	 */
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	/* Get the hash from the device */
	if (op->mode == SS_OP_SHA1) {
		for (i = 0; i < 5; i++) {
			v = readl(ss->base + SS_MD0 + i * 4);
			/* some variants store SHA1 words already byte-swapped */
			if (ss->variant->sha1_in_be)
				put_unaligned_le32(v, areq->result + i * 4);
			else
				put_unaligned_be32(v, areq->result + i * 4);
		}
	} else {
		for (i = 0; i < 4; i++) {
			v = readl(ss->base + SS_MD0 + i * 4);
			put_unaligned_le32(v, areq->result + i * 4);
		}
	}

release_ss:
	/* Disable the engine before releasing the shared lock */
	writel(0, ss->base + SS_CTL);
	spin_unlock_bh(&ss->slock);
	return err;
}
  436. int sun4i_hash_final(struct ahash_request *areq)
  437. {
  438. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  439. op->flags = SS_HASH_FINAL;
  440. return sun4i_hash(areq);
  441. }
  442. int sun4i_hash_update(struct ahash_request *areq)
  443. {
  444. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  445. op->flags = SS_HASH_UPDATE;
  446. return sun4i_hash(areq);
  447. }
  448. /* sun4i_hash_finup: finalize hashing operation after an update */
  449. int sun4i_hash_finup(struct ahash_request *areq)
  450. {
  451. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  452. op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
  453. return sun4i_hash(areq);
  454. }
  455. /* combo of init/update/final functions */
  456. int sun4i_hash_digest(struct ahash_request *areq)
  457. {
  458. int err;
  459. struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  460. err = sun4i_hash_init(areq);
  461. if (err)
  462. return err;
  463. op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
  464. return sun4i_hash(areq);
  465. }