nx-842.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Cryptographic API for the NX-842 hardware compression.
  4. *
  5. * Copyright (C) IBM Corporation, 2011-2015
  6. *
  7. * Designer of the Power data compression engine:
  8. * Bulent Abali <[email protected]>
  9. *
  10. * Original Authors: Robert Jennings <[email protected]>
  11. * Seth Jennings <[email protected]>
  12. *
  13. * Rewrite: Dan Streetman <[email protected]>
  14. *
  15. * This is an interface to the NX-842 compression hardware in PowerPC
* processors. Most of the complexity of this driver is due to the fact that
  17. * the NX-842 compression hardware requires the input and output data buffers
  18. * to be specifically aligned, to be a specific multiple in length, and within
  19. * specific minimum and maximum lengths. Those restrictions, provided by the
  20. * nx-842 driver via nx842_constraints, mean this driver must use bounce
  21. * buffers and headers to correct misaligned in or out buffers, and to split
  22. * input buffers that are too large.
  23. *
  24. * This driver will fall back to software decompression if the hardware
  25. * decompression fails, so this driver's decompression should never fail as
  26. * long as the provided compressed buffer is valid. Any compressed buffer
  27. * created by this driver will have a header (except ones where the input
  28. * perfectly matches the constraints); so users of this driver cannot simply
  29. * pass a compressed buffer created by this driver over to the 842 software
  30. * decompression library. Instead, users must use this driver to decompress;
  31. * if the hardware fails or is unavailable, the compressed buffer will be
  32. * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
  33. * software decompression library.
  34. *
  35. * This does not fall back to software compression, however, since the caller
  36. * of this function is specifically requesting hardware compression; if the
  37. * hardware compression fails, the caller can fall back to software
  38. * compression, and the raw 842 compressed buffer that the software compressor
  39. * creates can be passed to this driver for hardware decompression; any
  40. * buffer without our specific header magic is assumed to be a raw 842 buffer
  41. * and passed directly to the hardware. Note that the software compression
  42. * library will produce a compressed buffer that is incompatible with the
  43. * hardware decompressor if the original input buffer length is not a multiple
  44. * of 8; if such a compressed buffer is passed to this driver for
  45. * decompression, the hardware will reject it and this driver will then pass
  46. * it over to the software library for decompression.
  47. */
  48. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  49. #include <linux/vmalloc.h>
  50. #include <linux/sw842.h>
  51. #include <linux/spinlock.h>
  52. #include "nx-842.h"
  53. /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
  54. * template (see lib/842/842.h), so this magic number will never appear at
  55. * the start of a raw 842 compressed buffer. That is important, as any buffer
  56. * passed to us without this magic is assumed to be a raw 842 compressed
  57. * buffer, and passed directly to the hardware to decompress.
  58. */
  59. #define NX842_CRYPTO_MAGIC (0xf842)
  60. #define NX842_CRYPTO_HEADER_SIZE(g) \
  61. (sizeof(struct nx842_crypto_header) + \
  62. sizeof(struct nx842_crypto_header_group) * (g))
  63. #define NX842_CRYPTO_HEADER_MAX_SIZE \
  64. NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
  65. /* bounce buffer size */
  66. #define BOUNCE_BUFFER_ORDER (2)
  67. #define BOUNCE_BUFFER_SIZE \
  68. ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
  69. /* try longer on comp because we can fallback to sw decomp if hw is busy */
  70. #define COMP_BUSY_TIMEOUT (250) /* ms */
  71. #define DECOMP_BUSY_TIMEOUT (50) /* ms */
/* Progress state for one compress/decompress call: the current input and
 * output cursors, how many bytes remain in each buffer, and the running
 * total of output bytes produced (see update_param()).
 */
struct nx842_crypto_param {
	u8 *in;			/* current read position in the input buffer */
	unsigned int iremain;	/* input bytes remaining at ->in */
	u8 *out;		/* current write position in the output buffer */
	unsigned int oremain;	/* output bytes still available at ->out */
	unsigned int ototal;	/* total output bytes produced so far */
};
  79. static int update_param(struct nx842_crypto_param *p,
  80. unsigned int slen, unsigned int dlen)
  81. {
  82. if (p->iremain < slen)
  83. return -EOVERFLOW;
  84. if (p->oremain < dlen)
  85. return -ENOSPC;
  86. p->in += slen;
  87. p->iremain -= slen;
  88. p->out += dlen;
  89. p->oremain -= dlen;
  90. p->ototal += dlen;
  91. return 0;
  92. }
  93. int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
  94. {
  95. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  96. spin_lock_init(&ctx->lock);
  97. ctx->driver = driver;
  98. ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
  99. ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
  100. ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
  101. if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
  102. kfree(ctx->wmem);
  103. free_page((unsigned long)ctx->sbounce);
  104. free_page((unsigned long)ctx->dbounce);
  105. return -ENOMEM;
  106. }
  107. return 0;
  108. }
  109. EXPORT_SYMBOL_GPL(nx842_crypto_init);
  110. void nx842_crypto_exit(struct crypto_tfm *tfm)
  111. {
  112. struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
  113. kfree(ctx->wmem);
  114. free_page((unsigned long)ctx->sbounce);
  115. free_page((unsigned long)ctx->dbounce);
  116. }
  117. EXPORT_SYMBOL_GPL(nx842_crypto_exit);
  118. static void check_constraints(struct nx842_constraints *c)
  119. {
  120. /* limit maximum, to always have enough bounce buffer to decompress */
  121. if (c->maximum > BOUNCE_BUFFER_SIZE)
  122. c->maximum = BOUNCE_BUFFER_SIZE;
  123. }
  124. static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
  125. {
  126. int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
  127. /* compress should have added space for header */
  128. if (s > be16_to_cpu(hdr->group[0].padding)) {
  129. pr_err("Internal error: no space for header\n");
  130. return -EINVAL;
  131. }
  132. memcpy(buf, hdr, s);
  133. print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
  134. return 0;
  135. }
/*
 * Compress one group: feed as much of p->in to the hardware as the
 * constraints allow, writing compressed data into p->out (after @hdrsize
 * reserved bytes on the first group), and record the group's geometry
 * in @g.  Uses ctx->sbounce/ctx->dbounce when the caller's buffers are
 * misaligned or mis-sized; retries on -EBUSY until COMP_BUSY_TIMEOUT.
 *
 * On success, *@ignore is set to the number of padding bytes appended to
 * reach the minimum/multiple input length (nonzero only on the last
 * group).  Returns 0 and advances @p, or a negative errno.
 */
static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	/* adjust the input length to the hw's multiple/minimum, and clamp
	 * both lengths to the hw maximum (extra input becomes a later group)
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	/* bounce the input if it needs zero-padding or realignment */
	if (adj_slen > slen || (u64)src % c->alignment) {
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	/* leave room for the header before the first group's data */
	dst += hdrsize;
	dlen -= hdrsize;

	/* dskip bytes are skipped to align dst; recorded in g->padding */
	if ((u64)dst % c->alignment) {
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
		/* output too small/misaligned for hw: compress into the
		 * dbounce buffer and copy back afterwards
		 */
nospc:
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	/* input was zero-padded up to slen; report the pad as ignore bytes */
	if (p->iremain < slen) {
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}
/**
 * nx842_crypto_compress - compress @src into @dst using the NX-842 hw
 * @tfm: crypto transform holding the nx842_crypto_ctx
 * @src: uncompressed input
 * @slen: input length in bytes
 * @dst: output buffer for the (possibly headered) compressed data
 * @dlen: in: output buffer size; out: bytes written
 *
 * Splits the input into up to NX842_CRYPTO_GROUP_MAX groups, each sized
 * to the hardware constraints, and prepends a header unless the buffers
 * already satisfy every constraint.  Does NOT fall back to software
 * compression (see the file header comment).
 *
 * Return: 0 on success or a negative errno (e.g. -ENOSPC when @dst or
 * the group limit is exhausted).
 */
int nx842_crypto_compress(struct crypto_tfm *tfm,
			  const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen)
{
	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
	struct nx842_crypto_header *hdr = &ctx->header;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	unsigned int groups, hdrsize, h;
	int ret, n;
	bool add_header;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	/* size the header for the worst-case number of groups */
	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
		       DIV_ROUND_UP(p.iremain, c.maximum));
	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);

	/* ctx buffers (header, wmem, bounce) are shared; serialize access */
	spin_lock_bh(&ctx->lock);

	/* skip adding header if the buffers meet all constraints */
	add_header = (p.iremain % c.multiple ||
		      p.iremain < c.minimum ||
		      p.iremain > c.maximum ||
		      (u64)p.in % c.alignment ||
		      p.oremain % c.multiple ||
		      p.oremain < c.minimum ||
		      p.oremain > c.maximum ||
		      (u64)p.out % c.alignment);

	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
	hdr->groups = 0;
	hdr->ignore = 0;

	while (p.iremain > 0) {
		n = hdr->groups++;
		ret = -ENOSPC;
		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
			goto unlock;

		/* header goes before first group */
		h = !n && add_header ? hdrsize : 0;

		/* compress() may only set ignore on the final group */
		if (ignore)
			pr_warn("internal error, ignore is set %x\n", ignore);

		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
		if (ret)
			goto unlock;
	}

	if (!add_header && hdr->groups > 1) {
		pr_err("Internal error: No header but multiple groups\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* ignore indicates the input stream needed to be padded */
	hdr->ignore = cpu_to_be16(ignore);
	if (ignore)
		pr_debug("marked %d bytes as ignore\n", ignore);

	if (add_header)
		ret = nx842_crypto_add_header(hdr, dst);
	if (ret)
		goto unlock;

	*dlen = p.ototal;

	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_compress);
/*
 * Decompress one group described by @g.  Tries the hardware first (with
 * bounce buffers as needed to satisfy the constraints, retrying -EBUSY
 * until DECOMP_BUSY_TIMEOUT); on any hardware failure, falls back to the
 * software 842 decompressor, which has no alignment/length constraints.
 *
 * @ignore is the count of trailing pad bytes to drop from the
 * decompressed output (nonzero only for the final group).
 *
 * Returns 0 and advances @p on success, or a negative errno.
 */
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	/* the group's data starts after its alignment/header padding */
	src += padding;

	/* adjust input length to the hw multiple/minimum constraints */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	/* bounce the output if it's too small/misaligned for the hw */
	if (dlen < required_len || (u64)dst % c->alignment) {
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	/* input zero-padding we added isn't part of the consumed stream */
	slen -= spadding;

	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}
/**
 * nx842_crypto_decompress - decompress @src into @dst
 * @tfm: crypto transform holding the nx842_crypto_ctx
 * @src: compressed input (headered by this driver, or raw 842)
 * @slen: input length in bytes
 * @dst: output buffer for decompressed data
 * @dlen: in: output buffer size; out: bytes written
 *
 * A buffer that does not start with NX842_CRYPTO_MAGIC is treated as a
 * single raw 842 group; otherwise the header is validated and each group
 * is decompressed in turn, applying the header's ignore count to the
 * last group.  Hardware failures fall back to software decompression
 * inside decompress().
 *
 * Return: 0 on success or a negative errno.
 */
int nx842_crypto_decompress(struct crypto_tfm *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
{
	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	hdr = (struct nx842_crypto_header *)src;

	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		/* synthesize a single group covering the whole buffer */
		struct nx842_crypto_header_group g = {
			.padding = 0,
			.compressed_length = cpu_to_be32(p.iremain),
			.uncompressed_length = cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	/* copy the header out of the input so it can't change under us */
	memcpy(&ctx->header, src, hdr_len);
	hdr = &ctx->header;

	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
/* module metadata */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
MODULE_AUTHOR("Dan Streetman <[email protected]>");