Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.10:

  API:
   - add skcipher walk interface
   - add asynchronous compression (acomp) interface
   - fix algif_aed AIO handling of zero buffer

  Algorithms:
   - fix unaligned access in poly1305
   - fix DRBG output to large buffers

  Drivers:
   - add support for iMX6UL to caam
   - fix givenc descriptors (used by IPsec) in caam
   - accelerated SHA256/SHA512 for ARM64 from OpenSSL
   - add SSE CRCT10DIF and CRC32 to ARM/ARM64
   - add AEAD support to Chelsio chcr
   - add Armada 8K support to omap-rng"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (148 commits)
  crypto: testmgr - fix overlap in chunked tests again
  crypto: arm/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm64/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm/crct10dif - port x86 SSE implementation to ARM
  crypto: arm64/crct10dif - port x86 SSE implementation to arm64
  crypto: testmgr - add/enhance test cases for CRC-T10DIF
  crypto: testmgr - avoid overlap in chunked tests
  crypto: chcr - checking for IS_ERR() instead of NULL
  crypto: caam - check caam_emi_slow instead of re-lookup platform
  crypto: algif_aead - fix AIO handling of zero buffer
  crypto: aes-ce - Make aes_simd_algs static
  crypto: algif_skcipher - set error code when kcalloc fails
  crypto: caam - make aamalg_desc a proper module
  crypto: caam - pass key buffers with typesafe pointers
  crypto: arm64/aes-ce-ccm - Fix AEAD decryption length
  MAINTAINERS: add crypto headers to crypto entry
  crypt: doc - remove misleading mention of async API
  crypto: doc - fix header file name
  crypto: api - fix comment typo
  crypto: skcipher - Add separate walker for AEAD decryption
  ..
This commit is contained in:
Linus Torvalds
2016-12-14 13:31:29 -08:00
151 changed files with 15768 additions and 4519 deletions

View File

@@ -88,9 +88,9 @@ config CRYPTO_AES_ARM
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM
select CRYPTO_ABLK_HELPER
select CRYPTO_BLKCIPHER
select CRYPTO_SIMD
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes
@@ -104,8 +104,8 @@ config CRYPTO_AES_ARM_BS
config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_ABLK_HELPER
select CRYPTO_BLKCIPHER
select CRYPTO_SIMD
help
Use an implementation of AES in CBC, CTR and XTS modes that uses
ARMv8 Crypto Extensions
@@ -120,4 +120,14 @@ config CRYPTO_GHASH_ARM_CE
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
that is part of the ARMv8 Crypto Extensions
config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
depends on KERNEL_MODE_NEON && CRC_T10DIF
select CRYPTO_HASH
config CRYPTO_CRC32_ARM_CE
tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
depends on KERNEL_MODE_NEON && CRC32
select CRYPTO_HASH
endif

View File

@@ -13,6 +13,8 @@ ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
ifneq ($(ce-obj-y)$(ce-obj-m),)
ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
@@ -36,6 +38,8 @@ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)

View File

@@ -12,8 +12,8 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -88,8 +88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
#ifndef CONFIG_CPU_BIG_ENDIAN
rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
rko[0] = rko[0] ^ rki[0] ^ rcon[i];
#else
rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
#endif
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
@@ -128,17 +133,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
return 0;
}
static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
ret = ce_aes_expandkey(ctx, in_key, key_len);
if (!ret)
return 0;
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -147,13 +152,13 @@ struct crypto_aes_xts_ctx {
struct crypto_aes_ctx __aligned(8) key2;
};
static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
ret = xts_check_key(tfm, in_key, key_len);
ret = xts_verify_key(tfm, in_key, key_len);
if (ret)
return ret;
@@ -164,130 +169,113 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
if (!ret)
return 0;
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int ecb_encrypt(struct skcipher_request *req)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int ecb_decrypt(struct skcipher_request *req)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks,
walk.iv);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err, blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
nbytes -= blocks * AES_BLOCK_SIZE;
if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
break;
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes % AES_BLOCK_SIZE) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
if (walk.nbytes) {
u8 __aligned(8) tail[AES_BLOCK_SIZE];
unsigned int nbytes = walk.nbytes;
u8 *tdst = walk.dst.virt.addr;
u8 *tsrc = walk.src.virt.addr;
/*
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -298,231 +286,172 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
num_rounds(ctx), blocks, walk.iv);
memcpy(tdst, tail, nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
return err;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
struct blkcipher_walk walk;
struct skcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_enc, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
struct blkcipher_walk walk;
struct skcipher_walk walk;
unsigned int blocks;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
err = skcipher_walk_virt(&walk, req, true);
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_dec, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-ce",
.cra_driver_name = "__driver-ecb-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = 0,
.setkey = ce_aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
static struct skcipher_alg aes_algs[] = { {
.base = {
.cra_name = "__ecb(aes)",
.cra_driver_name = "__ecb-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.cra_name = "__cbc-aes-ce",
.cra_driver_name = "__driver-cbc-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
.base = {
.cra_name = "__cbc(aes)",
.cra_driver_name = "__cbc-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.cra_name = "__ctr-aes-ce",
.cra_driver_name = "__driver-ctr-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
.base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.cra_name = "__xts-aes-ce",
.cra_driver_name = "__driver-xts-aes-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
.base = {
.cra_name = "__xts(aes)",
.cra_driver_name = "__xts-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
}, {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = 0,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
} };
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
static void aes_exit(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
simd_skcipher_free(aes_simd_algs[i]);
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
static int __init aes_init(void)
{
struct simd_skcipher_alg *simd;
const char *basename;
const char *algname;
const char *drvname;
int err;
int i;
if (!(elf_hwcap2 & HWCAP2_AES))
return -ENODEV;
return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
}
static void __exit aes_exit(void)
{
crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
if (err)
return err;
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
simd = simd_skcipher_create_compat(algname, drvname, basename);
err = PTR_ERR(simd);
if (IS_ERR(simd))
goto unregister_simds;
aes_simd_algs[i] = simd;
}
return 0;
unregister_simds:
aes_exit();
return err;
}
module_init(aes_init);

View File

@@ -10,8 +10,9 @@
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cbc.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -55,14 +56,14 @@ struct aesbs_xts_ctx {
struct AES_KEY twkey;
};
static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
int bits = key_len * 8;
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
ctx->dec.rk = ctx->enc;
@@ -71,33 +72,33 @@ static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
int bits = key_len * 8;
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
ctx->enc.converted = 0;
return 0;
}
static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int bits = key_len * 4;
int err;
err = xts_check_key(tfm, in_key, key_len);
err = xts_verify_key(tfm, in_key, key_len);
if (err)
return err;
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
ctx->dec.rk = ctx->enc.rk;
@@ -107,88 +108,52 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
const u8 *src, u8 *dst)
{
struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while (walk.nbytes) {
u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *src = walk.src.virt.addr;
if (walk.dst.virt.addr == walk.src.virt.addr) {
u8 *iv = walk.iv;
do {
crypto_xor(src, iv, AES_BLOCK_SIZE);
AES_encrypt(src, src, &ctx->enc);
iv = src;
src += AES_BLOCK_SIZE;
} while (--blocks);
memcpy(walk.iv, iv, AES_BLOCK_SIZE);
} else {
u8 *dst = walk.dst.virt.addr;
do {
crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
AES_encrypt(walk.iv, dst, &ctx->enc);
memcpy(walk.iv, dst, AES_BLOCK_SIZE);
src += AES_BLOCK_SIZE;
dst += AES_BLOCK_SIZE;
} while (--blocks);
}
err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
AES_encrypt(src, dst, &ctx->enc);
}
static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int aesbs_cbc_encrypt(struct skcipher_request *req)
{
struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
}
static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
const u8 *src, u8 *dst)
{
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
AES_decrypt(src, dst, &ctx->dec.rk);
}
static int aesbs_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
kernel_neon_begin();
bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
walk.nbytes, &ctx->dec, walk.iv);
kernel_neon_end();
err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
while (walk.nbytes) {
u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
for (err = skcipher_walk_virt(&walk, req, false);
(nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
u32 blocks = nbytes / AES_BLOCK_SIZE;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;
u8 bk[2][AES_BLOCK_SIZE];
u8 *iv = walk.iv;
do {
if (walk.dst.virt.addr == walk.src.virt.addr)
memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
if (blocks >= 8) {
kernel_neon_begin();
bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
kernel_neon_end();
nbytes %= AES_BLOCK_SIZE;
continue;
}
AES_decrypt(src, dst, &ctx->dec.rk);
crypto_xor(dst, iv, AES_BLOCK_SIZE);
if (walk.dst.virt.addr == walk.src.virt.addr)
iv = bk[blocks & 1];
else
iv = src;
dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
} while (--blocks);
err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
aesbs_decrypt_one);
}
return err;
}
@@ -206,17 +171,15 @@ static void inc_be128_ctr(__be32 ctr[], u32 addend)
}
}
static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
static int aesbs_ctr_encrypt(struct skcipher_request *req)
{
struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
u32 blocks;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -235,11 +198,7 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
kernel_neon_end();
inc_be128_ctr(ctr, blocks);
nbytes -= blocks * AES_BLOCK_SIZE;
if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
break;
err = blkcipher_walk_done(desc, &walk, tail);
err = skcipher_walk_done(&walk, tail);
}
if (walk.nbytes) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -248,23 +207,21 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
AES_encrypt(walk.iv, ks, &ctx->enc.rk);
if (tdst != tsrc)
memcpy(tdst, tsrc, nbytes);
crypto_xor(tdst, ks, nbytes);
err = blkcipher_walk_done(desc, &walk, 0);
memcpy(tdst, tsrc, walk.nbytes);
crypto_xor(tdst, ks, walk.nbytes);
err = skcipher_walk_done(&walk, 0);
}
return err;
}
static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int aesbs_xts_encrypt(struct skcipher_request *req)
{
struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, false);
/* generate the initial tweak */
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -274,21 +231,19 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
walk.nbytes, &ctx->enc, walk.iv);
kernel_neon_end();
err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
}
static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
static int aesbs_xts_decrypt(struct skcipher_request *req)
{
struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
err = skcipher_walk_virt(&walk, req, false);
/* generate the initial tweak */
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -298,141 +253,110 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
walk.nbytes, &ctx->dec, walk.iv);
kernel_neon_end();
err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
}
static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__cbc-aes-neonbs",
.cra_driver_name = "__driver-cbc-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_cbc_set_key,
.encrypt = aesbs_cbc_encrypt,
.decrypt = aesbs_cbc_decrypt,
static struct skcipher_alg aesbs_algs[] = { {
.base = {
.cra_name = "__cbc(aes)",
.cra_driver_name = "__cbc-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_cbc_set_key,
.encrypt = aesbs_cbc_encrypt,
.decrypt = aesbs_cbc_decrypt,
}, {
.cra_name = "__ctr-aes-neonbs",
.cra_driver_name = "__driver-ctr-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_ctr_set_key,
.encrypt = aesbs_ctr_encrypt,
.decrypt = aesbs_ctr_encrypt,
.base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = aesbs_ctr_set_key,
.encrypt = aesbs_ctr_encrypt,
.decrypt = aesbs_ctr_encrypt,
}, {
.cra_name = "__xts-aes-neonbs",
.cra_driver_name = "__driver-xts-aes-neonbs",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_blkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_xts_set_key,
.encrypt = aesbs_xts_encrypt,
.decrypt = aesbs_xts_decrypt,
.base = {
.cra_name = "__xts(aes)",
.cra_driver_name = "__xts-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
}, {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-neonbs",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_ablkcipher = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
}
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_xts_set_key,
.encrypt = aesbs_xts_encrypt,
.decrypt = aesbs_xts_decrypt,
} };
struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
static void aesbs_mod_exit(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
simd_skcipher_free(aesbs_simd_algs[i]);
crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
}
static int __init aesbs_mod_init(void)
{
struct simd_skcipher_alg *simd;
const char *basename;
const char *algname;
const char *drvname;
int err;
int i;
if (!cpu_has_neon())
return -ENODEV;
return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
}
err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
if (err)
return err;
static void __exit aesbs_mod_exit(void)
{
crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
algname = aesbs_algs[i].base.cra_name + 2;
drvname = aesbs_algs[i].base.cra_driver_name + 2;
basename = aesbs_algs[i].base.cra_driver_name;
simd = simd_skcipher_create_compat(algname, drvname, basename);
err = PTR_ERR(simd);
if (IS_ERR(simd))
goto unregister_simds;
aesbs_simd_algs[i] = simd;
}
return 0;
unregister_simds:
aesbs_mod_exit();
return err;
}
module_init(aesbs_mod_init);

View File

@@ -0,0 +1,306 @@
/*
* Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
*
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
* calculation.
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
* at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2B: Instruction Set Reference, N-Z
*
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
* Alexander Boyko <Alexander_Boyko@xyratex.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.align 6
.arch armv8-a
.arch_extension crc
.fpu crypto-neon-fp-armv8
.Lcrc32_constants:
/*
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
* #define CONSTANT_R1 0x154442bd4LL
*
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
* #define CONSTANT_R2 0x1c6e41596LL
*/
.quad 0x0000000154442bd4
.quad 0x00000001c6e41596
/*
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
* #define CONSTANT_R3 0x1751997d0LL
*
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
* #define CONSTANT_R4 0x0ccaa009eLL
*/
.quad 0x00000001751997d0
.quad 0x00000000ccaa009e
/*
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
* #define CONSTANT_R5 0x163cd6124LL
*/
.quad 0x0000000163cd6124
.quad 0x00000000FFFFFFFF
/*
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
*
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
* = 0x1F7011641LL
* #define CONSTANT_RU 0x1F7011641LL
*/
.quad 0x00000001DB710641
.quad 0x00000001F7011641
.Lcrc32c_constants:
.quad 0x00000000740eef02
.quad 0x000000009e4addf8
.quad 0x00000000f20c0dfe
.quad 0x000000014cd00bd6
.quad 0x00000000dd45aab8
.quad 0x00000000FFFFFFFF
.quad 0x0000000105ec76f0
.quad 0x00000000dea713f1
dCONSTANTl .req d0
dCONSTANTh .req d1
qCONSTANT .req q0
BUF .req r0
LEN .req r1
CRC .req r2
qzr .req q9
/**
* Calculate crc32
* BUF - buffer
* LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
* CRC - initial crc32
* return %eax crc32
* uint crc32_pmull_le(unsigned char const *buffer,
* size_t len, uint crc32)
*/
ENTRY(crc32_pmull_le)
adr r3, .Lcrc32_constants
b 0f
ENTRY(crc32c_pmull_le)
adr r3, .Lcrc32c_constants
0: bic LEN, LEN, #15
vld1.8 {q1-q2}, [BUF, :128]!
vld1.8 {q3-q4}, [BUF, :128]!
vmov.i8 qzr, #0
vmov.i8 qCONSTANT, #0
vmov dCONSTANTl[0], CRC
veor.8 d2, d2, dCONSTANTl
sub LEN, LEN, #0x40
cmp LEN, #0x40
blt less_64
vld1.64 {qCONSTANT}, [r3]
loop_64: /* 64 bytes Full cache line folding */
sub LEN, LEN, #0x40
vmull.p64 q5, d3, dCONSTANTh
vmull.p64 q6, d5, dCONSTANTh
vmull.p64 q7, d7, dCONSTANTh
vmull.p64 q8, d9, dCONSTANTh
vmull.p64 q1, d2, dCONSTANTl
vmull.p64 q2, d4, dCONSTANTl
vmull.p64 q3, d6, dCONSTANTl
vmull.p64 q4, d8, dCONSTANTl
veor.8 q1, q1, q5
vld1.8 {q5}, [BUF, :128]!
veor.8 q2, q2, q6
vld1.8 {q6}, [BUF, :128]!
veor.8 q3, q3, q7
vld1.8 {q7}, [BUF, :128]!
veor.8 q4, q4, q8
vld1.8 {q8}, [BUF, :128]!
veor.8 q1, q1, q5
veor.8 q2, q2, q6
veor.8 q3, q3, q7
veor.8 q4, q4, q8
cmp LEN, #0x40
bge loop_64
less_64: /* Folding cache line into 128bit */
vldr dCONSTANTl, [r3, #16]
vldr dCONSTANTh, [r3, #24]
vmull.p64 q5, d3, dCONSTANTh
vmull.p64 q1, d2, dCONSTANTl
veor.8 q1, q1, q5
veor.8 q1, q1, q2
vmull.p64 q5, d3, dCONSTANTh
vmull.p64 q1, d2, dCONSTANTl
veor.8 q1, q1, q5
veor.8 q1, q1, q3
vmull.p64 q5, d3, dCONSTANTh
vmull.p64 q1, d2, dCONSTANTl
veor.8 q1, q1, q5
veor.8 q1, q1, q4
teq LEN, #0
beq fold_64
loop_16: /* Folding rest buffer into 128bit */
subs LEN, LEN, #0x10
vld1.8 {q2}, [BUF, :128]!
vmull.p64 q5, d3, dCONSTANTh
vmull.p64 q1, d2, dCONSTANTl
veor.8 q1, q1, q5
veor.8 q1, q1, q2
bne loop_16
fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */
vmull.p64 q2, d2, dCONSTANTh
vext.8 q1, q1, qzr, #8
veor.8 q1, q1, q2
/* final 32-bit fold */
vldr dCONSTANTl, [r3, #32]
vldr d6, [r3, #40]
vmov.i8 d7, #0
vext.8 q2, q1, qzr, #4
vand.8 d2, d2, d6
vmull.p64 q1, d2, dCONSTANTl
veor.8 q1, q1, q2
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
vldr dCONSTANTl, [r3, #48]
vldr dCONSTANTh, [r3, #56]
vand.8 q2, q1, q3
vext.8 q2, qzr, q2, #8
vmull.p64 q2, d5, dCONSTANTh
vand.8 q2, q2, q3
vmull.p64 q2, d4, dCONSTANTl
veor.8 q1, q1, q2
vmov r0, s5
bx lr
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)
.macro __crc32, c
subs ip, r2, #8
bmi .Ltail\c
tst r1, #3
bne .Lunaligned\c
teq ip, #0
.Laligned8\c:
ldrd r2, r3, [r1], #8
ARM_BE8(rev r2, r2 )
ARM_BE8(rev r3, r3 )
crc32\c\()w r0, r0, r2
crc32\c\()w r0, r0, r3
bxeq lr
subs ip, ip, #8
bpl .Laligned8\c
.Ltail\c:
tst ip, #4
beq 2f
ldr r3, [r1], #4
ARM_BE8(rev r3, r3 )
crc32\c\()w r0, r0, r3
2: tst ip, #2
beq 1f
ldrh r3, [r1], #2
ARM_BE8(rev16 r3, r3 )
crc32\c\()h r0, r0, r3
1: tst ip, #1
bxeq lr
ldrb r3, [r1]
crc32\c\()b r0, r0, r3
bx lr
.Lunaligned\c:
tst r1, #1
beq 2f
ldrb r3, [r1], #1
subs r2, r2, #1
crc32\c\()b r0, r0, r3
tst r1, #2
beq 0f
2: ldrh r3, [r1], #2
subs r2, r2, #2
ARM_BE8(rev16 r3, r3 )
crc32\c\()h r0, r0, r3
0: subs ip, r2, #8
bpl .Laligned8\c
b .Ltail\c
.endm
.align 5
ENTRY(crc32_armv8_le)
__crc32
ENDPROC(crc32_armv8_le)
.align 5
ENTRY(crc32c_armv8_le)
__crc32 c
ENDPROC(crc32c_armv8_le)

View File

@@ -0,0 +1,242 @@
/*
* Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
*
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <crypto/internal/hash.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#define PMULL_MIN_LEN 64L /* minimum size of buffer
* for crc32_pmull_le_16 */
#define SCALE_F 16L /* size of NEON register */
asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len);
static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len);
static int crc32_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = 0;
return 0;
}
static int crc32c_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = ~0;
return 0;
}
static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) {
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key);
return 0;
}
static int crc32_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm);
u32 *crc = shash_desc_ctx(desc);
*crc = *mctx;
return 0;
}
static int crc32_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u32 *crc = shash_desc_ctx(desc);
*crc = crc32_armv8_le(*crc, data, length);
return 0;
}
static int crc32c_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u32 *crc = shash_desc_ctx(desc);
*crc = crc32c_armv8_le(*crc, data, length);
return 0;
}
static int crc32_final(struct shash_desc *desc, u8 *out)
{
u32 *crc = shash_desc_ctx(desc);
put_unaligned_le32(*crc, out);
return 0;
}
static int crc32c_final(struct shash_desc *desc, u8 *out)
{
u32 *crc = shash_desc_ctx(desc);
put_unaligned_le32(~*crc, out);
return 0;
}
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u32 *crc = shash_desc_ctx(desc);
unsigned int l;
if (may_use_simd()) {
if ((u32)data % SCALE_F) {
l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
*crc = fallback_crc32(*crc, data, l);
data += l;
length -= l;
}
if (length >= PMULL_MIN_LEN) {
l = round_down(length, SCALE_F);
kernel_neon_begin();
*crc = crc32_pmull_le(data, l, *crc);
kernel_neon_end();
data += l;
length -= l;
}
}
if (length > 0)
*crc = fallback_crc32(*crc, data, length);
return 0;
}
static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u32 *crc = shash_desc_ctx(desc);
unsigned int l;
if (may_use_simd()) {
if ((u32)data % SCALE_F) {
l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
*crc = fallback_crc32c(*crc, data, l);
data += l;
length -= l;
}
if (length >= PMULL_MIN_LEN) {
l = round_down(length, SCALE_F);
kernel_neon_begin();
*crc = crc32c_pmull_le(data, l, *crc);
kernel_neon_end();
data += l;
length -= l;
}
}
if (length > 0)
*crc = fallback_crc32c(*crc, data, length);
return 0;
}
static struct shash_alg crc32_pmull_algs[] = { {
.setkey = crc32_setkey,
.init = crc32_init,
.update = crc32_update,
.final = crc32_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
.base.cra_ctxsize = sizeof(u32),
.base.cra_init = crc32_cra_init,
.base.cra_name = "crc32",
.base.cra_driver_name = "crc32-arm-ce",
.base.cra_priority = 200,
.base.cra_blocksize = 1,
.base.cra_module = THIS_MODULE,
}, {
.setkey = crc32_setkey,
.init = crc32_init,
.update = crc32c_update,
.final = crc32c_final,
.descsize = sizeof(u32),
.digestsize = sizeof(u32),
.base.cra_ctxsize = sizeof(u32),
.base.cra_init = crc32c_cra_init,
.base.cra_name = "crc32c",
.base.cra_driver_name = "crc32c-arm-ce",
.base.cra_priority = 200,
.base.cra_blocksize = 1,
.base.cra_module = THIS_MODULE,
} };
static int __init crc32_pmull_mod_init(void)
{
if (elf_hwcap2 & HWCAP2_PMULL) {
crc32_pmull_algs[0].update = crc32_pmull_update;
crc32_pmull_algs[1].update = crc32c_pmull_update;
if (elf_hwcap2 & HWCAP2_CRC32) {
fallback_crc32 = crc32_armv8_le;
fallback_crc32c = crc32c_armv8_le;
} else {
fallback_crc32 = crc32_le;
fallback_crc32c = __crc32c_le;
}
} else if (!(elf_hwcap2 & HWCAP2_CRC32)) {
return -ENODEV;
}
return crypto_register_shashes(crc32_pmull_algs,
ARRAY_SIZE(crc32_pmull_algs));
}
static void __exit crc32_pmull_mod_exit(void)
{
crypto_unregister_shashes(crc32_pmull_algs,
ARRAY_SIZE(crc32_pmull_algs));
}
module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("crc32");
MODULE_ALIAS_CRYPTO("crc32c");

View File

@@ -0,0 +1,427 @@
//
// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
//
// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
//
//
// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
//
// Copyright (c) 2013, Intel Corporation
//
// Authors:
// Erdinc Ozturk <erdinc.ozturk@intel.com>
// Vinodh Gopal <vinodh.gopal@intel.com>
// James Guilford <james.guilford@intel.com>
// Tim Chen <tim.c.chen@linux.intel.com>
//
// This software is available to you under a choice of one of two
// licenses. You may choose to be licensed under the terms of the GNU
// General Public License (GPL) Version 2, available from the file
// COPYING in the main directory of this source tree, or the
// OpenIB.org BSD license below:
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
//
// * Neither the name of the Intel Corporation nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
//
// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Function API:
// UINT16 crc_t10dif_pcl(
// UINT16 init_crc, //initial CRC value, 16 bits
// const unsigned char *buf, //buffer pointer to calculate CRC on
// UINT64 len //buffer length in bytes (64-bit data)
// );
//
// Reference paper titled "Fast CRC Computation for Generic
// Polynomials Using PCLMULQDQ Instruction"
// URL: http://www.intel.com/content/dam/www/public/us/en/documents
// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
//
//
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef CONFIG_CPU_ENDIAN_BE8
#define CPU_LE(code...)
#else
#define CPU_LE(code...) code
#endif
.text
.fpu crypto-neon-fp-armv8
arg1_low32 .req r0
arg2 .req r1
arg3 .req r2
qzr .req q13
q0l .req d0
q0h .req d1
q1l .req d2
q1h .req d3
q2l .req d4
q2h .req d5
q3l .req d6
q3h .req d7
q4l .req d8
q4h .req d9
q5l .req d10
q5h .req d11
q6l .req d12
q6h .req d13
q7l .req d14
q7h .req d15
ENTRY(crc_t10dif_pmull)
vmov.i8 qzr, #0 // init zero register
// adjust the 16-bit initial_crc value, scale it to 32 bits
lsl arg1_low32, arg1_low32, #16
// check if smaller than 256
cmp arg3, #256
// for sizes less than 128, we can't fold 64B at a time...
blt _less_than_128
// load the initial crc value
// crc value does not need to be byte-reflected, but it needs
// to be moved to the high part of the register.
// because data will be byte-reflected and will align with
// initial crc at correct place.
vmov s0, arg1_low32 // initial crc
vext.8 q10, qzr, q0, #4
// receive the initial 64B data, xor the initial crc value
vld1.64 {q0-q1}, [arg2, :128]!
vld1.64 {q2-q3}, [arg2, :128]!
vld1.64 {q4-q5}, [arg2, :128]!
vld1.64 {q6-q7}, [arg2, :128]!
CPU_LE( vrev64.8 q0, q0 )
CPU_LE( vrev64.8 q1, q1 )
CPU_LE( vrev64.8 q2, q2 )
CPU_LE( vrev64.8 q3, q3 )
CPU_LE( vrev64.8 q4, q4 )
CPU_LE( vrev64.8 q5, q5 )
CPU_LE( vrev64.8 q6, q6 )
CPU_LE( vrev64.8 q7, q7 )
vswp d0, d1
vswp d2, d3
vswp d4, d5
vswp d6, d7
vswp d8, d9
vswp d10, d11
vswp d12, d13
vswp d14, d15
// XOR the initial_crc value
veor.8 q0, q0, q10
adr ip, rk3
vld1.64 {q10}, [ip, :128] // xmm10 has rk3 and rk4
//
// we subtract 256 instead of 128 to save one instruction from the loop
//
sub arg3, arg3, #256
// at this section of the code, there is 64*x+y (0<=y<64) bytes of
// buffer. The _fold_64_B_loop will fold 64B at a time
// until we have 64+y Bytes of buffer
// fold 64B at a time. This section of the code folds 4 vector
// registers in parallel
_fold_64_B_loop:
.macro fold64, reg1, reg2
vld1.64 {q11-q12}, [arg2, :128]!
vmull.p64 q8, \reg1\()h, d21
vmull.p64 \reg1, \reg1\()l, d20
vmull.p64 q9, \reg2\()h, d21
vmull.p64 \reg2, \reg2\()l, d20
CPU_LE( vrev64.8 q11, q11 )
CPU_LE( vrev64.8 q12, q12 )
vswp d22, d23
vswp d24, d25
veor.8 \reg1, \reg1, q8
veor.8 \reg2, \reg2, q9
veor.8 \reg1, \reg1, q11
veor.8 \reg2, \reg2, q12
.endm
fold64 q0, q1
fold64 q2, q3
fold64 q4, q5
fold64 q6, q7
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
bge _fold_64_B_loop
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
// fold the 8 vector registers to 1 vector register with different
// constants
adr ip, rk9
vld1.64 {q10}, [ip, :128]!
.macro fold16, reg, rk
vmull.p64 q8, \reg\()l, d20
vmull.p64 \reg, \reg\()h, d21
.ifnb \rk
vld1.64 {q10}, [ip, :128]!
.endif
veor.8 q7, q7, q8
veor.8 q7, q7, \reg
.endm
fold16 q0, rk11
fold16 q1, rk13
fold16 q2, rk15
fold16 q3, rk17
fold16 q4, rk19
fold16 q5, rk1
fold16 q6
// instead of 64, we add 48 to the loop counter to save 1 instruction
// from the loop instead of a cmp instruction, we use the negative
// flag with the jl instruction
adds arg3, arg3, #(128-16)
blt _final_reduction_for_128
// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
// and the rest is in memory. We can fold 16 bytes at a time if y>=16
// continue folding 16B at a time
_16B_reduction_loop:
vmull.p64 q8, d14, d20
vmull.p64 q7, d15, d21
veor.8 q7, q7, q8
vld1.64 {q0}, [arg2, :128]!
CPU_LE( vrev64.8 q0, q0 )
vswp d0, d1
veor.8 q7, q7, q0
subs arg3, arg3, #16
// instead of a cmp instruction, we utilize the flags with the
// jge instruction equivalent of: cmp arg3, 16-16
// check if there is any more 16B in the buffer to be able to fold
bge _16B_reduction_loop
// now we have 16+z bytes left to reduce, where 0<= z < 16.
// first, we reduce the data in the xmm7 register
_final_reduction_for_128:
// check if any more data to fold. If not, compute the CRC of
// the final 128 bits
adds arg3, arg3, #16
beq _128_done
// here we are getting data that is less than 16 bytes.
// since we know that there was data before the pointer, we can
// offset the input pointer before the actual point, to receive
// exactly 16 bytes. after that the registers need to be adjusted.
_get_last_two_regs:
add arg2, arg2, arg3
sub arg2, arg2, #16
vld1.64 {q1}, [arg2]
CPU_LE( vrev64.8 q1, q1 )
vswp d2, d3
// get rid of the extra data that was loaded before
// load the shift constant
adr ip, tbl_shf_table + 16
sub ip, ip, arg3
vld1.8 {q0}, [ip]
// shift v2 to the left by arg3 bytes
vtbl.8 d4, {d14-d15}, d0
vtbl.8 d5, {d14-d15}, d1
// shift v7 to the right by 16-arg3 bytes
vmov.i8 q9, #0x80
veor.8 q0, q0, q9
vtbl.8 d18, {d14-d15}, d0
vtbl.8 d19, {d14-d15}, d1
// blend
vshr.s8 q0, q0, #7 // convert to 8-bit mask
vbsl.8 q0, q2, q1
// fold 16 Bytes
vmull.p64 q8, d18, d20
vmull.p64 q7, d19, d21
veor.8 q7, q7, q8
veor.8 q7, q7, q0
_128_done:
// compute crc of a 128-bit value
vldr d20, rk5
vldr d21, rk6 // rk5 and rk6 in xmm10
// 64b fold
vext.8 q0, qzr, q7, #8
vmull.p64 q7, d15, d20
veor.8 q7, q7, q0
// 32b fold
vext.8 q0, q7, qzr, #12
vmov s31, s3
vmull.p64 q0, d0, d21
veor.8 q7, q0, q7
// barrett reduction
_barrett:
vldr d20, rk7
vldr d21, rk8
vmull.p64 q0, d15, d20
vext.8 q0, qzr, q0, #12
vmull.p64 q0, d1, d21
vext.8 q0, qzr, q0, #12
veor.8 q7, q7, q0
vmov r0, s29
_cleanup:
// scale the result back to 16 bits
lsr r0, r0, #16
bx lr
_less_than_128:
teq arg3, #0
beq _cleanup
vmov.i8 q0, #0
vmov s3, arg1_low32 // get the initial crc value
vld1.64 {q7}, [arg2, :128]!
CPU_LE( vrev64.8 q7, q7 )
vswp d14, d15
veor.8 q7, q7, q0
cmp arg3, #16
beq _128_done // exactly 16 left
blt _less_than_16_left
// now if there is, load the constants
vldr d20, rk1
vldr d21, rk2 // rk1 and rk2 in xmm10
// check if there is enough buffer to be able to fold 16B at a time
subs arg3, arg3, #32
addlt arg3, arg3, #16
blt _get_last_two_regs
b _16B_reduction_loop
_less_than_16_left:
// shl r9, 4
adr ip, tbl_shf_table + 16
sub ip, ip, arg3
vld1.8 {q0}, [ip]
vmov.i8 q9, #0x80
veor.8 q0, q0, q9
vtbl.8 d18, {d14-d15}, d0
vtbl.8 d15, {d14-d15}, d1
vmov d14, d18
b _128_done
ENDPROC(crc_t10dif_pmull)
// precomputed constants
// these constants are precomputed from the poly:
// 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 4
// Q = 0x18BB70000
// rk1 = 2^(32*3) mod Q << 32
// rk2 = 2^(32*5) mod Q << 32
// rk3 = 2^(32*15) mod Q << 32
// rk4 = 2^(32*17) mod Q << 32
// rk5 = 2^(32*3) mod Q << 32
// rk6 = 2^(32*2) mod Q << 32
// rk7 = floor(2^64/Q)
// rk8 = Q
rk3: .quad 0x9d9d000000000000
rk4: .quad 0x7cf5000000000000
rk5: .quad 0x2d56000000000000
rk6: .quad 0x1368000000000000
rk7: .quad 0x00000001f65a57f8
rk8: .quad 0x000000018bb70000
rk9: .quad 0xceae000000000000
rk10: .quad 0xbfd6000000000000
rk11: .quad 0x1e16000000000000
rk12: .quad 0x713c000000000000
rk13: .quad 0xf7f9000000000000
rk14: .quad 0x80a6000000000000
rk15: .quad 0x044c000000000000
rk16: .quad 0xe658000000000000
rk17: .quad 0xad18000000000000
rk18: .quad 0xa497000000000000
rk19: .quad 0x6ee3000000000000
rk20: .quad 0xe7b5000000000000
rk1: .quad 0x2d56000000000000
rk2: .quad 0x06df000000000000
tbl_shf_table:
// use these values for shift constants for the tbl/tbx instruction
// different alignments result in values as shown:
// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
// DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
.byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0

View File

@@ -0,0 +1,101 @@
/*
* Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
*
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/crc-t10dif.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <crypto/internal/hash.h>
#include <asm/neon.h>
#include <asm/simd.h>
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u32 len);
static int crct10dif_init(struct shash_desc *desc)
{
u16 *crc = shash_desc_ctx(desc);
*crc = 0;
return 0;
}
static int crct10dif_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u16 *crc = shash_desc_ctx(desc);
unsigned int l;
if (!may_use_simd()) {
*crc = crc_t10dif_generic(*crc, data, length);
} else {
if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
*crc = crc_t10dif_generic(*crc, data, l);
length -= l;
data += l;
}
if (length > 0) {
kernel_neon_begin();
*crc = crc_t10dif_pmull(*crc, data, length);
kernel_neon_end();
}
}
return 0;
}
static int crct10dif_final(struct shash_desc *desc, u8 *out)
{
u16 *crc = shash_desc_ctx(desc);
*(u16 *)out = *crc;
return 0;
}
static struct shash_alg crc_t10dif_alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = crct10dif_init,
.update = crct10dif_update,
.final = crct10dif_final,
.descsize = CRC_T10DIF_DIGEST_SIZE,
.base.cra_name = "crct10dif",
.base.cra_driver_name = "crct10dif-arm-ce",
.base.cra_priority = 200,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
};
static int __init crc_t10dif_mod_init(void)
{
if (!(elf_hwcap2 & HWCAP2_PMULL))
return -ENODEV;
return crypto_register_shash(&crc_t10dif_alg);
}
static void __exit crc_t10dif_mod_exit(void)
{
crypto_unregister_shash(&crc_t10dif_alg);
}
module_init(crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("crct10dif");