crypto: aesni-intel - Add AES-NI accelerated CTR mode

To take advantage of the hardware pipeline implementation of AES-NI
instructions. CTR mode cryption is implemented in ASM to schedule
multiple AES-NI instructions one after another. This way, some latency
of AES-NI instruction can be eliminated.

Performance testing based on dm-crypt should 50% reduction of
ecryption/decryption time.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Huang Ying
2010-03-10 18:28:55 +08:00
committed by Herbert Xu
parent 269ab459da
commit 12387a46bb
2 changed files with 238 additions and 7 deletions

View File

@@ -32,6 +32,9 @@
#define IN IN1
#define KEY %xmm2
#define IV %xmm3
#define BSWAP_MASK %xmm10
#define CTR %xmm11
#define INC %xmm12
#define KEYP %rdi
#define OUTP %rsi
@@ -42,6 +45,7 @@
#define T1 %r10
#define TKEYP T1
#define T2 %r11
#define TCTR_LOW T2
_key_expansion_128:
_key_expansion_256a:
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
movups IV, (IVP)
.Lcbc_dec_just_ret:
ret
.align 16
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/*
* _aesni_inc_init: internal ABI
* setup registers used by _aesni_inc
* input:
* IV
* output:
* CTR: == IV, in little endian
* TCTR_LOW: == lower qword of CTR
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
_aesni_inc_init:
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR
mov $1, TCTR_LOW
movq TCTR_LOW, INC
movq CTR, TCTR_LOW
ret
/*
* _aesni_inc: internal ABI
* Increase IV by 1, IV is in big endian
* input:
* IV
* CTR: == IV, in little endian
* TCTR_LOW: == lower qword of CTR
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
* output:
* IV: Increase by 1
* changed:
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
_aesni_inc:
paddq INC, CTR
add $1, TCTR_LOW
jnc .Linc_low
pslldq $8, INC
paddq INC, CTR
psrldq $8, INC
.Linc_low:
movaps CTR, IV
PSHUFB_XMM BSWAP_MASK IV
ret
/*
* void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len, u8 *iv)
*/
ENTRY(aesni_ctr_enc)
cmp $16, LEN
jb .Lctr_enc_just_ret
mov 480(KEYP), KLEN
movups (IVP), IV
call _aesni_inc_init
cmp $64, LEN
jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
movaps IV, STATE1
call _aesni_inc
movups (INP), IN1
movaps IV, STATE2
call _aesni_inc
movups 0x10(INP), IN2
movaps IV, STATE3
call _aesni_inc
movups 0x20(INP), IN3
movaps IV, STATE4
call _aesni_inc
movups 0x30(INP), IN4
call _aesni_enc4
pxor IN1, STATE1
movups STATE1, (OUTP)
pxor IN2, STATE2
movups STATE2, 0x10(OUTP)
pxor IN3, STATE3
movups STATE3, 0x20(OUTP)
pxor IN4, STATE4
movups STATE4, 0x30(OUTP)
sub $64, LEN
add $64, INP
add $64, OUTP
cmp $64, LEN
jge .Lctr_enc_loop4
cmp $16, LEN
jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
movaps IV, STATE
call _aesni_inc
movups (INP), IN
call _aesni_enc1
pxor IN, STATE
movups STATE, (OUTP)
sub $16, LEN
add $16, INP
add $16, OUTP
cmp $16, LEN
jge .Lctr_enc_loop1
.Lctr_enc_ret:
movups IV, (IVP)
.Lctr_enc_just_ret:
ret