Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
 "API:
   - Add the ability to abort a skcipher walk.

  Algorithms:
   - Fix XTS to actually do the stealing.
   - Add library helpers for AES and DES for single-block users.
   - Add library helpers for SHA256.
   - Add new DES key verification helper.
   - Add surrounding bits for ESSIV generator.
   - Add accelerations for aegis128.
   - Add test vectors for lzo-rle.

  Drivers:
   - Add i.MX8MQ support to caam.
   - Add gcm/ccm/cfb/ofb aes support in inside-secure.
   - Add ofb/cfb aes support in media-tek.
   - Add HiSilicon ZIP accelerator support.

  Others:
   - Fix potential race condition in padata.
   - Use unbound workqueues in padata"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
  crypto: caam - Cast to long first before pointer conversion
  crypto: ccree - enable CTS support in AES-XTS
  crypto: inside-secure - Probe transform record cache RAM sizes
  crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
  crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
  crypto: inside-secure - Enable extended algorithms on newer HW
  crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL
  crypto: inside-secure - Add EIP97/EIP197 and endianness detection
  padata: remove cpu_index from the parallel_queue
  padata: unbind parallel jobs from specific CPUs
  padata: use separate workqueues for parallel and serial work
  padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
  crypto: pcrypt - remove padata cpumask notifier
  padata: make padata_do_parallel find alternate callback CPU
  workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
  workqueue: unconfine alloc/apply/free_workqueue_attrs()
  padata: allocate workqueue internally
  arm64: dts: imx8mq: Add CAAM node
  random: Use wait_event_freezable() in add_hwgenerator_randomness()
  crypto: ux500 - Fix COMPILE_TEST warnings
  ...
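The "library helpers for AES ... for single-block users" called out above are the aes_expandkey()/aes_encrypt() routines from <crypto/aes.h>, which the arm glue code in the diff below switches to for its non-SIMD fallback paths. A minimal sketch of a single-block caller follows; the function name demo_encrypt_one_block and its surrounding context are illustrative assumptions, not part of this series:

#include <linux/types.h>
#include <linux/string.h>
#include <crypto/aes.h>

static int demo_encrypt_one_block(const u8 *key, unsigned int key_len,
				  const u8 in[AES_BLOCK_SIZE],
				  u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	/* expand the user key into round keys; fails on an invalid key_len */
	err = aes_expandkey(&ctx, key, key_len);
	if (err)
		return err;

	/* encrypt a single 16-byte block; no SIMD/NEON context required */
	aes_encrypt(&ctx, out, in);

	/* wipe the expanded key material before returning */
	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}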
@@ -82,8 +82,8 @@ config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
select CRYPTO_AES
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes
@@ -44,63 +44,73 @@
veor q0, q0, \key3
.endm

.macro enc_dround_3x, key1, key2
.macro enc_dround_4x, key1, key2
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
enc_round q3, \key1
enc_round q0, \key2
enc_round q1, \key2
enc_round q2, \key2
enc_round q3, \key2
.endm

.macro dec_dround_3x, key1, key2
.macro dec_dround_4x, key1, key2
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
dec_round q3, \key1
dec_round q0, \key2
dec_round q1, \key2
dec_round q2, \key2
dec_round q3, \key2
.endm

.macro enc_fround_3x, key1, key2, key3
.macro enc_fround_4x, key1, key2, key3
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
enc_round q3, \key1
aese.8 q0, \key2
aese.8 q1, \key2
aese.8 q2, \key2
aese.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
veor q3, q3, \key3
.endm

.macro dec_fround_3x, key1, key2, key3
.macro dec_fround_4x, key1, key2, key3
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
dec_round q3, \key1
aesd.8 q0, \key2
aesd.8 q1, \key2
aesd.8 q2, \key2
aesd.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
veor q3, q3, \key3
.endm

.macro do_block, dround, fround
cmp r3, #12 @ which key size?
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q8, q9
vld1.8 {q12-q13}, [ip]!
vld1.32 {q12-q13}, [ip]!
\dround q10, q11
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q12, q13
vld1.8 {q12-q13}, [ip]!
vld1.32 {q12-q13}, [ip]!
\dround q10, q11
blo 0f @ AES-128: 10 rounds
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q12, q13
beq 1f @ AES-192: 12 rounds
vld1.8 {q12-q13}, [ip]
vld1.32 {q12-q13}, [ip]
\dround q10, q11
0: \fround q12, q13, q14
bx lr
@@ -114,8 +124,9 @@
* transforms. These should preserve all registers except q0 - q2 and ip
* Arguments:
* q0 : first in/output block
* q1 : second in/output block (_3x version only)
* q2 : third in/output block (_3x version only)
* q1 : second in/output block (_4x version only)
* q2 : third in/output block (_4x version only)
* q3 : fourth in/output block (_4x version only)
* q8 : first round key
* q9 : secound round key
* q14 : final round key
@@ -136,44 +147,44 @@ aes_decrypt:
ENDPROC(aes_decrypt)

.align 6
aes_encrypt_3x:
aes_encrypt_4x:
add ip, r2, #32 @ 3rd round key
do_block enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
do_block enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

.align 6
aes_decrypt_3x:
aes_decrypt_4x:
add ip, r2, #32 @ 3rd round key
do_block dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
do_block dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

.macro prepare_key, rk, rounds
add ip, \rk, \rounds, lsl #4
vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
vld1.8 {q14}, [ip] @ load last round key
vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
vld1.32 {q14}, [ip] @ load last round key
.endm

/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
* aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
*/
ENTRY(ce_aes_ecb_encrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbencloop3x:
subs r4, r4, #3
.Lecbencloop4x:
subs r4, r4, #4
bmi .Lecbenc1x
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_encrypt_3x
vld1.8 {q2-q3}, [r1]!
bl aes_encrypt_4x
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbencloop3x
vst1.8 {q2-q3}, [r0]!
b .Lecbencloop4x
.Lecbenc1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lecbencout
.Lecbencloop:
vld1.8 {q0}, [r1]!
@@ -189,17 +200,17 @@ ENTRY(ce_aes_ecb_decrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbdecloop3x:
subs r4, r4, #3
.Lecbdecloop4x:
subs r4, r4, #4
bmi .Lecbdec1x
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_decrypt_3x
vld1.8 {q2-q3}, [r1]!
bl aes_decrypt_4x
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbdecloop3x
vst1.8 {q2-q3}, [r0]!
b .Lecbdecloop4x
.Lecbdec1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lecbdecout
.Lecbdecloop:
vld1.8 {q0}, [r1]!
@@ -212,9 +223,9 @@ ENTRY(ce_aes_ecb_decrypt)
ENDPROC(ce_aes_ecb_decrypt)

/*
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
*/
ENTRY(ce_aes_cbc_encrypt)
@@ -236,88 +247,181 @@ ENDPROC(ce_aes_cbc_encrypt)
|
||||
ENTRY(ce_aes_cbc_decrypt)
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #16]
|
||||
vld1.8 {q6}, [r5] @ keep iv in q6
|
||||
vld1.8 {q15}, [r5] @ keep iv in q15
|
||||
prepare_key r2, r3
|
||||
.Lcbcdecloop3x:
|
||||
subs r4, r4, #3
|
||||
.Lcbcdecloop4x:
|
||||
subs r4, r4, #4
|
||||
bmi .Lcbcdec1x
|
||||
vld1.8 {q0-q1}, [r1]!
|
||||
vld1.8 {q2}, [r1]!
|
||||
vmov q3, q0
|
||||
vmov q4, q1
|
||||
vmov q5, q2
|
||||
bl aes_decrypt_3x
|
||||
veor q0, q0, q6
|
||||
veor q1, q1, q3
|
||||
veor q2, q2, q4
|
||||
vmov q6, q5
|
||||
vld1.8 {q2-q3}, [r1]!
|
||||
vmov q4, q0
|
||||
vmov q5, q1
|
||||
vmov q6, q2
|
||||
vmov q7, q3
|
||||
bl aes_decrypt_4x
|
||||
veor q0, q0, q15
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
veor q3, q3, q6
|
||||
vmov q15, q7
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
b .Lcbcdecloop3x
|
||||
vst1.8 {q2-q3}, [r0]!
|
||||
b .Lcbcdecloop4x
|
||||
.Lcbcdec1x:
|
||||
adds r4, r4, #3
|
||||
adds r4, r4, #4
|
||||
beq .Lcbcdecout
|
||||
vmov q15, q14 @ preserve last round key
|
||||
vmov q6, q14 @ preserve last round key
|
||||
.Lcbcdecloop:
|
||||
vld1.8 {q0}, [r1]! @ get next ct block
|
||||
veor q14, q15, q6 @ combine prev ct with last key
|
||||
vmov q6, q0
|
||||
vmov q15, q0
|
||||
bl aes_decrypt
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
bne .Lcbcdecloop
|
||||
.Lcbcdecout:
|
||||
vst1.8 {q6}, [r5] @ keep iv in q6
|
||||
vst1.8 {q15}, [r5] @ keep iv in q15
|
||||
pop {r4-r6, pc}
|
||||
ENDPROC(ce_aes_cbc_decrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
* int rounds, int bytes, u8 const iv[])
|
||||
* ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
* int rounds, int bytes, u8 const iv[])
|
||||
*/
|
||||
|
||||
ENTRY(ce_aes_cbc_cts_encrypt)
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #16]
|
||||
|
||||
movw ip, :lower16:.Lcts_permute_table
|
||||
movt ip, :upper16:.Lcts_permute_table
|
||||
sub r4, r4, #16
|
||||
add lr, ip, #32
|
||||
add ip, ip, r4
|
||||
sub lr, lr, r4
|
||||
vld1.8 {q5}, [ip]
|
||||
vld1.8 {q6}, [lr]
|
||||
|
||||
add ip, r1, r4
|
||||
vld1.8 {q0}, [r1] @ overlapping loads
|
||||
vld1.8 {q3}, [ip]
|
||||
|
||||
vld1.8 {q1}, [r5] @ get iv
|
||||
prepare_key r2, r3
|
||||
|
||||
veor q0, q0, q1 @ xor with iv
|
||||
bl aes_encrypt
|
||||
|
||||
vtbl.8 d4, {d0-d1}, d10
|
||||
vtbl.8 d5, {d0-d1}, d11
|
||||
vtbl.8 d2, {d6-d7}, d12
|
||||
vtbl.8 d3, {d6-d7}, d13
|
||||
|
||||
veor q0, q0, q1
|
||||
bl aes_encrypt
|
||||
|
||||
add r4, r0, r4
|
||||
vst1.8 {q2}, [r4] @ overlapping stores
|
||||
vst1.8 {q0}, [r0]
|
||||
|
||||
pop {r4-r6, pc}
|
||||
ENDPROC(ce_aes_cbc_cts_encrypt)
|
||||
|
||||
ENTRY(ce_aes_cbc_cts_decrypt)
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #16]
|
||||
|
||||
movw ip, :lower16:.Lcts_permute_table
|
||||
movt ip, :upper16:.Lcts_permute_table
|
||||
sub r4, r4, #16
|
||||
add lr, ip, #32
|
||||
add ip, ip, r4
|
||||
sub lr, lr, r4
|
||||
vld1.8 {q5}, [ip]
|
||||
vld1.8 {q6}, [lr]
|
||||
|
||||
add ip, r1, r4
|
||||
vld1.8 {q0}, [r1] @ overlapping loads
|
||||
vld1.8 {q1}, [ip]
|
||||
|
||||
vld1.8 {q3}, [r5] @ get iv
|
||||
prepare_key r2, r3
|
||||
|
||||
bl aes_decrypt
|
||||
|
||||
vtbl.8 d4, {d0-d1}, d10
|
||||
vtbl.8 d5, {d0-d1}, d11
|
||||
vtbx.8 d0, {d2-d3}, d12
|
||||
vtbx.8 d1, {d2-d3}, d13
|
||||
|
||||
veor q1, q1, q2
|
||||
bl aes_decrypt
|
||||
veor q0, q0, q3 @ xor with iv
|
||||
|
||||
add r4, r0, r4
|
||||
vst1.8 {q1}, [r4] @ overlapping stores
|
||||
vst1.8 {q0}, [r0]
|
||||
|
||||
pop {r4-r6, pc}
|
||||
ENDPROC(ce_aes_cbc_cts_decrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
|
||||
* int blocks, u8 ctr[])
|
||||
*/
|
||||
ENTRY(ce_aes_ctr_encrypt)
|
||||
push {r4-r6, lr}
|
||||
ldrd r4, r5, [sp, #16]
|
||||
vld1.8 {q6}, [r5] @ load ctr
|
||||
vld1.8 {q7}, [r5] @ load ctr
|
||||
prepare_key r2, r3
|
||||
vmov r6, s27 @ keep swabbed ctr in r6
|
||||
vmov r6, s31 @ keep swabbed ctr in r6
|
||||
rev r6, r6
|
||||
cmn r6, r4 @ 32 bit overflow?
|
||||
bcs .Lctrloop
|
||||
.Lctrloop3x:
|
||||
subs r4, r4, #3
|
||||
.Lctrloop4x:
|
||||
subs r4, r4, #4
|
||||
bmi .Lctr1x
|
||||
add r6, r6, #1
|
||||
vmov q0, q6
|
||||
vmov q1, q6
|
||||
vmov q0, q7
|
||||
vmov q1, q7
|
||||
rev ip, r6
|
||||
add r6, r6, #1
|
||||
vmov q2, q6
|
||||
vmov q2, q7
|
||||
vmov s7, ip
|
||||
rev ip, r6
|
||||
add r6, r6, #1
|
||||
vmov q3, q7
|
||||
vmov s11, ip
|
||||
vld1.8 {q3-q4}, [r1]!
|
||||
vld1.8 {q5}, [r1]!
|
||||
bl aes_encrypt_3x
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
rev ip, r6
|
||||
add r6, r6, #1
|
||||
vmov s15, ip
|
||||
vld1.8 {q4-q5}, [r1]!
|
||||
vld1.8 {q6}, [r1]!
|
||||
vld1.8 {q15}, [r1]!
|
||||
bl aes_encrypt_4x
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
veor q2, q2, q6
|
||||
veor q3, q3, q15
|
||||
rev ip, r6
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov s27, ip
|
||||
b .Lctrloop3x
|
||||
vst1.8 {q2-q3}, [r0]!
|
||||
vmov s31, ip
|
||||
b .Lctrloop4x
|
||||
.Lctr1x:
|
||||
adds r4, r4, #3
|
||||
adds r4, r4, #4
|
||||
beq .Lctrout
|
||||
.Lctrloop:
|
||||
vmov q0, q6
|
||||
vmov q0, q7
|
||||
bl aes_encrypt
|
||||
|
||||
adds r6, r6, #1 @ increment BE ctr
|
||||
rev ip, r6
|
||||
vmov s27, ip
|
||||
vmov s31, ip
|
||||
bcs .Lctrcarry
|
||||
|
||||
.Lctrcarrydone:
|
||||
@@ -329,7 +433,7 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
bne .Lctrloop
|
||||
|
||||
.Lctrout:
|
||||
vst1.8 {q6}, [r5] @ return next CTR value
|
||||
vst1.8 {q7}, [r5] @ return next CTR value
|
||||
pop {r4-r6, pc}
|
||||
|
||||
.Lctrtailblock:
|
||||
@@ -337,7 +441,7 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
b .Lctrout
|
||||
|
||||
.Lctrcarry:
|
||||
.irp sreg, s26, s25, s24
|
||||
.irp sreg, s30, s29, s28
|
||||
vmov ip, \sreg @ load next word of ctr
|
||||
rev ip, ip @ ... to handle the carry
|
||||
adds ip, ip, #1
|
||||
@@ -349,10 +453,10 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
ENDPROC(ce_aes_ctr_encrypt)
|
||||
|
||||
/*
|
||||
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
|
||||
* int blocks, u8 iv[], u8 const rk2[], int first)
|
||||
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
|
||||
* int blocks, u8 iv[], u8 const rk2[], int first)
|
||||
* aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
|
||||
* int bytes, u8 iv[], u32 const rk2[], int first)
|
||||
* aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
|
||||
* int bytes, u8 iv[], u32 const rk2[], int first)
|
||||
*/
|
||||
|
||||
.macro next_tweak, out, in, const, tmp
|
||||
@@ -363,13 +467,10 @@ ENDPROC(ce_aes_ctr_encrypt)
|
||||
veor \out, \out, \tmp
|
||||
.endm
|
||||
|
||||
.align 3
|
||||
.Lxts_mul_x:
|
||||
.quad 1, 0x87
|
||||
|
||||
ce_aes_xts_init:
|
||||
vldr d14, .Lxts_mul_x
|
||||
vldr d15, .Lxts_mul_x + 8
|
||||
vmov.i32 d30, #0x87 @ compose tweak mask vector
|
||||
vmovl.u32 q15, d30
|
||||
vshr.u64 d30, d31, #7
|
||||
|
||||
ldrd r4, r5, [sp, #16] @ load args
|
||||
ldr r6, [sp, #28]
|
||||
@@ -390,49 +491,86 @@ ENTRY(ce_aes_xts_encrypt)
|
||||
|
||||
bl ce_aes_xts_init @ run shared prologue
|
||||
prepare_key r2, r3
|
||||
vmov q3, q0
|
||||
vmov q4, q0
|
||||
|
||||
teq r6, #0 @ start of a block?
|
||||
bne .Lxtsenc3x
|
||||
bne .Lxtsenc4x
|
||||
|
||||
.Lxtsencloop3x:
|
||||
next_tweak q3, q3, q7, q6
|
||||
.Lxtsenc3x:
|
||||
subs r4, r4, #3
|
||||
.Lxtsencloop4x:
|
||||
next_tweak q4, q4, q15, q10
|
||||
.Lxtsenc4x:
|
||||
subs r4, r4, #64
|
||||
bmi .Lxtsenc1x
|
||||
vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
|
||||
vld1.8 {q2}, [r1]!
|
||||
next_tweak q4, q3, q7, q6
|
||||
veor q0, q0, q3
|
||||
next_tweak q5, q4, q7, q6
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
bl aes_encrypt_3x
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov q3, q5
|
||||
vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
|
||||
vld1.8 {q2-q3}, [r1]!
|
||||
next_tweak q5, q4, q15, q10
|
||||
veor q0, q0, q4
|
||||
next_tweak q6, q5, q15, q10
|
||||
veor q1, q1, q5
|
||||
next_tweak q7, q6, q15, q10
|
||||
veor q2, q2, q6
|
||||
veor q3, q3, q7
|
||||
bl aes_encrypt_4x
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
veor q2, q2, q6
|
||||
veor q3, q3, q7
|
||||
vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
|
||||
vst1.8 {q2-q3}, [r0]!
|
||||
vmov q4, q7
|
||||
teq r4, #0
|
||||
beq .Lxtsencout
|
||||
b .Lxtsencloop3x
|
||||
beq .Lxtsencret
|
||||
b .Lxtsencloop4x
|
||||
.Lxtsenc1x:
|
||||
adds r4, r4, #3
|
||||
adds r4, r4, #64
|
||||
beq .Lxtsencout
|
||||
subs r4, r4, #16
|
||||
bmi .LxtsencctsNx
|
||||
.Lxtsencloop:
|
||||
vld1.8 {q0}, [r1]!
|
||||
veor q0, q0, q3
|
||||
.Lxtsencctsout:
|
||||
veor q0, q0, q4
|
||||
bl aes_encrypt
|
||||
veor q0, q0, q3
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
veor q0, q0, q4
|
||||
teq r4, #0
|
||||
beq .Lxtsencout
|
||||
next_tweak q3, q3, q7, q6
|
||||
subs r4, r4, #16
|
||||
next_tweak q4, q4, q15, q6
|
||||
bmi .Lxtsenccts
|
||||
vst1.8 {q0}, [r0]!
|
||||
b .Lxtsencloop
|
||||
.Lxtsencout:
|
||||
vst1.8 {q3}, [r5]
|
||||
vst1.8 {q0}, [r0]
|
||||
.Lxtsencret:
|
||||
vst1.8 {q4}, [r5]
|
||||
pop {r4-r6, pc}
|
||||
|
||||
.LxtsencctsNx:
|
||||
vmov q0, q3
|
||||
sub r0, r0, #16
|
||||
.Lxtsenccts:
|
||||
movw ip, :lower16:.Lcts_permute_table
|
||||
movt ip, :upper16:.Lcts_permute_table
|
||||
|
||||
add r1, r1, r4 @ rewind input pointer
|
||||
add r4, r4, #16 @ # bytes in final block
|
||||
add lr, ip, #32
|
||||
add ip, ip, r4
|
||||
sub lr, lr, r4
|
||||
add r4, r0, r4 @ output address of final block
|
||||
|
||||
vld1.8 {q1}, [r1] @ load final partial block
|
||||
vld1.8 {q2}, [ip]
|
||||
vld1.8 {q3}, [lr]
|
||||
|
||||
vtbl.8 d4, {d0-d1}, d4
|
||||
vtbl.8 d5, {d0-d1}, d5
|
||||
vtbx.8 d0, {d2-d3}, d6
|
||||
vtbx.8 d1, {d2-d3}, d7
|
||||
|
||||
vst1.8 {q2}, [r4] @ overlapping stores
|
||||
mov r4, #0
|
||||
b .Lxtsencctsout
|
||||
ENDPROC(ce_aes_xts_encrypt)
|
||||
|
||||
|
||||
@@ -441,50 +579,90 @@ ENTRY(ce_aes_xts_decrypt)
|
||||
|
||||
bl ce_aes_xts_init @ run shared prologue
|
||||
prepare_key r2, r3
|
||||
vmov q3, q0
|
||||
vmov q4, q0
|
||||
|
||||
/* subtract 16 bytes if we are doing CTS */
|
||||
tst r4, #0xf
|
||||
subne r4, r4, #0x10
|
||||
|
||||
teq r6, #0 @ start of a block?
|
||||
bne .Lxtsdec3x
|
||||
bne .Lxtsdec4x
|
||||
|
||||
.Lxtsdecloop3x:
|
||||
next_tweak q3, q3, q7, q6
|
||||
.Lxtsdec3x:
|
||||
subs r4, r4, #3
|
||||
.Lxtsdecloop4x:
|
||||
next_tweak q4, q4, q15, q10
|
||||
.Lxtsdec4x:
|
||||
subs r4, r4, #64
|
||||
bmi .Lxtsdec1x
|
||||
vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
|
||||
vld1.8 {q2}, [r1]!
|
||||
next_tweak q4, q3, q7, q6
|
||||
veor q0, q0, q3
|
||||
next_tweak q5, q4, q7, q6
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
bl aes_decrypt_3x
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov q3, q5
|
||||
vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
|
||||
vld1.8 {q2-q3}, [r1]!
|
||||
next_tweak q5, q4, q15, q10
|
||||
veor q0, q0, q4
|
||||
next_tweak q6, q5, q15, q10
|
||||
veor q1, q1, q5
|
||||
next_tweak q7, q6, q15, q10
|
||||
veor q2, q2, q6
|
||||
veor q3, q3, q7
|
||||
bl aes_decrypt_4x
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
veor q2, q2, q6
|
||||
veor q3, q3, q7
|
||||
vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
|
||||
vst1.8 {q2-q3}, [r0]!
|
||||
vmov q4, q7
|
||||
teq r4, #0
|
||||
beq .Lxtsdecout
|
||||
b .Lxtsdecloop3x
|
||||
b .Lxtsdecloop4x
|
||||
.Lxtsdec1x:
|
||||
adds r4, r4, #3
|
||||
adds r4, r4, #64
|
||||
beq .Lxtsdecout
|
||||
subs r4, r4, #16
|
||||
.Lxtsdecloop:
|
||||
vld1.8 {q0}, [r1]!
|
||||
veor q0, q0, q3
|
||||
add ip, r2, #32 @ 3rd round key
|
||||
bmi .Lxtsdeccts
|
||||
.Lxtsdecctsout:
|
||||
veor q0, q0, q4
|
||||
bl aes_decrypt
|
||||
veor q0, q0, q3
|
||||
veor q0, q0, q4
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
teq r4, #0
|
||||
beq .Lxtsdecout
|
||||
next_tweak q3, q3, q7, q6
|
||||
subs r4, r4, #16
|
||||
next_tweak q4, q4, q15, q6
|
||||
b .Lxtsdecloop
|
||||
.Lxtsdecout:
|
||||
vst1.8 {q3}, [r5]
|
||||
vst1.8 {q4}, [r5]
|
||||
pop {r4-r6, pc}
|
||||
|
||||
.Lxtsdeccts:
|
||||
movw ip, :lower16:.Lcts_permute_table
|
||||
movt ip, :upper16:.Lcts_permute_table
|
||||
|
||||
add r1, r1, r4 @ rewind input pointer
|
||||
add r4, r4, #16 @ # bytes in final block
|
||||
add lr, ip, #32
|
||||
add ip, ip, r4
|
||||
sub lr, lr, r4
|
||||
add r4, r0, r4 @ output address of final block
|
||||
|
||||
next_tweak q5, q4, q15, q6
|
||||
|
||||
vld1.8 {q1}, [r1] @ load final partial block
|
||||
vld1.8 {q2}, [ip]
|
||||
vld1.8 {q3}, [lr]
|
||||
|
||||
veor q0, q0, q5
|
||||
bl aes_decrypt
|
||||
veor q0, q0, q5
|
||||
|
||||
vtbl.8 d4, {d0-d1}, d4
|
||||
vtbl.8 d5, {d0-d1}, d5
|
||||
vtbx.8 d0, {d2-d3}, d6
|
||||
vtbx.8 d1, {d2-d3}, d7
|
||||
|
||||
vst1.8 {q2}, [r4] @ overlapping stores
|
||||
mov r4, #0
|
||||
b .Lxtsdecctsout
|
||||
ENDPROC(ce_aes_xts_decrypt)
|
||||
|
||||
/*
|
||||
@@ -505,8 +683,18 @@ ENDPROC(ce_aes_sub)
|
||||
* operation on round key *src
|
||||
*/
|
||||
ENTRY(ce_aes_invert)
|
||||
vld1.8 {q0}, [r1]
|
||||
vld1.32 {q0}, [r1]
|
||||
aesimc.8 q0, q0
|
||||
vst1.8 {q0}, [r0]
|
||||
vst1.32 {q0}, [r0]
|
||||
bx lr
|
||||
ENDPROC(ce_aes_invert)
|
||||
|
||||
.section ".rodata", "a"
|
||||
.align 6
|
||||
.Lcts_permute_table:
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
|
@@ -7,9 +7,13 @@
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/module.h>
|
||||
#include <crypto/xts.h>
|
||||
@@ -22,25 +26,29 @@ MODULE_LICENSE("GPL v2");
|
||||
asmlinkage u32 ce_aes_sub(u32 input);
|
||||
asmlinkage void ce_aes_invert(void *dst, void *src);
|
||||
|
||||
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks);
|
||||
asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks);
|
||||
|
||||
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 const iv[]);
|
||||
asmlinkage void ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 const iv[]);
|
||||
|
||||
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 ctr[]);
|
||||
|
||||
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 iv[],
|
||||
u8 const rk2[], int first);
|
||||
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 iv[],
|
||||
u8 const rk2[], int first);
|
||||
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
|
||||
int rounds, int bytes, u8 iv[],
|
||||
u32 const rk2[], int first);
|
||||
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
|
||||
int rounds, int bytes, u8 iv[],
|
||||
u32 const rk2[], int first);
|
||||
|
||||
struct aes_block {
|
||||
u8 b[AES_BLOCK_SIZE];
|
||||
@@ -77,21 +85,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
key_len != AES_KEYSIZE_256)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(ctx->key_enc, in_key, key_len);
|
||||
ctx->key_length = key_len;
|
||||
for (i = 0; i < kwords; i++)
|
||||
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
|
||||
|
||||
kernel_neon_begin();
|
||||
for (i = 0; i < sizeof(rcon); i++) {
|
||||
u32 *rki = ctx->key_enc + (i * kwords);
|
||||
u32 *rko = rki + kwords;
|
||||
|
||||
#ifndef CONFIG_CPU_BIG_ENDIAN
|
||||
rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
|
||||
rko[0] = rko[0] ^ rki[0] ^ rcon[i];
|
||||
#else
|
||||
rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
|
||||
rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
|
||||
#endif
|
||||
rko[1] = rko[0] ^ rki[1];
|
||||
rko[2] = rko[1] ^ rki[2];
|
||||
rko[3] = rko[2] ^ rki[3];
|
||||
@@ -178,15 +182,15 @@ static int ecb_encrypt(struct skcipher_request *req)
|
||||
unsigned int blocks;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, num_rounds(ctx), blocks);
|
||||
ctx->key_enc, num_rounds(ctx), blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -198,58 +202,192 @@ static int ecb_decrypt(struct skcipher_request *req)
|
||||
unsigned int blocks;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, num_rounds(ctx), blocks);
|
||||
ctx->key_dec, num_rounds(ctx), blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_encrypt_walk(struct skcipher_request *req,
|
||||
struct skcipher_walk *walk)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
unsigned int blocks;
|
||||
int err = 0;
|
||||
|
||||
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
ce_aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
|
||||
ctx->key_enc, num_rounds(ctx), blocks,
|
||||
walk->iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
return cbc_encrypt_walk(req, &walk);
|
||||
}
|
||||
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
|
||||
walk.iv);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
static int cbc_decrypt_walk(struct skcipher_request *req,
|
||||
struct skcipher_walk *walk)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
unsigned int blocks;
|
||||
int err = 0;
|
||||
|
||||
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
ce_aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
|
||||
ctx->key_dec, num_rounds(ctx), blocks,
|
||||
walk->iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
return cbc_decrypt_walk(req, &walk);
|
||||
}
|
||||
|
||||
static int cts_cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
struct scatterlist *src = req->src, *dst = req->dst;
|
||||
struct scatterlist sg_src[2], sg_dst[2];
|
||||
struct skcipher_request subreq;
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(&subreq, tfm);
|
||||
skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
|
||||
NULL, NULL);
|
||||
|
||||
if (req->cryptlen <= AES_BLOCK_SIZE) {
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
cbc_blocks = 1;
|
||||
}
|
||||
|
||||
if (cbc_blocks > 0) {
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &subreq, false) ?:
|
||||
cbc_encrypt_walk(&subreq, &walk);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (req->cryptlen == AES_BLOCK_SIZE)
|
||||
return 0;
|
||||
|
||||
dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(sg_dst, req->dst,
|
||||
subreq.cryptlen);
|
||||
}
|
||||
|
||||
/* handle ciphertext stealing */
|
||||
skcipher_request_set_crypt(&subreq, src, dst,
|
||||
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &subreq, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, num_rounds(ctx), blocks,
|
||||
walk.iv);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
ce_aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_enc, num_rounds(ctx), walk.nbytes,
|
||||
walk.iv);
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static int cts_cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
struct scatterlist *src = req->src, *dst = req->dst;
|
||||
struct scatterlist sg_src[2], sg_dst[2];
|
||||
struct skcipher_request subreq;
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(&subreq, tfm);
|
||||
skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
|
||||
NULL, NULL);
|
||||
|
||||
if (req->cryptlen <= AES_BLOCK_SIZE) {
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
cbc_blocks = 1;
|
||||
}
|
||||
|
||||
if (cbc_blocks > 0) {
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &subreq, false) ?:
|
||||
cbc_decrypt_walk(&subreq, &walk);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (req->cryptlen == AES_BLOCK_SIZE)
|
||||
return 0;
|
||||
|
||||
dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(sg_dst, req->dst,
|
||||
subreq.cryptlen);
|
||||
}
|
||||
|
||||
/* handle ciphertext stealing */
|
||||
skcipher_request_set_crypt(&subreq, src, dst,
|
||||
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &subreq, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
ce_aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_dec, num_rounds(ctx), walk.nbytes,
|
||||
walk.iv);
|
||||
kernel_neon_end();
|
||||
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static int ctr_encrypt(struct skcipher_request *req)
|
||||
@@ -259,13 +397,14 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
struct skcipher_walk walk;
|
||||
int err, blocks;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
kernel_neon_begin();
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
|
||||
ctx->key_enc, num_rounds(ctx), blocks,
|
||||
walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
if (walk.nbytes) {
|
||||
@@ -279,36 +418,109 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
*/
|
||||
blocks = -1;
|
||||
|
||||
ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
|
||||
num_rounds(ctx), blocks, walk.iv);
|
||||
kernel_neon_begin();
|
||||
ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx),
|
||||
blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Temporarily disable interrupts to avoid races where
|
||||
* cachelines are evicted when the CPU is interrupted
|
||||
* to do something else.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
aes_encrypt(ctx, dst, src);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static int ctr_encrypt_sync(struct skcipher_request *req)
|
||||
{
|
||||
if (!crypto_simd_usable())
|
||||
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
|
||||
|
||||
return ctr_encrypt(req);
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err, first, rounds = num_rounds(&ctx->key1);
|
||||
int tail = req->cryptlen % AES_BLOCK_SIZE;
|
||||
struct scatterlist sg_src[2], sg_dst[2];
|
||||
struct skcipher_request subreq;
|
||||
struct scatterlist *src, *dst;
|
||||
struct skcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
|
||||
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
|
||||
AES_BLOCK_SIZE) - 2;
|
||||
|
||||
skcipher_walk_abort(&walk);
|
||||
|
||||
skcipher_request_set_tfm(&subreq, tfm);
|
||||
skcipher_request_set_callback(&subreq,
|
||||
skcipher_request_flags(req),
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
xts_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
req = &subreq;
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
} else {
|
||||
tail = 0;
|
||||
}
|
||||
|
||||
for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
|
||||
int nbytes = walk.nbytes;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
nbytes &= ~(AES_BLOCK_SIZE - 1);
|
||||
|
||||
kernel_neon_begin();
|
||||
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key1.key_enc, rounds, nbytes, walk.iv,
|
||||
ctx->key2.key_enc, first);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
if (err || likely(!tail))
|
||||
return err;
|
||||
|
||||
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
|
||||
|
||||
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_enc, rounds, blocks,
|
||||
walk.iv, (u8 *)ctx->key2.key_enc, first);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key1.key_enc, rounds, walk.nbytes, walk.iv,
|
||||
ctx->key2.key_enc, first);
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -316,87 +528,165 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err, first, rounds = num_rounds(&ctx->key1);
|
||||
int tail = req->cryptlen % AES_BLOCK_SIZE;
|
||||
struct scatterlist sg_src[2], sg_dst[2];
|
||||
struct skcipher_request subreq;
|
||||
struct scatterlist *src, *dst;
|
||||
struct skcipher_walk walk;
|
||||
unsigned int blocks;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
|
||||
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
|
||||
AES_BLOCK_SIZE) - 2;
|
||||
|
||||
skcipher_walk_abort(&walk);
|
||||
|
||||
skcipher_request_set_tfm(&subreq, tfm);
|
||||
skcipher_request_set_callback(&subreq,
|
||||
skcipher_request_flags(req),
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
xts_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
req = &subreq;
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
} else {
|
||||
tail = 0;
|
||||
}
|
||||
|
||||
for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
|
||||
int nbytes = walk.nbytes;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
nbytes &= ~(AES_BLOCK_SIZE - 1);
|
||||
|
||||
kernel_neon_begin();
|
||||
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key1.key_dec, rounds, nbytes, walk.iv,
|
||||
ctx->key2.key_enc, first);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
if (err || likely(!tail))
|
||||
return err;
|
||||
|
||||
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
|
||||
|
||||
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_dec, rounds, blocks,
|
||||
walk.iv, (u8 *)ctx->key2.key_enc, first);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key1.key_dec, rounds, walk.nbytes, walk.iv,
|
||||
ctx->key2.key_enc, first);
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static struct skcipher_alg aes_algs[] = { {
|
||||
.base = {
|
||||
.cra_name = "__ecb(aes)",
|
||||
.cra_driver_name = "__ecb-aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
.base.cra_name = "__ecb(aes)",
|
||||
.base.cra_driver_name = "__ecb-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cbc(aes)",
|
||||
.cra_driver_name = "__cbc-aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
.base.cra_name = "__cbc(aes)",
|
||||
.base.cra_driver_name = "__cbc-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(aes)",
|
||||
.cra_driver_name = "__ctr-aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
.base.cra_name = "__cts(cbc(aes))",
|
||||
.base.cra_driver_name = "__cts-cbc-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.walksize = 2 * AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = cts_cbc_encrypt,
|
||||
.decrypt = cts_cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__xts(aes)",
|
||||
.cra_driver_name = "__xts-aes-ce",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = xts_set_key,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
.base.cra_name = "__ctr(aes)",
|
||||
.base.cra_driver_name = "__ctr-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base.cra_name = "ctr(aes)",
|
||||
.base.cra_driver_name = "ctr-aes-ce-sync",
|
||||
.base.cra_priority = 300 - 1,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = ce_aes_setkey,
|
||||
.encrypt = ctr_encrypt_sync,
|
||||
.decrypt = ctr_encrypt_sync,
|
||||
}, {
|
||||
.base.cra_name = "__xts(aes)",
|
||||
.base.cra_driver_name = "__xts-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.walksize = 2 * AES_BLOCK_SIZE,
|
||||
.setkey = xts_set_key,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
} };
|
||||
|
||||
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
|
||||
@@ -425,6 +715,9 @@ static int __init aes_init(void)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
|
||||
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
|
||||
continue;
|
||||
|
||||
algname = aes_algs[i].base.cra_name + 2;
|
||||
drvname = aes_algs[i].base.cra_driver_name + 2;
|
||||
basename = aes_algs[i].base.cra_driver_name;
|
||||
|
@@ -219,43 +219,5 @@ ENDPROC(__aes_arm_encrypt)
|
||||
|
||||
.align 5
|
||||
ENTRY(__aes_arm_decrypt)
|
||||
do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
|
||||
do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0
|
||||
ENDPROC(__aes_arm_decrypt)
|
||||
|
||||
.section ".rodata", "a"
|
||||
.align L1_CACHE_SHIFT
|
||||
.type __aes_arm_inverse_sbox, %object
|
||||
__aes_arm_inverse_sbox:
|
||||
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
|
||||
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
|
||||
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
|
||||
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
|
||||
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
|
||||
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
|
||||
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
|
||||
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
|
||||
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
|
||||
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
|
||||
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
|
||||
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
|
||||
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
|
||||
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
|
||||
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
|
||||
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
|
||||
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
|
||||
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
|
||||
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
|
||||
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
|
||||
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
|
||||
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
|
||||
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
|
||||
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
|
||||
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
|
||||
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
|
||||
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
|
||||
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
|
||||
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
|
||||
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
|
||||
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
||||
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
|
||||
.size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
|
||||
|
@@ -11,12 +11,9 @@
|
||||
#include <linux/module.h>
|
||||
|
||||
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
|
||||
EXPORT_SYMBOL(__aes_arm_encrypt);
|
||||
|
||||
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
|
||||
EXPORT_SYMBOL(__aes_arm_decrypt);
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
@@ -24,7 +21,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
@@ -44,8 +41,8 @@ static struct crypto_alg aes_alg = {
|
||||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cra_cipher.cia_setkey = crypto_aes_set_key,
|
||||
.cra_cipher.cia_encrypt = aes_encrypt,
|
||||
.cra_cipher.cia_decrypt = aes_decrypt,
|
||||
.cra_cipher.cia_encrypt = aes_arm_encrypt,
|
||||
.cra_cipher.cia_decrypt = aes_arm_decrypt,
|
||||
|
||||
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
.cra_alignmask = 3,
|
||||
|
@@ -887,19 +887,17 @@ ENDPROC(aesbs_ctr_encrypt)
|
||||
veor \out, \out, \tmp
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
.Lxts_mul_x:
|
||||
.quad 1, 0x87
|
||||
|
||||
/*
|
||||
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[])
|
||||
* int blocks, u8 iv[], int reorder_last_tweak)
|
||||
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[])
|
||||
* int blocks, u8 iv[], int reorder_last_tweak)
|
||||
*/
|
||||
__xts_prepare8:
|
||||
vld1.8 {q14}, [r7] // load iv
|
||||
__ldr q15, .Lxts_mul_x // load tweak mask
|
||||
vmov.i32 d30, #0x87 // compose tweak mask vector
|
||||
vmovl.u32 q15, d30
|
||||
vshr.u64 d30, d31, #7
|
||||
vmov q12, q14
|
||||
|
||||
__adr ip, 0f
|
||||
@@ -946,17 +944,25 @@ __xts_prepare8:
|
||||
|
||||
vld1.8 {q7}, [r1]!
|
||||
next_tweak q14, q12, q15, q13
|
||||
veor q7, q7, q12
|
||||
THUMB( itt le )
|
||||
W(cmple) r8, #0
|
||||
ble 1f
|
||||
0: veor q7, q7, q12
|
||||
vst1.8 {q12}, [r4, :128]
|
||||
|
||||
0: vst1.8 {q14}, [r7] // store next iv
|
||||
vst1.8 {q14}, [r7] // store next iv
|
||||
bx lr
|
||||
|
||||
1: vswp q12, q14
|
||||
b 0b
|
||||
ENDPROC(__xts_prepare8)
|
||||
|
||||
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||
push {r4-r8, lr}
|
||||
mov r5, sp // preserve sp
|
||||
ldrd r6, r7, [sp, #24] // get blocks and iv args
|
||||
ldr r8, [sp, #32] // reorder final tweak?
|
||||
rsb r8, r8, #1
|
||||
sub ip, sp, #128 // make room for 8x tweak
|
||||
bic ip, ip, #0xf // align sp to 16 bytes
|
||||
mov sp, ip
|
||||
|
@@ -6,10 +6,13 @@
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/cbc.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
@@ -35,9 +38,9 @@ asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 ctr[], u8 final[]);
|
||||
|
||||
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
int rounds, int blocks, u8 iv[], int);
|
||||
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
int rounds, int blocks, u8 iv[], int);
|
||||
|
||||
struct aesbs_ctx {
|
||||
int rounds;
|
||||
@@ -51,9 +54,15 @@ struct aesbs_cbc_ctx {
|
||||
|
||||
struct aesbs_xts_ctx {
|
||||
struct aesbs_ctx key;
|
||||
struct crypto_cipher *cts_tfm;
|
||||
struct crypto_cipher *tweak_tfm;
|
||||
};
|
||||
|
||||
struct aesbs_ctr_ctx {
|
||||
struct aesbs_ctx key; /* must be first member */
|
||||
struct crypto_aes_ctx fallback;
|
||||
};
|
||||
|
||||
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
@@ -61,7 +70,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
err = aes_expandkey(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@@ -83,9 +92,8 @@ static int __ecb_crypt(struct skcipher_request *req,
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
@@ -93,12 +101,13 @@ static int __ecb_crypt(struct skcipher_request *req,
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
kernel_neon_begin();
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
||||
ctx->rounds, blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
@@ -120,7 +129,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
struct crypto_aes_ctx rk;
int err;

err = crypto_aes_expand_key(&rk, in_key, key_len);
err = aes_expandkey(&rk, in_key, key_len);
if (err)
return err;

@@ -152,9 +161,8 @@ static int cbc_decrypt(struct skcipher_request *req)
struct skcipher_walk walk;
int err;

err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);

kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

@@ -162,13 +170,14 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);

kernel_neon_begin();
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key.rk, ctx->key.rounds, blocks,
walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();

return err;
}
@@ -189,6 +198,25 @@ static void cbc_exit(struct crypto_tfm *tfm)
crypto_free_cipher(ctx->enc_tfm);
}

static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;

err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err)
return err;

ctx->key.rounds = 6 + key_len / 4;

kernel_neon_begin();
aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
kernel_neon_end();

return 0;
}
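
Note: ctx->key.rounds = 6 + key_len / 4 is the standard AES round count as a function of key length in bytes: 10 rounds for AES-128, 12 for AES-192, 14 for AES-256. A one-liner check of that relationship:

#include <assert.h>

static int aes_rounds(int key_len_bytes)
{
	return 6 + key_len_bytes / 4;
}

int main(void)
{
	assert(aes_rounds(16) == 10);	/* AES-128 */
	assert(aes_rounds(24) == 12);	/* AES-192 */
	assert(aes_rounds(32) == 14);	/* AES-256 */
	return 0;
}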

static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -197,9 +225,8 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 buf[AES_BLOCK_SIZE];
int err;

err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);

kernel_neon_begin();
while (walk.nbytes > 0) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -210,8 +237,10 @@ static int ctr_encrypt(struct skcipher_request *req)
final = NULL;
}

kernel_neon_begin();
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->rk, ctx->rounds, blocks, walk.iv, final);
kernel_neon_end();

if (final) {
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -226,11 +255,33 @@ static int ctr_encrypt(struct skcipher_request *req)
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();

return err;
}

static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned long flags;

/*
* Temporarily disable interrupts to avoid races where
* cachelines are evicted when the CPU is interrupted
* to do something else.
*/
local_irq_save(flags);
aes_encrypt(&ctx->fallback, dst, src);
local_irq_restore(flags);
}

static int ctr_encrypt_sync(struct skcipher_request *req)
{
if (!crypto_simd_usable())
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);

return ctr_encrypt(req);
}
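
Note: ctr_encrypt_one() and ctr_encrypt_sync() give the synchronous "ctr(aes)" instance a working path when NEON cannot be used: each keystream block then comes from the aes_encrypt() library call instead of the bit-sliced code. For reference, a generic CTR sketch with a placeholder block cipher (illustrative only, not the driver's code path): the keystream is the encrypted counter, data is XORed with it, and the counter is bumped big-endian, so the same routine serves encryption and decryption.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK 16

/* Placeholder single-block "cipher"; only the CTR structure matters here. */
static void block_encrypt(const uint8_t in[BLOCK], uint8_t out[BLOCK])
{
	for (int i = 0; i < BLOCK; i++)
		out[i] = (uint8_t)(in[i] ^ 0x3C);
}

/* Big-endian counter increment, as CTR mode uses. */
static void ctr_increment(uint8_t ctr[BLOCK])
{
	for (int i = BLOCK - 1; i >= 0; i--)
		if (++ctr[i])
			break;
}

/* keystream = E(counter); output = input XOR keystream. */
static void ctr_crypt(uint8_t ctr[BLOCK], const uint8_t *in, uint8_t *out, size_t len)
{
	uint8_t ks[BLOCK];

	while (len) {
		size_t n = len < BLOCK ? len : BLOCK;

		block_encrypt(ctr, ks);
		for (size_t i = 0; i < n; i++)
			out[i] = in[i] ^ ks[i];
		ctr_increment(ctr);

		in += n;
		out += n;
		len -= n;
	}
}

int main(void)
{
	uint8_t iv[BLOCK] = { 0 }, iv2[BLOCK] = { 0 };
	uint8_t msg[20] = "ctr mode round trip", ct[20], pt[20];

	ctr_crypt(iv, msg, ct, sizeof(msg));
	ctr_crypt(iv2, ct, pt, sizeof(msg));	/* decrypt == encrypt */
	printf("%s\n", memcmp(msg, pt, sizeof(msg)) ? "mismatch" : "ok");
	return 0;
}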

static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -242,6 +293,9 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err;

key_len /= 2;
err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len);
if (err)
return err;
err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len);
if (err)
return err;
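
Note: the extra setkey lines above split the XTS key in half, keying the new cts_tfm (used for the stolen final block) with the first half and the tweak cipher with the second half. A minimal model of that split, buffers only (helper name is illustrative):

#include <assert.h>
#include <string.h>

/* Split a 2*N-byte XTS key: first half -> data/CTS cipher key,
 * second half -> tweak cipher key. */
static void xts_split_key(const unsigned char *in_key, size_t key_len,
			  unsigned char *data_key, unsigned char *tweak_key)
{
	size_t half = key_len / 2;

	memcpy(data_key, in_key, half);
	memcpy(tweak_key, in_key + half, half);
}

int main(void)
{
	unsigned char xts_key[64], dk[32], tk[32];

	memset(xts_key, 0x11, 32);		/* data-key half */
	memset(xts_key + 32, 0x22, 32);		/* tweak-key half */
	xts_split_key(xts_key, sizeof(xts_key), dk, tk);

	assert(dk[0] == 0x11 && tk[0] == 0x22);
	return 0;
}
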
@@ -253,7 +307,13 @@ static int xts_init(struct crypto_tfm *tfm)
{
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);

ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->cts_tfm))
return PTR_ERR(ctx->cts_tfm);

ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->tweak_tfm))
crypto_free_cipher(ctx->cts_tfm);

return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
}
@@ -263,49 +323,89 @@ static void xts_exit(struct crypto_tfm *tfm)
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);

crypto_free_cipher(ctx->tweak_tfm);
crypto_free_cipher(ctx->cts_tfm);
}

static int __xts_crypt(struct skcipher_request *req,
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]))
int rounds, int blocks, u8 iv[], int))
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct skcipher_request subreq;
u8 buf[2 * AES_BLOCK_SIZE];
struct skcipher_walk walk;
int err;

if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;

if (unlikely(tail)) {
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
req->cryptlen - tail, req->iv);
req = &subreq;
}

err = skcipher_walk_virt(&walk, req, true);
if (err)
return err;

crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);

kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
int reorder_last_tweak = !encrypt && tail > 0;

if (walk.nbytes < walk.total)
if (walk.nbytes < walk.total) {
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
reorder_last_tweak = 0;
}

kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
ctx->key.rounds, blocks, walk.iv);
ctx->key.rounds, blocks, walk.iv, reorder_last_tweak);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();

return err;
if (err || likely(!tail))
return err;

/* handle ciphertext stealing */
scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE, 0);
memcpy(buf + AES_BLOCK_SIZE, buf, tail);
scatterwalk_map_and_copy(buf, req->src, req->cryptlen, tail, 0);

crypto_xor(buf, req->iv, AES_BLOCK_SIZE);

if (encrypt)
crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf);
else
crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf);

crypto_xor(buf, req->iv, AES_BLOCK_SIZE);

scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE + tail, 1);
return 0;
}
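
Note: the tail handling above implements XTS ciphertext stealing: after the full blocks are processed, the last full output block and the partial input tail are recombined into one extra single-block operation, and the result overwrites the final AES_BLOCK_SIZE + tail bytes. Below is a byte-movement model of that recombination only; the tweak XORs and the encrypt/decrypt choice are collapsed into a placeholder transform, and the buffer handling mirrors the buf[] logic in __xts_crypt():

#include <stdio.h>
#include <string.h>

#define BLOCK 16

/* Placeholder for the single tweaked-AES call made on the stolen block. */
static void one_block(unsigned char b[BLOCK])
{
	for (int i = 0; i < BLOCK; i++)
		b[i] ^= 0xA5;
}

/*
 * dst already holds the output of the full-block pass over the first
 * len - (len % BLOCK) bytes; src is the original input; len % BLOCK != 0.
 */
static void steal_tail(unsigned char *dst, const unsigned char *src, size_t len)
{
	size_t tail = len % BLOCK;
	unsigned char buf[2 * BLOCK];

	/* Last full output block ... */
	memcpy(buf, dst + len - tail - BLOCK, BLOCK);
	/* ... its leading bytes become the final partial output block ... */
	memcpy(buf + BLOCK, buf, tail);
	/* ... and the input tail tops up the block that is processed again. */
	memcpy(buf, src + len - tail, tail);

	one_block(buf);

	/* Write back one full block plus the stolen tail. */
	memcpy(dst + len - tail - BLOCK, buf, BLOCK + tail);
}

int main(void)
{
	unsigned char src[21] = "xts ciphertext steal", dst[21];

	memcpy(dst, src, sizeof(src));	/* pretend full blocks were processed */
	steal_tail(dst, src, sizeof(src));
	printf("rewrote %zu trailing bytes\n", sizeof(src) % BLOCK + BLOCK);
	return 0;
}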

static int xts_encrypt(struct skcipher_request *req)
{
return __xts_crypt(req, aesbs_xts_encrypt);
return __xts_crypt(req, true, aesbs_xts_encrypt);
}

static int xts_decrypt(struct skcipher_request *req)
{
return __xts_crypt(req, aesbs_xts_decrypt);
return __xts_crypt(req, false, aesbs_xts_decrypt);
}

static struct skcipher_alg aes_algs[] = { {
@@ -358,6 +458,22 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.base.cra_name = "ctr(aes)",
.base.cra_driver_name = "ctr-aes-neonbs-sync",
.base.cra_priority = 250 - 1,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.base.cra_module = THIS_MODULE,

.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_ctr_setkey_sync,
.encrypt = ctr_encrypt_sync,
.decrypt = ctr_encrypt_sync,
}, {
.base.cra_name = "__xts(aes)",
.base.cra_driver_name = "__xts-aes-neonbs",

@@ -9,6 +9,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/b128ops.h>
#include <crypto/cryptd.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
@@ -17,7 +18,7 @@
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");
@@ -30,6 +31,8 @@ struct ghash_key {
u64 h2[2];
u64 h3[2];
u64 h4[2];

be128 k;
};

struct ghash_desc_ctx {
@@ -62,6 +65,36 @@ static int ghash_init(struct shash_desc *desc)
return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head)
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
pmull_ghash_update(blocks, dg, src, key, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

do {
const u8 *in = src;

if (head) {
in = head;
blocks++;
head = NULL;
} else {
src += GHASH_BLOCK_SIZE;
}

crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
gf128mul_lle(&dst, &key->k);
} while (--blocks);

dg[0] = be64_to_cpu(dst.b);
dg[1] = be64_to_cpu(dst.a);
}
}
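
Note: ghash_do_update() is the new SIMD/non-SIMD switch: with NEON available it calls the PMULL code, otherwise it falls back to the Horner-style fold Y_i = (Y_{i-1} XOR X_i) * H using gf128mul_lle() and the raw key now kept in key->k. The sketch below shows only that fold structure; the GF(2^128) multiply is left as an empty stub, so this is not a working GHASH:

#include <stdint.h>
#include <string.h>

#define GHASH_BLOCK 16

/* Stub standing in for gf128mul_lle(); a real implementation multiplies
 * acc by h in GF(2^128). Intentionally a no-op here. */
static void gf128_mul(uint8_t acc[GHASH_BLOCK], const uint8_t h[GHASH_BLOCK])
{
	(void)acc;
	(void)h;
}

/* One block at a time: acc = (acc XOR block) * H. */
static void ghash_fold(uint8_t acc[GHASH_BLOCK], const uint8_t h[GHASH_BLOCK],
		       const uint8_t *src, size_t blocks)
{
	while (blocks--) {
		for (int i = 0; i < GHASH_BLOCK; i++)
			acc[i] ^= src[i];
		gf128_mul(acc, h);
		src += GHASH_BLOCK;
	}
}

int main(void)
{
	uint8_t acc[GHASH_BLOCK] = { 0 }, h[GHASH_BLOCK] = { 0x42 };
	uint8_t data[2 * GHASH_BLOCK] = { 0 };

	ghash_fold(acc, h, data, 2);
	return 0;
}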

static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -85,10 +118,8 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;

kernel_neon_begin();
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
ghash_do_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
}
@@ -106,9 +137,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
struct ghash_key *key = crypto_shash_ctx(desc->tfm);

memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
kernel_neon_begin();
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -132,24 +161,25 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
be128 h, k;
be128 h;

if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}

memcpy(&k, inkey, GHASH_BLOCK_SIZE);
ghash_reflect(key->h, &k);
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
ghash_reflect(key->h, &key->k);

h = k;
gf128mul_lle(&h, &k);
h = key->k;
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h2, &h);

gf128mul_lle(&h, &k);
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h3, &h);

gf128mul_lle(&h, &k);
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h4, &h);

return 0;
@@ -162,15 +192,13 @@ static struct shash_alg ghash_alg = {
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "__ghash",
.cra_driver_name = "__driver-ghash-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
},

.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key),
.base.cra_module = THIS_MODULE,
};

static int ghash_async_init(struct ahash_request *req)
@@ -285,9 +313,7 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);

cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
cryptd_tfm = cryptd_alloc_ahash("ghash-ce-sync", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;

@@ -39,7 +39,7 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
}
EXPORT_SYMBOL(crypto_sha256_arm_update);

static int sha256_final(struct shash_desc *desc, u8 *out)
static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
{
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha256_block_data_order);
@@ -51,7 +51,7 @@ int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
{
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
return sha256_final(desc, out);
return crypto_sha256_arm_final(desc, out);
}
EXPORT_SYMBOL(crypto_sha256_arm_finup);

@@ -59,7 +59,7 @@ static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.final = crypto_sha256_arm_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {
@@ -73,7 +73,7 @@ static struct shash_alg algs[] = { {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.final = crypto_sha256_arm_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {

@@ -25,8 +25,8 @@
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
unsigned int num_blks);

static int sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);

@@ -42,8 +42,8 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
return 0;
}

static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!crypto_simd_usable())
return crypto_sha256_arm_finup(desc, data, len, out);
@@ -59,17 +59,17 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data,
return sha256_base_finish(desc, out);
}

static int sha256_final(struct shash_desc *desc, u8 *out)
static int crypto_sha256_neon_final(struct shash_desc *desc, u8 *out)
{
return sha256_finup(desc, NULL, 0, out);
return crypto_sha256_neon_finup(desc, NULL, 0, out);
}
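
Note: the renamed crypto_sha256_neon_final() keeps the usual shash idiom of implementing .final as .finup with no trailing data, so only the finup path needs the SIMD-usable check and fallback. A toy hash with the same update/finup/final split, just to show the shape (not SHA-256):

#include <assert.h>
#include <stddef.h>

struct toy_state {
	unsigned long acc;
};

static void toy_update(struct toy_state *s, const unsigned char *data, size_t len)
{
	for (size_t i = 0; i < len; i++)
		s->acc = s->acc * 31 + data[i];
}

static unsigned long toy_finup(struct toy_state *s, const unsigned char *data,
			       size_t len)
{
	toy_update(s, data, len);
	return s->acc ^ 0x5a5a5a5aUL;	/* stand-in for padding/finalisation */
}

/* .final is just .finup with no data, mirroring the functions above. */
static unsigned long toy_final(struct toy_state *s)
{
	return toy_finup(s, NULL, 0);
}

int main(void)
{
	struct toy_state a = { 0 }, b = { 0 };
	const unsigned char msg[] = "abc";

	toy_update(&a, msg, 3);
	assert(toy_final(&a) == toy_finup(&b, msg, 3));
	return 0;
}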

struct shash_alg sha256_neon_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.update = crypto_sha256_neon_update,
.final = crypto_sha256_neon_final,
.finup = crypto_sha256_neon_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
@@ -81,9 +81,9 @@ struct shash_alg sha256_neon_algs[] = { {
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.update = crypto_sha256_neon_update,
.final = crypto_sha256_neon_final,
.finup = crypto_sha256_neon_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",

@@ -17,7 +17,6 @@ generic-y += parport.h
generic-y += preempt.h
generic-y += seccomp.h
generic-y += serial.h
generic-y += simd.h
generic-y += trace_clock.h

generated-y += mach-types.h