Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Add the ability to abort a skcipher walk.

  Algorithms:
   - Fix XTS to actually do the stealing.
   - Add library helpers for AES and DES for single-block users.
   - Add library helpers for SHA256.
   - Add new DES key verification helper.
   - Add surrounding bits for ESSIV generator.
   - Add accelerations for aegis128.
   - Add test vectors for lzo-rle.

  Drivers:
   - Add i.MX8MQ support to caam.
   - Add gcm/ccm/cfb/ofb aes support in inside-secure.
   - Add ofb/cfb aes support in media-tek.
   - Add HiSilicon ZIP accelerator support.

  Others:
   - Fix potential race condition in padata.
   - Use unbound workqueues in padata"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
  crypto: caam - Cast to long first before pointer conversion
  crypto: ccree - enable CTS support in AES-XTS
  crypto: inside-secure - Probe transform record cache RAM sizes
  crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
  crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
  crypto: inside-secure - Enable extended algorithms on newer HW
  crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL
  crypto: inside-secure - Add EIP97/EIP197 and endianness detection
  padata: remove cpu_index from the parallel_queue
  padata: unbind parallel jobs from specific CPUs
  padata: use separate workqueues for parallel and serial work
  padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
  crypto: pcrypt - remove padata cpumask notifier
  padata: make padata_do_parallel find alternate callback CPU
  workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
  workqueue: unconfine alloc/apply/free_workqueue_attrs()
  padata: allocate workqueue internally
  arm64: dts: imx8mq: Add CAAM node
  random: Use wait_event_freezable() in add_hwgenerator_randomness()
  crypto: ux500 - Fix COMPILE_TEST warnings
  ...
Linus Torvalds
2019-09-18 12:11:14 -07:00
297 changed files with 14692 additions and 17484 deletions
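
A note on the new library interface: the "library helpers for AES and DES
for single-block users" bullet above refers to the lib/crypto routines that
the ARM glue code in the diffs below is converted to, namely aes_expandkey(),
aes_encrypt() and aes_decrypt() from <crypto/aes.h>, selected through the new
CRYPTO_LIB_AES Kconfig symbol. The following is only a rough sketch of such a
single-block user (the helper name encrypt_one_block() is invented for
illustration and is not part of this merge):

	#include <crypto/aes.h>
	#include <linux/string.h>

	static int encrypt_one_block(const u8 *key, unsigned int key_len,
				     const u8 in[AES_BLOCK_SIZE],
				     u8 out[AES_BLOCK_SIZE])
	{
		struct crypto_aes_ctx ctx;
		int err;

		/* expand a 128/192/256-bit key; fails for other lengths */
		err = aes_expandkey(&ctx, key, key_len);
		if (err)
			return err;

		aes_encrypt(&ctx, out, in);		/* one 16-byte block */
		memzero_explicit(&ctx, sizeof(ctx));	/* scrub round keys */
		return 0;
	}

The same calls show up in the diffs: aes_expandkey() replaces
crypto_aes_expand_key() in the NEON bit-sliced glue, and ctr_encrypt_one()
falls back to aes_encrypt() when SIMD is not usable. The "abort a skcipher
walk" bullet is likewise visible below as skcipher_walk_abort() in the
reworked xts_encrypt()/xts_decrypt() ciphertext-stealing paths.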

@@ -82,8 +82,8 @@ config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
select CRYPTO_AES
help
Use a faster and more secure NEON based implementation of AES in CBC,
CTR and XTS modes

@@ -44,63 +44,73 @@
veor q0, q0, \key3
.endm
.macro enc_dround_3x, key1, key2
.macro enc_dround_4x, key1, key2
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
enc_round q3, \key1
enc_round q0, \key2
enc_round q1, \key2
enc_round q2, \key2
enc_round q3, \key2
.endm
.macro dec_dround_3x, key1, key2
.macro dec_dround_4x, key1, key2
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
dec_round q3, \key1
dec_round q0, \key2
dec_round q1, \key2
dec_round q2, \key2
dec_round q3, \key2
.endm
.macro enc_fround_3x, key1, key2, key3
.macro enc_fround_4x, key1, key2, key3
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
enc_round q3, \key1
aese.8 q0, \key2
aese.8 q1, \key2
aese.8 q2, \key2
aese.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
veor q3, q3, \key3
.endm
.macro dec_fround_3x, key1, key2, key3
.macro dec_fround_4x, key1, key2, key3
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
dec_round q3, \key1
aesd.8 q0, \key2
aesd.8 q1, \key2
aesd.8 q2, \key2
aesd.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
veor q3, q3, \key3
.endm
.macro do_block, dround, fround
cmp r3, #12 @ which key size?
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q8, q9
vld1.8 {q12-q13}, [ip]!
vld1.32 {q12-q13}, [ip]!
\dround q10, q11
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q12, q13
vld1.8 {q12-q13}, [ip]!
vld1.32 {q12-q13}, [ip]!
\dround q10, q11
blo 0f @ AES-128: 10 rounds
vld1.8 {q10-q11}, [ip]!
vld1.32 {q10-q11}, [ip]!
\dround q12, q13
beq 1f @ AES-192: 12 rounds
vld1.8 {q12-q13}, [ip]
vld1.32 {q12-q13}, [ip]
\dround q10, q11
0: \fround q12, q13, q14
bx lr
@@ -114,8 +124,9 @@
* transforms. These should preserve all registers except q0 - q2 and ip
* Arguments:
* q0 : first in/output block
* q1 : second in/output block (_3x version only)
* q2 : third in/output block (_3x version only)
* q1 : second in/output block (_4x version only)
* q2 : third in/output block (_4x version only)
* q3 : fourth in/output block (_4x version only)
* q8 : first round key
* q9 : second round key
* q14 : final round key
@@ -136,44 +147,44 @@ aes_decrypt:
ENDPROC(aes_decrypt)
.align 6
aes_encrypt_3x:
aes_encrypt_4x:
add ip, r2, #32 @ 3rd round key
do_block enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
do_block enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)
.align 6
aes_decrypt_3x:
aes_decrypt_4x:
add ip, r2, #32 @ 3rd round key
do_block dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
do_block dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)
.macro prepare_key, rk, rounds
add ip, \rk, \rounds, lsl #4
vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
vld1.8 {q14}, [ip] @ load last round key
vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
vld1.32 {q14}, [ip] @ load last round key
.endm
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
* aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
*/
ENTRY(ce_aes_ecb_encrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbencloop3x:
subs r4, r4, #3
.Lecbencloop4x:
subs r4, r4, #4
bmi .Lecbenc1x
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_encrypt_3x
vld1.8 {q2-q3}, [r1]!
bl aes_encrypt_4x
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbencloop3x
vst1.8 {q2-q3}, [r0]!
b .Lecbencloop4x
.Lecbenc1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lecbencout
.Lecbencloop:
vld1.8 {q0}, [r1]!
@@ -189,17 +200,17 @@ ENTRY(ce_aes_ecb_decrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
.Lecbdecloop3x:
subs r4, r4, #3
.Lecbdecloop4x:
subs r4, r4, #4
bmi .Lecbdec1x
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
bl aes_decrypt_3x
vld1.8 {q2-q3}, [r1]!
bl aes_decrypt_4x
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lecbdecloop3x
vst1.8 {q2-q3}, [r0]!
b .Lecbdecloop4x
.Lecbdec1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lecbdecout
.Lecbdecloop:
vld1.8 {q0}, [r1]!
@@ -212,9 +223,9 @@ ENTRY(ce_aes_ecb_decrypt)
ENDPROC(ce_aes_ecb_decrypt)
/*
* aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
*/
ENTRY(ce_aes_cbc_encrypt)
@@ -236,88 +247,181 @@ ENDPROC(ce_aes_cbc_encrypt)
ENTRY(ce_aes_cbc_decrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
vld1.8 {q6}, [r5] @ keep iv in q6
vld1.8 {q15}, [r5] @ keep iv in q15
prepare_key r2, r3
.Lcbcdecloop3x:
subs r4, r4, #3
.Lcbcdecloop4x:
subs r4, r4, #4
bmi .Lcbcdec1x
vld1.8 {q0-q1}, [r1]!
vld1.8 {q2}, [r1]!
vmov q3, q0
vmov q4, q1
vmov q5, q2
bl aes_decrypt_3x
veor q0, q0, q6
veor q1, q1, q3
veor q2, q2, q4
vmov q6, q5
vld1.8 {q2-q3}, [r1]!
vmov q4, q0
vmov q5, q1
vmov q6, q2
vmov q7, q3
bl aes_decrypt_4x
veor q0, q0, q15
veor q1, q1, q4
veor q2, q2, q5
veor q3, q3, q6
vmov q15, q7
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
b .Lcbcdecloop3x
vst1.8 {q2-q3}, [r0]!
b .Lcbcdecloop4x
.Lcbcdec1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lcbcdecout
vmov q15, q14 @ preserve last round key
vmov q6, q14 @ preserve last round key
.Lcbcdecloop:
vld1.8 {q0}, [r1]! @ get next ct block
veor q14, q15, q6 @ combine prev ct with last key
vmov q6, q0
vmov q15, q0
bl aes_decrypt
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lcbcdecloop
.Lcbcdecout:
vst1.8 {q6}, [r5] @ keep iv in q6
vst1.8 {q15}, [r5] @ keep iv in q15
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
* int rounds, int bytes, u8 const iv[])
* ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
* int rounds, int bytes, u8 const iv[])
*/
ENTRY(ce_aes_cbc_cts_encrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
movw ip, :lower16:.Lcts_permute_table
movt ip, :upper16:.Lcts_permute_table
sub r4, r4, #16
add lr, ip, #32
add ip, ip, r4
sub lr, lr, r4
vld1.8 {q5}, [ip]
vld1.8 {q6}, [lr]
add ip, r1, r4
vld1.8 {q0}, [r1] @ overlapping loads
vld1.8 {q3}, [ip]
vld1.8 {q1}, [r5] @ get iv
prepare_key r2, r3
veor q0, q0, q1 @ xor with iv
bl aes_encrypt
vtbl.8 d4, {d0-d1}, d10
vtbl.8 d5, {d0-d1}, d11
vtbl.8 d2, {d6-d7}, d12
vtbl.8 d3, {d6-d7}, d13
veor q0, q0, q1
bl aes_encrypt
add r4, r0, r4
vst1.8 {q2}, [r4] @ overlapping stores
vst1.8 {q0}, [r0]
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)
ENTRY(ce_aes_cbc_cts_decrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
movw ip, :lower16:.Lcts_permute_table
movt ip, :upper16:.Lcts_permute_table
sub r4, r4, #16
add lr, ip, #32
add ip, ip, r4
sub lr, lr, r4
vld1.8 {q5}, [ip]
vld1.8 {q6}, [lr]
add ip, r1, r4
vld1.8 {q0}, [r1] @ overlapping loads
vld1.8 {q1}, [ip]
vld1.8 {q3}, [r5] @ get iv
prepare_key r2, r3
bl aes_decrypt
vtbl.8 d4, {d0-d1}, d10
vtbl.8 d5, {d0-d1}, d11
vtbx.8 d0, {d2-d3}, d12
vtbx.8 d1, {d2-d3}, d13
veor q1, q1, q2
bl aes_decrypt
veor q0, q0, q3 @ xor with iv
add r4, r0, r4
vst1.8 {q1}, [r4] @ overlapping stores
vst1.8 {q0}, [r0]
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
ENTRY(ce_aes_ctr_encrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
vld1.8 {q6}, [r5] @ load ctr
vld1.8 {q7}, [r5] @ load ctr
prepare_key r2, r3
vmov r6, s27 @ keep swabbed ctr in r6
vmov r6, s31 @ keep swabbed ctr in r6
rev r6, r6
cmn r6, r4 @ 32 bit overflow?
bcs .Lctrloop
.Lctrloop3x:
subs r4, r4, #3
.Lctrloop4x:
subs r4, r4, #4
bmi .Lctr1x
add r6, r6, #1
vmov q0, q6
vmov q1, q6
vmov q0, q7
vmov q1, q7
rev ip, r6
add r6, r6, #1
vmov q2, q6
vmov q2, q7
vmov s7, ip
rev ip, r6
add r6, r6, #1
vmov q3, q7
vmov s11, ip
vld1.8 {q3-q4}, [r1]!
vld1.8 {q5}, [r1]!
bl aes_encrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
rev ip, r6
add r6, r6, #1
vmov s15, ip
vld1.8 {q4-q5}, [r1]!
vld1.8 {q6}, [r1]!
vld1.8 {q15}, [r1]!
bl aes_encrypt_4x
veor q0, q0, q4
veor q1, q1, q5
veor q2, q2, q6
veor q3, q3, q15
rev ip, r6
vst1.8 {q0-q1}, [r0]!
vst1.8 {q2}, [r0]!
vmov s27, ip
b .Lctrloop3x
vst1.8 {q2-q3}, [r0]!
vmov s31, ip
b .Lctrloop4x
.Lctr1x:
adds r4, r4, #3
adds r4, r4, #4
beq .Lctrout
.Lctrloop:
vmov q0, q6
vmov q0, q7
bl aes_encrypt
adds r6, r6, #1 @ increment BE ctr
rev ip, r6
vmov s27, ip
vmov s31, ip
bcs .Lctrcarry
.Lctrcarrydone:
@@ -329,7 +433,7 @@ ENTRY(ce_aes_ctr_encrypt)
bne .Lctrloop
.Lctrout:
vst1.8 {q6}, [r5] @ return next CTR value
vst1.8 {q7}, [r5] @ return next CTR value
pop {r4-r6, pc}
.Lctrtailblock:
@@ -337,7 +441,7 @@ ENTRY(ce_aes_ctr_encrypt)
b .Lctrout
.Lctrcarry:
.irp sreg, s26, s25, s24
.irp sreg, s30, s29, s28
vmov ip, \sreg @ load next word of ctr
rev ip, ip @ ... to handle the carry
adds ip, ip, #1
@@ -349,10 +453,10 @@ ENTRY(ce_aes_ctr_encrypt)
ENDPROC(ce_aes_ctr_encrypt)
/*
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 iv[], u8 const rk2[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
* int blocks, u8 iv[], u8 const rk2[], int first)
* aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
* int bytes, u8 iv[], u32 const rk2[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
* int bytes, u8 iv[], u32 const rk2[], int first)
*/
.macro next_tweak, out, in, const, tmp
@@ -363,13 +467,10 @@ ENDPROC(ce_aes_ctr_encrypt)
veor \out, \out, \tmp
.endm
.align 3
.Lxts_mul_x:
.quad 1, 0x87
ce_aes_xts_init:
vldr d14, .Lxts_mul_x
vldr d15, .Lxts_mul_x + 8
vmov.i32 d30, #0x87 @ compose tweak mask vector
vmovl.u32 q15, d30
vshr.u64 d30, d31, #7
ldrd r4, r5, [sp, #16] @ load args
ldr r6, [sp, #28]
@@ -390,49 +491,86 @@ ENTRY(ce_aes_xts_encrypt)
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
vmov q3, q0
vmov q4, q0
teq r6, #0 @ start of a block?
bne .Lxtsenc3x
bne .Lxtsenc4x
.Lxtsencloop3x:
next_tweak q3, q3, q7, q6
.Lxtsenc3x:
subs r4, r4, #3
.Lxtsencloop4x:
next_tweak q4, q4, q15, q10
.Lxtsenc4x:
subs r4, r4, #64
bmi .Lxtsenc1x
vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
vld1.8 {q2}, [r1]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
veor q1, q1, q4
veor q2, q2, q5
bl aes_encrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
vst1.8 {q2}, [r0]!
vmov q3, q5
vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
vld1.8 {q2-q3}, [r1]!
next_tweak q5, q4, q15, q10
veor q0, q0, q4
next_tweak q6, q5, q15, q10
veor q1, q1, q5
next_tweak q7, q6, q15, q10
veor q2, q2, q6
veor q3, q3, q7
bl aes_encrypt_4x
veor q0, q0, q4
veor q1, q1, q5
veor q2, q2, q6
veor q3, q3, q7
vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
vst1.8 {q2-q3}, [r0]!
vmov q4, q7
teq r4, #0
beq .Lxtsencout
b .Lxtsencloop3x
beq .Lxtsencret
b .Lxtsencloop4x
.Lxtsenc1x:
adds r4, r4, #3
adds r4, r4, #64
beq .Lxtsencout
subs r4, r4, #16
bmi .LxtsencctsNx
.Lxtsencloop:
vld1.8 {q0}, [r1]!
veor q0, q0, q3
.Lxtsencctsout:
veor q0, q0, q4
bl aes_encrypt
veor q0, q0, q3
vst1.8 {q0}, [r0]!
subs r4, r4, #1
veor q0, q0, q4
teq r4, #0
beq .Lxtsencout
next_tweak q3, q3, q7, q6
subs r4, r4, #16
next_tweak q4, q4, q15, q6
bmi .Lxtsenccts
vst1.8 {q0}, [r0]!
b .Lxtsencloop
.Lxtsencout:
vst1.8 {q3}, [r5]
vst1.8 {q0}, [r0]
.Lxtsencret:
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
.LxtsencctsNx:
vmov q0, q3
sub r0, r0, #16
.Lxtsenccts:
movw ip, :lower16:.Lcts_permute_table
movt ip, :upper16:.Lcts_permute_table
add r1, r1, r4 @ rewind input pointer
add r4, r4, #16 @ # bytes in final block
add lr, ip, #32
add ip, ip, r4
sub lr, lr, r4
add r4, r0, r4 @ output address of final block
vld1.8 {q1}, [r1] @ load final partial block
vld1.8 {q2}, [ip]
vld1.8 {q3}, [lr]
vtbl.8 d4, {d0-d1}, d4
vtbl.8 d5, {d0-d1}, d5
vtbx.8 d0, {d2-d3}, d6
vtbx.8 d1, {d2-d3}, d7
vst1.8 {q2}, [r4] @ overlapping stores
mov r4, #0
b .Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)
@@ -441,50 +579,90 @@ ENTRY(ce_aes_xts_decrypt)
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
vmov q3, q0
vmov q4, q0
/* subtract 16 bytes if we are doing CTS */
tst r4, #0xf
subne r4, r4, #0x10
teq r6, #0 @ start of a block?
bne .Lxtsdec3x
bne .Lxtsdec4x
.Lxtsdecloop3x:
next_tweak q3, q3, q7, q6
.Lxtsdec3x:
subs r4, r4, #3
.Lxtsdecloop4x:
next_tweak q4, q4, q15, q10
.Lxtsdec4x:
subs r4, r4, #64
bmi .Lxtsdec1x
vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
vld1.8 {q2}, [r1]!
next_tweak q4, q3, q7, q6
veor q0, q0, q3
next_tweak q5, q4, q7, q6
veor q1, q1, q4
veor q2, q2, q5
bl aes_decrypt_3x
veor q0, q0, q3
veor q1, q1, q4
veor q2, q2, q5
vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
vst1.8 {q2}, [r0]!
vmov q3, q5
vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
vld1.8 {q2-q3}, [r1]!
next_tweak q5, q4, q15, q10
veor q0, q0, q4
next_tweak q6, q5, q15, q10
veor q1, q1, q5
next_tweak q7, q6, q15, q10
veor q2, q2, q6
veor q3, q3, q7
bl aes_decrypt_4x
veor q0, q0, q4
veor q1, q1, q5
veor q2, q2, q6
veor q3, q3, q7
vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
vst1.8 {q2-q3}, [r0]!
vmov q4, q7
teq r4, #0
beq .Lxtsdecout
b .Lxtsdecloop3x
b .Lxtsdecloop4x
.Lxtsdec1x:
adds r4, r4, #3
adds r4, r4, #64
beq .Lxtsdecout
subs r4, r4, #16
.Lxtsdecloop:
vld1.8 {q0}, [r1]!
veor q0, q0, q3
add ip, r2, #32 @ 3rd round key
bmi .Lxtsdeccts
.Lxtsdecctsout:
veor q0, q0, q4
bl aes_decrypt
veor q0, q0, q3
veor q0, q0, q4
vst1.8 {q0}, [r0]!
subs r4, r4, #1
teq r4, #0
beq .Lxtsdecout
next_tweak q3, q3, q7, q6
subs r4, r4, #16
next_tweak q4, q4, q15, q6
b .Lxtsdecloop
.Lxtsdecout:
vst1.8 {q3}, [r5]
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
.Lxtsdeccts:
movw ip, :lower16:.Lcts_permute_table
movt ip, :upper16:.Lcts_permute_table
add r1, r1, r4 @ rewind input pointer
add r4, r4, #16 @ # bytes in final block
add lr, ip, #32
add ip, ip, r4
sub lr, lr, r4
add r4, r0, r4 @ output address of final block
next_tweak q5, q4, q15, q6
vld1.8 {q1}, [r1] @ load final partial block
vld1.8 {q2}, [ip]
vld1.8 {q3}, [lr]
veor q0, q0, q5
bl aes_decrypt
veor q0, q0, q5
vtbl.8 d4, {d0-d1}, d4
vtbl.8 d5, {d0-d1}, d5
vtbx.8 d0, {d2-d3}, d6
vtbx.8 d1, {d2-d3}, d7
vst1.8 {q2}, [r4] @ overlapping stores
mov r4, #0
b .Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)
/*
@@ -505,8 +683,18 @@ ENDPROC(ce_aes_sub)
* operation on round key *src
*/
ENTRY(ce_aes_invert)
vld1.8 {q0}, [r1]
vld1.32 {q0}, [r1]
aesimc.8 q0, q0
vst1.8 {q0}, [r0]
vst1.32 {q0}, [r0]
bx lr
ENDPROC(ce_aes_invert)
.section ".rodata", "a"
.align 6
.Lcts_permute_table:
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff

@@ -7,9 +7,13 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -22,25 +26,29 @@ MODULE_LICENSE("GPL v2");
asmlinkage u32 ce_aes_sub(u32 input);
asmlinkage void ce_aes_invert(void *dst, void *src);
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 const iv[]);
asmlinkage void ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 const iv[]);
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 iv[],
u8 const rk2[], int first);
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 iv[],
u8 const rk2[], int first);
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
struct aes_block {
u8 b[AES_BLOCK_SIZE];
@@ -77,21 +85,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
key_len != AES_KEYSIZE_256)
return -EINVAL;
memcpy(ctx->key_enc, in_key, key_len);
ctx->key_length = key_len;
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
kernel_neon_begin();
for (i = 0; i < sizeof(rcon); i++) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
#ifndef CONFIG_CPU_BIG_ENDIAN
rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
rko[0] = rko[0] ^ rki[0] ^ rcon[i];
#else
rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
#endif
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
@@ -178,15 +182,15 @@ static int ecb_encrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks);
ctx->key_enc, num_rounds(ctx), blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
@@ -198,58 +202,192 @@ static int ecb_decrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks);
ctx->key_dec, num_rounds(ctx), blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_encrypt_walk(struct skcipher_request *req,
struct skcipher_walk *walk)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned int blocks;
int err = 0;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
ce_aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_enc, num_rounds(ctx), blocks,
walk->iv);
kernel_neon_end();
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
}
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
return cbc_encrypt_walk(req, &walk);
}
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
static int cbc_decrypt_walk(struct skcipher_request *req,
struct skcipher_walk *walk)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned int blocks;
int err = 0;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
ce_aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_dec, num_rounds(ctx), blocks,
walk->iv);
kernel_neon_end();
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int blocks;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
return cbc_decrypt_walk(req, &walk);
}
static int cts_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
struct scatterlist *src = req->src, *dst = req->dst;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct skcipher_walk walk;
int err;
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
NULL, NULL);
if (req->cryptlen <= AES_BLOCK_SIZE) {
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
cbc_blocks = 1;
}
if (cbc_blocks > 0) {
skcipher_request_set_crypt(&subreq, req->src, req->dst,
cbc_blocks * AES_BLOCK_SIZE,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false) ?:
cbc_encrypt_walk(&subreq, &walk);
if (err)
return err;
if (req->cryptlen == AES_BLOCK_SIZE)
return 0;
dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst,
subreq.cryptlen);
}
/* handle ciphertext stealing */
skcipher_request_set_crypt(&subreq, src, dst,
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, num_rounds(ctx), blocks,
walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
ce_aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, num_rounds(ctx), walk.nbytes,
walk.iv);
kernel_neon_end();
return err;
return skcipher_walk_done(&walk, 0);
}
static int cts_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
struct scatterlist *src = req->src, *dst = req->dst;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct skcipher_walk walk;
int err;
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
NULL, NULL);
if (req->cryptlen <= AES_BLOCK_SIZE) {
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
cbc_blocks = 1;
}
if (cbc_blocks > 0) {
skcipher_request_set_crypt(&subreq, req->src, req->dst,
cbc_blocks * AES_BLOCK_SIZE,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false) ?:
cbc_decrypt_walk(&subreq, &walk);
if (err)
return err;
if (req->cryptlen == AES_BLOCK_SIZE)
return 0;
dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst,
subreq.cryptlen);
}
/* handle ciphertext stealing */
skcipher_request_set_crypt(&subreq, src, dst,
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
req->iv);
err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
kernel_neon_begin();
ce_aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, num_rounds(ctx), walk.nbytes,
walk.iv);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
static int ctr_encrypt(struct skcipher_request *req)
@@ -259,13 +397,14 @@ static int ctr_encrypt(struct skcipher_request *req)
struct skcipher_walk walk;
int err, blocks;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, num_rounds(ctx), blocks,
ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
@@ -279,36 +418,109 @@ static int ctr_encrypt(struct skcipher_request *req)
*/
blocks = -1;
ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
num_rounds(ctx), blocks, walk.iv);
kernel_neon_begin();
ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx),
blocks, walk.iv);
kernel_neon_end();
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
return err;
}
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned long flags;
/*
* Temporarily disable interrupts to avoid races where
* cachelines are evicted when the CPU is interrupted
* to do something else.
*/
local_irq_save(flags);
aes_encrypt(ctx, dst, src);
local_irq_restore(flags);
}
static int ctr_encrypt_sync(struct skcipher_request *req)
{
if (!crypto_simd_usable())
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
return ctr_encrypt(req);
}
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct scatterlist *src, *dst;
struct skcipher_walk walk;
unsigned int blocks;
err = skcipher_walk_virt(&walk, req, true);
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
AES_BLOCK_SIZE) - 2;
skcipher_walk_abort(&walk);
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
xts_blocks * AES_BLOCK_SIZE,
req->iv);
req = &subreq;
err = skcipher_walk_virt(&walk, req, false);
} else {
tail = 0;
}
for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
int nbytes = walk.nbytes;
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_enc, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
if (err || likely(!tail))
return err;
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
req->iv);
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_enc, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_enc, rounds, walk.nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
return err;
return skcipher_walk_done(&walk, 0);
}
static int xts_decrypt(struct skcipher_request *req)
@@ -316,87 +528,165 @@ static int xts_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct scatterlist sg_src[2], sg_dst[2];
struct skcipher_request subreq;
struct scatterlist *src, *dst;
struct skcipher_walk walk;
unsigned int blocks;
err = skcipher_walk_virt(&walk, req, true);
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
AES_BLOCK_SIZE) - 2;
skcipher_walk_abort(&walk);
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
xts_blocks * AES_BLOCK_SIZE,
req->iv);
req = &subreq;
err = skcipher_walk_virt(&walk, req, false);
} else {
tail = 0;
}
for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
int nbytes = walk.nbytes;
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_dec, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
if (err || likely(!tail))
return err;
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
if (req->dst != req->src)
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
req->iv);
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_dec, rounds, blocks,
walk.iv, (u8 *)ctx->key2.key_enc, first);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_dec, rounds, walk.nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
return err;
return skcipher_walk_done(&walk, 0);
}
static struct skcipher_alg aes_algs[] = { {
.base = {
.cra_name = "__ecb(aes)",
.cra_driver_name = "__ecb-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
.base.cra_name = "__ecb(aes)",
.base.cra_driver_name = "__ecb-aes-ce",
.base.cra_priority = 300,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(aes)",
.cra_driver_name = "__cbc-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
.base.cra_name = "__cbc(aes)",
.base.cra_driver_name = "__cbc-aes-ce",
.base.cra_priority = 300,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
.base.cra_name = "__cts(cbc(aes))",
.base.cra_driver_name = "__cts-cbc-aes-ce",
.base.cra_priority = 300,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.walksize = 2 * AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = cts_cbc_encrypt,
.decrypt = cts_cbc_decrypt,
}, {
.base = {
.cra_name = "__xts(aes)",
.cra_driver_name = "__xts-aes-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
.base.cra_name = "__ctr(aes)",
.base.cra_driver_name = "__ctr-aes-ce",
.base.cra_priority = 300,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.base.cra_name = "ctr(aes)",
.base.cra_driver_name = "ctr-aes-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = ce_aes_setkey,
.encrypt = ctr_encrypt_sync,
.decrypt = ctr_encrypt_sync,
}, {
.base.cra_name = "__xts(aes)",
.base.cra_driver_name = "__xts-aes-ce",
.base.cra_priority = 300,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
} };
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
@@ -425,6 +715,9 @@ static int __init aes_init(void)
return err;
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
continue;
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;

@@ -219,43 +219,5 @@ ENDPROC(__aes_arm_encrypt)
.align 5
ENTRY(__aes_arm_decrypt)
do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
.section ".rodata", "a"
.align L1_CACHE_SHIFT
.type __aes_arm_inverse_sbox, %object
__aes_arm_inverse_sbox:
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox

@@ -11,12 +11,9 @@
#include <linux/module.h>
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_encrypt);
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
EXPORT_SYMBOL(__aes_arm_decrypt);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
int rounds = 6 + ctx->key_length / 4;
@@ -24,7 +21,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
int rounds = 6 + ctx->key_length / 4;
@@ -44,8 +41,8 @@ static struct crypto_alg aes_alg = {
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
.cra_cipher.cia_setkey = crypto_aes_set_key,
.cra_cipher.cia_encrypt = aes_encrypt,
.cra_cipher.cia_decrypt = aes_decrypt,
.cra_cipher.cia_encrypt = aes_arm_encrypt,
.cra_cipher.cia_decrypt = aes_arm_decrypt,
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
.cra_alignmask = 3,

@@ -887,19 +887,17 @@ ENDPROC(aesbs_ctr_encrypt)
veor \out, \out, \tmp
.endm
.align 4
.Lxts_mul_x:
.quad 1, 0x87
/*
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
* int blocks, u8 iv[], int reorder_last_tweak)
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
* int blocks, u8 iv[], int reorder_last_tweak)
*/
__xts_prepare8:
vld1.8 {q14}, [r7] // load iv
__ldr q15, .Lxts_mul_x // load tweak mask
vmov.i32 d30, #0x87 // compose tweak mask vector
vmovl.u32 q15, d30
vshr.u64 d30, d31, #7
vmov q12, q14
__adr ip, 0f
@@ -946,17 +944,25 @@ __xts_prepare8:
vld1.8 {q7}, [r1]!
next_tweak q14, q12, q15, q13
veor q7, q7, q12
THUMB( itt le )
W(cmple) r8, #0
ble 1f
0: veor q7, q7, q12
vst1.8 {q12}, [r4, :128]
0: vst1.8 {q14}, [r7] // store next iv
vst1.8 {q14}, [r7] // store next iv
bx lr
1: vswp q12, q14
b 0b
ENDPROC(__xts_prepare8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
push {r4-r8, lr}
mov r5, sp // preserve sp
ldrd r6, r7, [sp, #24] // get blocks and iv args
ldr r8, [sp, #32] // reorder final tweak?
rsb r8, r8, #1
sub ip, sp, #128 // make room for 8x tweak
bic ip, ip, #0xf // align sp to 16 bytes
mov sp, ip

@@ -6,10 +6,13 @@
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/cbc.h>
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/module.h>
@@ -35,9 +38,9 @@ asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 ctr[], u8 final[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
int rounds, int blocks, u8 iv[], int);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]);
int rounds, int blocks, u8 iv[], int);
struct aesbs_ctx {
int rounds;
@@ -51,9 +54,15 @@ struct aesbs_cbc_ctx {
struct aesbs_xts_ctx {
struct aesbs_ctx key;
struct crypto_cipher *cts_tfm;
struct crypto_cipher *tweak_tfm;
};
struct aesbs_ctr_ctx {
struct aesbs_ctx key; /* must be first member */
struct crypto_aes_ctx fallback;
};
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -61,7 +70,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
struct crypto_aes_ctx rk;
int err;
err = crypto_aes_expand_key(&rk, in_key, key_len);
err = aes_expandkey(&rk, in_key, key_len);
if (err)
return err;
@@ -83,9 +92,8 @@ static int __ecb_crypt(struct skcipher_request *req,
struct skcipher_walk walk;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -93,12 +101,13 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
ctx->rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
@@ -120,7 +129,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
struct crypto_aes_ctx rk;
int err;
err = crypto_aes_expand_key(&rk, in_key, key_len);
err = aes_expandkey(&rk, in_key, key_len);
if (err)
return err;
@@ -152,9 +161,8 @@ static int cbc_decrypt(struct skcipher_request *req)
struct skcipher_walk walk;
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -162,13 +170,14 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
kernel_neon_begin();
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key.rk, ctx->key.rounds, blocks,
walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
@@ -189,6 +198,25 @@ static void cbc_exit(struct crypto_tfm *tfm)
crypto_free_cipher(ctx->enc_tfm);
}
static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = aes_expandkey(&ctx->fallback, in_key, key_len);
if (err)
return err;
ctx->key.rounds = 6 + key_len / 4;
kernel_neon_begin();
aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
kernel_neon_end();
return 0;
}
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -197,9 +225,8 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 buf[AES_BLOCK_SIZE];
int err;
err = skcipher_walk_virt(&walk, req, true);
err = skcipher_walk_virt(&walk, req, false);
kernel_neon_begin();
while (walk.nbytes > 0) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -210,8 +237,10 @@ static int ctr_encrypt(struct skcipher_request *req)
final = NULL;
}
kernel_neon_begin();
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->rk, ctx->rounds, blocks, walk.iv, final);
kernel_neon_end();
if (final) {
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -226,11 +255,33 @@ static int ctr_encrypt(struct skcipher_request *req)
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
}
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned long flags;
/*
* Temporarily disable interrupts to avoid races where
* cachelines are evicted when the CPU is interrupted
* to do something else.
*/
local_irq_save(flags);
aes_encrypt(&ctx->fallback, dst, src);
local_irq_restore(flags);
}
static int ctr_encrypt_sync(struct skcipher_request *req)
{
if (!crypto_simd_usable())
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
return ctr_encrypt(req);
}
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -242,6 +293,9 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err;
key_len /= 2;
err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len);
if (err)
return err;
err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len);
if (err)
return err;
@@ -253,7 +307,13 @@ static int xts_init(struct crypto_tfm *tfm)
{
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->cts_tfm))
return PTR_ERR(ctx->cts_tfm);
ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(ctx->tweak_tfm))
crypto_free_cipher(ctx->cts_tfm);
return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
}
@@ -263,49 +323,89 @@ static void xts_exit(struct crypto_tfm *tfm)
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
crypto_free_cipher(ctx->tweak_tfm);
crypto_free_cipher(ctx->cts_tfm);
}
static int __xts_crypt(struct skcipher_request *req,
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]))
int rounds, int blocks, u8 iv[], int))
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int tail = req->cryptlen % AES_BLOCK_SIZE;
struct skcipher_request subreq;
u8 buf[2 * AES_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
if (req->cryptlen < AES_BLOCK_SIZE)
return -EINVAL;
if (unlikely(tail)) {
skcipher_request_set_tfm(&subreq, tfm);
skcipher_request_set_callback(&subreq,
skcipher_request_flags(req),
NULL, NULL);
skcipher_request_set_crypt(&subreq, req->src, req->dst,
req->cryptlen - tail, req->iv);
req = &subreq;
}
err = skcipher_walk_virt(&walk, req, true);
if (err)
return err;
crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
int reorder_last_tweak = !encrypt && tail > 0;
if (walk.nbytes < walk.total)
if (walk.nbytes < walk.total) {
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
reorder_last_tweak = 0;
}
kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
ctx->key.rounds, blocks, walk.iv);
ctx->key.rounds, blocks, walk.iv, reorder_last_tweak);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
if (err || likely(!tail))
return err;
/* handle ciphertext stealing */
scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE, 0);
memcpy(buf + AES_BLOCK_SIZE, buf, tail);
scatterwalk_map_and_copy(buf, req->src, req->cryptlen, tail, 0);
crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
if (encrypt)
crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf);
else
crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf);
crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
AES_BLOCK_SIZE + tail, 1);
return 0;
}
static int xts_encrypt(struct skcipher_request *req)
{
return __xts_crypt(req, aesbs_xts_encrypt);
return __xts_crypt(req, true, aesbs_xts_encrypt);
}
static int xts_decrypt(struct skcipher_request *req)
{
return __xts_crypt(req, aesbs_xts_decrypt);
return __xts_crypt(req, false, aesbs_xts_decrypt);
}
static struct skcipher_alg aes_algs[] = { {
@@ -358,6 +458,22 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = aesbs_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.base.cra_name = "ctr(aes)",
.base.cra_driver_name = "ctr-aes-neonbs-sync",
.base.cra_priority = 250 - 1,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesbs_ctr_setkey_sync,
.encrypt = ctr_encrypt_sync,
.decrypt = ctr_encrypt_sync,
}, {
.base.cra_name = "__xts(aes)",
.base.cra_driver_name = "__xts-aes-neonbs",

@@ -9,6 +9,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/b128ops.h>
#include <crypto/cryptd.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
@@ -17,7 +18,7 @@
#include <linux/crypto.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");
@@ -30,6 +31,8 @@ struct ghash_key {
u64 h2[2];
u64 h3[2];
u64 h4[2];
be128 k;
};
struct ghash_desc_ctx {
@@ -62,6 +65,36 @@ static int ghash_init(struct shash_desc *desc)
return 0;
}
static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head)
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
pmull_ghash_update(blocks, dg, src, key, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
do {
const u8 *in = src;
if (head) {
in = head;
blocks++;
head = NULL;
} else {
src += GHASH_BLOCK_SIZE;
}
crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
gf128mul_lle(&dst, &key->k);
} while (--blocks);
dg[0] = be64_to_cpu(dst.b);
dg[1] = be64_to_cpu(dst.a);
}
}
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -85,10 +118,8 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
kernel_neon_begin();
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
ghash_do_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
}
@@ -106,9 +137,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
struct ghash_key *key = crypto_shash_ctx(desc->tfm);
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
kernel_neon_begin();
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -132,24 +161,25 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
be128 h, k;
be128 h;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(&k, inkey, GHASH_BLOCK_SIZE);
ghash_reflect(key->h, &k);
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
ghash_reflect(key->h, &key->k);
h = k;
gf128mul_lle(&h, &k);
h = key->k;
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h2, &h);
gf128mul_lle(&h, &k);
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h3, &h);
gf128mul_lle(&h, &k);
gf128mul_lle(&h, &key->k);
ghash_reflect(key->h4, &h);
return 0;
@@ -162,15 +192,13 @@ static struct shash_alg ghash_alg = {
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "__ghash",
.cra_driver_name = "__driver-ghash-ce",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_key),
.cra_module = THIS_MODULE,
},
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key),
.base.cra_module = THIS_MODULE,
};
static int ghash_async_init(struct ahash_request *req)
@@ -285,9 +313,7 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
cryptd_tfm = cryptd_alloc_ahash("ghash-ce-sync", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;

@@ -39,7 +39,7 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
}
EXPORT_SYMBOL(crypto_sha256_arm_update);
static int sha256_final(struct shash_desc *desc, u8 *out)
static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
{
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha256_block_data_order);
@@ -51,7 +51,7 @@ int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
{
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
return sha256_final(desc, out);
return crypto_sha256_arm_final(desc, out);
}
EXPORT_SYMBOL(crypto_sha256_arm_finup);
@@ -59,7 +59,7 @@ static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.final = crypto_sha256_arm_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {
@@ -73,7 +73,7 @@ static struct shash_alg algs[] = { {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = crypto_sha256_arm_update,
.final = sha256_final,
.final = crypto_sha256_arm_final,
.finup = crypto_sha256_arm_finup,
.descsize = sizeof(struct sha256_state),
.base = {

@@ -25,8 +25,8 @@
asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
unsigned int num_blks);
static int sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
@@ -42,8 +42,8 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!crypto_simd_usable())
return crypto_sha256_arm_finup(desc, data, len, out);
@@ -59,17 +59,17 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data,
return sha256_base_finish(desc, out);
}
static int sha256_final(struct shash_desc *desc, u8 *out)
static int crypto_sha256_neon_final(struct shash_desc *desc, u8 *out)
{
return sha256_finup(desc, NULL, 0, out);
return crypto_sha256_neon_finup(desc, NULL, 0, out);
}
struct shash_alg sha256_neon_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.update = crypto_sha256_neon_update,
.final = crypto_sha256_neon_final,
.finup = crypto_sha256_neon_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
@@ -81,9 +81,9 @@ struct shash_alg sha256_neon_algs[] = { {
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_update,
.final = sha256_final,
.finup = sha256_finup,
.update = crypto_sha256_neon_update,
.final = crypto_sha256_neon_final,
.finup = crypto_sha256_neon_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",

@@ -17,7 +17,6 @@ generic-y += parport.h
generic-y += preempt.h
generic-y += seccomp.h
generic-y += serial.h
generic-y += simd.h
generic-y += trace_clock.h
generated-y += mach-types.h