Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Add the ability to abort a skcipher walk. Algorithms: - Fix XTS to actually do the stealing. - Add library helpers for AES and DES for single-block users. - Add library helpers for SHA256. - Add new DES key verification helper. - Add surrounding bits for ESSIV generator. - Add accelerations for aegis128. - Add test vectors for lzo-rle. Drivers: - Add i.MX8MQ support to caam. - Add gcm/ccm/cfb/ofb aes support in inside-secure. - Add ofb/cfb aes support in media-tek. - Add HiSilicon ZIP accelerator support. Others: - Fix potential race condition in padata. - Use unbound workqueues in padata" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits) crypto: caam - Cast to long first before pointer conversion crypto: ccree - enable CTS support in AES-XTS crypto: inside-secure - Probe transform record cache RAM sizes crypto: inside-secure - Base RD fetchcount on actual RD FIFO size crypto: inside-secure - Base CD fetchcount on actual CD FIFO size crypto: inside-secure - Enable extended algorithms on newer HW crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL crypto: inside-secure - Add EIP97/EIP197 and endianness detection padata: remove cpu_index from the parallel_queue padata: unbind parallel jobs from specific CPUs padata: use separate workqueues for parallel and serial work padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible crypto: pcrypt - remove padata cpumask notifier padata: make padata_do_parallel find alternate callback CPU workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs workqueue: unconfine alloc/apply/free_workqueue_attrs() padata: allocate workqueue internally arm64: dts: imx8mq: Add CAAM node random: Use wait_event_freezable() in add_hwgenerator_randomness() crypto: ux500 - Fix COMPILE_TEST warnings ...
This commit is contained in:
@@ -14,11 +14,9 @@ sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
|
||||
|
||||
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
|
||||
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
|
||||
@@ -38,14 +36,6 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
|
||||
obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
|
||||
obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
|
||||
@@ -64,15 +54,11 @@ endif
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
|
||||
endif
|
||||
|
||||
aes-i586-y := aes-i586-asm_32.o aes_glue.o
|
||||
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
|
||||
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
|
||||
|
||||
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
|
||||
des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
|
||||
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
|
||||
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
|
||||
@@ -82,11 +68,6 @@ chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o
|
||||
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
|
||||
|
||||
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
|
||||
aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
|
||||
aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
|
||||
|
||||
morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
|
||||
morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
|
||||
|
||||
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
|
||||
|
||||
@@ -106,8 +87,6 @@ ifeq ($(avx2_supported),yes)
|
||||
chacha-x86_64-y += chacha-avx2-x86_64.o
|
||||
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
|
||||
|
||||
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
|
||||
|
||||
nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
|
||||
endif
|
||||
|
||||
|
@@ -1,823 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* AES-NI + SSE2 implementation of AEGIS-128L
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
#define STATE5 %xmm5
|
||||
#define STATE6 %xmm6
|
||||
#define STATE7 %xmm7
|
||||
#define MSG0 %xmm8
|
||||
#define MSG1 %xmm9
|
||||
#define T0 %xmm10
|
||||
#define T1 %xmm11
|
||||
#define T2 %xmm12
|
||||
#define T3 %xmm13
|
||||
|
||||
#define STATEP %rdi
|
||||
#define LEN %rsi
|
||||
#define SRC %rdx
|
||||
#define DST %rcx
|
||||
|
||||
.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Laegis128l_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Laegis128l_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Laegis128l_counter0:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
.Laegis128l_counter1:
|
||||
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
|
||||
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* __load_partial: internal ABI
|
||||
* input:
|
||||
* LEN - bytes
|
||||
* SRC - src
|
||||
* output:
|
||||
* MSG0 - first message block
|
||||
* MSG1 - second message block
|
||||
* changed:
|
||||
* T0
|
||||
* %r8
|
||||
* %r9
|
||||
*/
|
||||
__load_partial:
|
||||
xor %r9d, %r9d
|
||||
pxor MSG0, MSG0
|
||||
pxor MSG1, MSG1
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1, %r8
|
||||
jz .Lld_partial_1
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1E, %r8
|
||||
add SRC, %r8
|
||||
mov (%r8), %r9b
|
||||
|
||||
.Lld_partial_1:
|
||||
mov LEN, %r8
|
||||
and $0x2, %r8
|
||||
jz .Lld_partial_2
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x1C, %r8
|
||||
add SRC, %r8
|
||||
shl $0x10, %r9
|
||||
mov (%r8), %r9w
|
||||
|
||||
.Lld_partial_2:
|
||||
mov LEN, %r8
|
||||
and $0x4, %r8
|
||||
jz .Lld_partial_4
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x18, %r8
|
||||
add SRC, %r8
|
||||
shl $32, %r9
|
||||
mov (%r8), %r8d
|
||||
xor %r8, %r9
|
||||
|
||||
.Lld_partial_4:
|
||||
movq %r9, MSG0
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x8, %r8
|
||||
jz .Lld_partial_8
|
||||
|
||||
mov LEN, %r8
|
||||
and $0x10, %r8
|
||||
add SRC, %r8
|
||||
pslldq $8, MSG0
|
||||
movq (%r8), T0
|
||||
pxor T0, MSG0
|
||||
|
||||
.Lld_partial_8:
|
||||
mov LEN, %r8
|
||||
and $0x10, %r8
|
||||
jz .Lld_partial_16
|
||||
|
||||
movdqa MSG0, MSG1
|
||||
movdqu (SRC), MSG0
|
||||
|
||||
.Lld_partial_16:
|
||||
ret
|
||||
ENDPROC(__load_partial)
|
||||
|
||||
/*
|
||||
* __store_partial: internal ABI
|
||||
* input:
|
||||
* LEN - bytes
|
||||
* DST - dst
|
||||
* input (T0 is clobbered):
|
||||
* T0 - first message block
|
||||
* T1 - second message block
|
||||
* changed:
|
||||
* %r8
|
||||
* %r9
|
||||
* %r10
|
||||
*/
|
||||
__store_partial:
|
||||
mov LEN, %r8
|
||||
mov DST, %r9
|
||||
|
||||
cmp $16, %r8
|
||||
jl .Lst_partial_16
|
||||
|
||||
movdqu T0, (%r9)
|
||||
movdqa T1, T0
|
||||
|
||||
sub $16, %r8
|
||||
add $16, %r9
|
||||
|
||||
.Lst_partial_16:
|
||||
movq T0, %r10
|
||||
|
||||
cmp $8, %r8
|
||||
jl .Lst_partial_8
|
||||
|
||||
mov %r10, (%r9)
|
||||
psrldq $8, T0
|
||||
movq T0, %r10
|
||||
|
||||
sub $8, %r8
|
||||
add $8, %r9
|
||||
|
||||
.Lst_partial_8:
|
||||
cmp $4, %r8
|
||||
jl .Lst_partial_4
|
||||
|
||||
mov %r10d, (%r9)
|
||||
shr $32, %r10
|
||||
|
||||
sub $4, %r8
|
||||
add $4, %r9
|
||||
|
||||
.Lst_partial_4:
|
||||
cmp $2, %r8
|
||||
jl .Lst_partial_2
|
||||
|
||||
mov %r10w, (%r9)
|
||||
shr $0x10, %r10
|
||||
|
||||
sub $2, %r8
|
||||
add $2, %r9
|
||||
|
||||
.Lst_partial_2:
|
||||
cmp $1, %r8
|
||||
jl .Lst_partial_1
|
||||
|
||||
mov %r10b, (%r9)
|
||||
|
||||
.Lst_partial_1:
|
||||
ret
|
||||
ENDPROC(__store_partial)
|
||||
|
||||
.macro update /* one state-update round over STATE0..STATE7; message words are XORed in by the updateN wrappers */
|
||||
movdqa STATE7, T0 /* keep the old STATE7: it is overwritten next but still needed as the round key for STATE6 */
|
||||
aesenc STATE0, STATE7 /* aesenc src, dst: dst = one AES round of dst, XORed with src */
|
||||
aesenc STATE1, STATE0
|
||||
aesenc STATE2, STATE1
|
||||
aesenc STATE3, STATE2
|
||||
aesenc STATE4, STATE3
|
||||
aesenc STATE5, STATE4
|
||||
aesenc STATE6, STATE5
|
||||
aesenc T0, STATE6 /* uses the saved pre-update STATE7 */
|
||||
.endm
|
||||
|
||||
.macro update0
|
||||
update
|
||||
pxor MSG0, STATE7
|
||||
pxor MSG1, STATE3
|
||||
.endm
|
||||
|
||||
.macro update1
|
||||
update
|
||||
pxor MSG0, STATE6
|
||||
pxor MSG1, STATE2
|
||||
.endm
|
||||
|
||||
.macro update2
|
||||
update
|
||||
pxor MSG0, STATE5
|
||||
pxor MSG1, STATE1
|
||||
.endm
|
||||
|
||||
.macro update3
|
||||
update
|
||||
pxor MSG0, STATE4
|
||||
pxor MSG1, STATE0
|
||||
.endm
|
||||
|
||||
.macro update4
|
||||
update
|
||||
pxor MSG0, STATE3
|
||||
pxor MSG1, STATE7
|
||||
.endm
|
||||
|
||||
.macro update5
|
||||
update
|
||||
pxor MSG0, STATE2
|
||||
pxor MSG1, STATE6
|
||||
.endm
|
||||
|
||||
.macro update6
|
||||
update
|
||||
pxor MSG0, STATE1
|
||||
pxor MSG1, STATE5
|
||||
.endm
|
||||
|
||||
.macro update7
|
||||
update
|
||||
pxor MSG0, STATE0
|
||||
pxor MSG1, STATE4
|
||||
.endm
|
||||
|
||||
.macro state_load
|
||||
movdqu 0x00(STATEP), STATE0
|
||||
movdqu 0x10(STATEP), STATE1
|
||||
movdqu 0x20(STATEP), STATE2
|
||||
movdqu 0x30(STATEP), STATE3
|
||||
movdqu 0x40(STATEP), STATE4
|
||||
movdqu 0x50(STATEP), STATE5
|
||||
movdqu 0x60(STATEP), STATE6
|
||||
movdqu 0x70(STATEP), STATE7
|
||||
.endm
|
||||
|
||||
.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
|
||||
movdqu \s7, 0x00(STATEP)
|
||||
movdqu \s0, 0x10(STATEP)
|
||||
movdqu \s1, 0x20(STATEP)
|
||||
movdqu \s2, 0x30(STATEP)
|
||||
movdqu \s3, 0x40(STATEP)
|
||||
movdqu \s4, 0x50(STATEP)
|
||||
movdqu \s5, 0x60(STATEP)
|
||||
movdqu \s6, 0x70(STATEP)
|
||||
.endm
|
||||
|
||||
.macro state_store0
|
||||
state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
|
||||
.endm
|
||||
|
||||
.macro state_store1
|
||||
state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
|
||||
.endm
|
||||
|
||||
.macro state_store2
|
||||
state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro state_store3
|
||||
state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro state_store4
|
||||
state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro state_store5
|
||||
state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro state_store6
|
||||
state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro state_store7
|
||||
state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_init)
|
||||
FRAME_BEGIN
|
||||
|
||||
/* load key: */
|
||||
movdqa (%rsi), MSG1
|
||||
movdqa MSG1, STATE0
|
||||
movdqa MSG1, STATE4
|
||||
movdqa MSG1, STATE5
|
||||
movdqa MSG1, STATE6
|
||||
movdqa MSG1, STATE7
|
||||
|
||||
/* load IV: */
|
||||
movdqu (%rdx), MSG0
|
||||
pxor MSG0, STATE0
|
||||
pxor MSG0, STATE4
|
||||
|
||||
/* load the constants: */
|
||||
movdqa .Laegis128l_const_0, STATE2
|
||||
movdqa .Laegis128l_const_1, STATE1
|
||||
movdqa STATE1, STATE3
|
||||
pxor STATE2, STATE5
|
||||
pxor STATE1, STATE6
|
||||
pxor STATE2, STATE7
|
||||
|
||||
/* update 10 times with IV and KEY: */
|
||||
update0
|
||||
update1
|
||||
update2
|
||||
update3
|
||||
update4
|
||||
update5
|
||||
update6
|
||||
update7
|
||||
update0
|
||||
update1
|
||||
|
||||
state_store1
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_init)
|
||||
|
||||
.macro ad_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
update\i
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Lad_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
|
||||
* const void *data);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_ad)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Lad_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lad_u_loop
|
||||
|
||||
.align 8
|
||||
.Lad_a_loop:
|
||||
ad_block a 0
|
||||
ad_block a 1
|
||||
ad_block a 2
|
||||
ad_block a 3
|
||||
ad_block a 4
|
||||
ad_block a 5
|
||||
ad_block a 6
|
||||
ad_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
jmp .Lad_a_loop
|
||||
|
||||
.align 8
|
||||
.Lad_u_loop:
|
||||
ad_block u 0
|
||||
ad_block u 1
|
||||
ad_block u 2
|
||||
ad_block u 3
|
||||
ad_block u 4
|
||||
ad_block u 5
|
||||
ad_block u 6
|
||||
ad_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
jmp .Lad_u_loop
|
||||
|
||||
.Lad_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lad_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_ad)
|
||||
|
||||
.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
|
||||
pxor \s1, \m0
|
||||
pxor \s6, \m0
|
||||
movdqa \s2, T3
|
||||
pand \s3, T3
|
||||
pxor T3, \m0
|
||||
|
||||
pxor \s2, \m1
|
||||
pxor \s5, \m1
|
||||
movdqa \s6, T3
|
||||
pand \s7, T3
|
||||
pxor T3, \m1
|
||||
.endm
|
||||
|
||||
.macro crypt0 m0 m1
|
||||
crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
|
||||
.endm
|
||||
|
||||
.macro crypt1 m0 m1
|
||||
crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
|
||||
.endm
|
||||
|
||||
.macro crypt2 m0 m1
|
||||
crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
|
||||
.endm
|
||||
|
||||
.macro crypt3 m0 m1
|
||||
crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
|
||||
.endm
|
||||
|
||||
.macro crypt4 m0 m1
|
||||
crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
|
||||
.endm
|
||||
|
||||
.macro crypt5 m0 m1
|
||||
crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
|
||||
.endm
|
||||
|
||||
.macro crypt6 m0 m1
|
||||
crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
|
||||
.endm
|
||||
|
||||
.macro crypt7 m0 m1
|
||||
crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
|
||||
.endm
|
||||
|
||||
.macro encrypt_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
movdqa MSG0, T0
|
||||
movdqa MSG1, T1
|
||||
crypt\i T0, T1
|
||||
movdq\a T0, (\i * 0x20 + 0x00)(DST)
|
||||
movdq\a T1, (\i * 0x20 + 0x10)(DST)
|
||||
|
||||
update\i
|
||||
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Lenc_out_\i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block a i
|
||||
movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
|
||||
movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
|
||||
crypt\i MSG0, MSG1
|
||||
movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
|
||||
movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
|
||||
|
||||
update\i
|
||||
|
||||
sub $0x20, LEN
|
||||
cmp $0x20, LEN
|
||||
jl .Ldec_out_\i
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_enc)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Lenc_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xf, %r8
|
||||
jnz .Lenc_u_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_a_loop:
|
||||
encrypt_block a 0
|
||||
encrypt_block a 1
|
||||
encrypt_block a 2
|
||||
encrypt_block a 3
|
||||
encrypt_block a 4
|
||||
encrypt_block a 5
|
||||
encrypt_block a 6
|
||||
encrypt_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Lenc_a_loop
|
||||
|
||||
.align 8
|
||||
.Lenc_u_loop:
|
||||
encrypt_block u 0
|
||||
encrypt_block u 1
|
||||
encrypt_block u 2
|
||||
encrypt_block u 3
|
||||
encrypt_block u 4
|
||||
encrypt_block u 5
|
||||
encrypt_block u 6
|
||||
encrypt_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Lenc_u_loop
|
||||
|
||||
.Lenc_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Lenc_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_enc)
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_enc_tail)
|
||||
FRAME_BEGIN
|
||||
|
||||
state_load
|
||||
|
||||
/* encrypt message: */
|
||||
call __load_partial
|
||||
|
||||
movdqa MSG0, T0
|
||||
movdqa MSG1, T1
|
||||
crypt0 T0, T1
|
||||
|
||||
call __store_partial
|
||||
|
||||
update0
|
||||
|
||||
state_store0
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_enc_tail)
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_dec)
|
||||
FRAME_BEGIN
|
||||
|
||||
cmp $0x20, LEN
|
||||
jb .Ldec_out
|
||||
|
||||
state_load
|
||||
|
||||
mov SRC, %r8
|
||||
or DST, %r8
|
||||
and $0xF, %r8
|
||||
jnz .Ldec_u_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_a_loop:
|
||||
decrypt_block a 0
|
||||
decrypt_block a 1
|
||||
decrypt_block a 2
|
||||
decrypt_block a 3
|
||||
decrypt_block a 4
|
||||
decrypt_block a 5
|
||||
decrypt_block a 6
|
||||
decrypt_block a 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Ldec_a_loop
|
||||
|
||||
.align 8
|
||||
.Ldec_u_loop:
|
||||
decrypt_block u 0
|
||||
decrypt_block u 1
|
||||
decrypt_block u 2
|
||||
decrypt_block u 3
|
||||
decrypt_block u 4
|
||||
decrypt_block u 5
|
||||
decrypt_block u 6
|
||||
decrypt_block u 7
|
||||
|
||||
add $0x100, SRC
|
||||
add $0x100, DST
|
||||
jmp .Ldec_u_loop
|
||||
|
||||
.Ldec_out_0:
|
||||
state_store0
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_1:
|
||||
state_store1
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_2:
|
||||
state_store2
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_3:
|
||||
state_store3
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_4:
|
||||
state_store4
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_5:
|
||||
state_store5
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_6:
|
||||
state_store6
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out_7:
|
||||
state_store7
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
.Ldec_out:
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_dec)
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
|
||||
* const void *src, void *dst);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_dec_tail)
|
||||
FRAME_BEGIN
|
||||
|
||||
state_load
|
||||
|
||||
/* decrypt message: */
|
||||
call __load_partial
|
||||
|
||||
crypt0 MSG0, MSG1
|
||||
|
||||
movdqa MSG0, T0
|
||||
movdqa MSG1, T1
|
||||
call __store_partial
|
||||
|
||||
/* mask with byte count: */
|
||||
movq LEN, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
punpcklbw T0, T0
|
||||
movdqa T0, T1
|
||||
movdqa .Laegis128l_counter0, T2
|
||||
movdqa .Laegis128l_counter1, T3
|
||||
pcmpgtb T2, T0
|
||||
pcmpgtb T3, T1
|
||||
pand T0, MSG0
|
||||
pand T1, MSG1
|
||||
|
||||
update0
|
||||
|
||||
state_store0
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_dec_tail)
|
||||
|
||||
/*
|
||||
* void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
|
||||
* u64 assoclen, u64 cryptlen);
|
||||
*/
|
||||
ENTRY(crypto_aegis128l_aesni_final)
|
||||
FRAME_BEGIN
|
||||
|
||||
state_load
|
||||
|
||||
/* prepare length block: */
|
||||
movq %rdx, MSG0
|
||||
movq %rcx, T0
|
||||
pslldq $8, T0
|
||||
pxor T0, MSG0
|
||||
psllq $3, MSG0 /* multiply by 8 (to get bit count) */
|
||||
|
||||
pxor STATE2, MSG0
|
||||
movdqa MSG0, MSG1
|
||||
|
||||
/* update state: */
|
||||
update0
|
||||
update1
|
||||
update2
|
||||
update3
|
||||
update4
|
||||
update5
|
||||
update6
|
||||
|
||||
/* xor tag: */
|
||||
movdqu (%rsi), T0
|
||||
|
||||
pxor STATE1, T0
|
||||
pxor STATE2, T0
|
||||
pxor STATE3, T0
|
||||
pxor STATE4, T0
|
||||
pxor STATE5, T0
|
||||
pxor STATE6, T0
|
||||
pxor STATE7, T0
|
||||
|
||||
movdqu T0, (%rsi)
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(crypto_aegis128l_aesni_final)
|
@@ -1,293 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The AEGIS-128L Authenticated-Encryption Algorithm
|
||||
* Glue for AES-NI + SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
#define AEGIS128L_BLOCK_ALIGN 16
|
||||
#define AEGIS128L_BLOCK_SIZE 32
|
||||
#define AEGIS128L_NONCE_SIZE 16
|
||||
#define AEGIS128L_STATE_BLOCKS 8
|
||||
#define AEGIS128L_KEY_SIZE 16
|
||||
#define AEGIS128L_MIN_AUTH_SIZE 8
|
||||
#define AEGIS128L_MAX_AUTH_SIZE 16
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_ad(
|
||||
void *state, unsigned int length, const void *data);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_enc(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_dec(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_enc_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_dec_tail(
|
||||
void *state, unsigned int length, const void *src, void *dst);
|
||||
|
||||
asmlinkage void crypto_aegis128l_aesni_final(
|
||||
void *state, void *tag_xor, unsigned int cryptlen,
|
||||
unsigned int assoclen);
|
||||
|
||||
struct aegis_block {
|
||||
u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
struct aegis_block key;
|
||||
};
|
||||
|
||||
struct aegis_crypt_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
void (*crypt_tail)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
};
|
||||
|
||||
static void crypto_aegis128l_aesni_process_ad(
|
||||
struct aegis_state *state, struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS128L_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis128l_aesni_ad(state,
|
||||
AEGIS128L_BLOCK_SIZE,
|
||||
buf.bytes);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis128l_aesni_ad(state, left, src);
|
||||
|
||||
src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
|
||||
left &= AEGIS128L_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
|
||||
crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_aesni_process_crypt(
|
||||
struct aegis_state *state, struct skcipher_walk *walk,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) {
|
||||
ops->crypt_blocks(state, round_down(walk->nbytes,
|
||||
AEGIS128L_BLOCK_SIZE),
|
||||
walk->src.virt.addr, walk->dst.virt.addr);
|
||||
skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk->nbytes) {
|
||||
ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
|
||||
walk->dst.virt.addr);
|
||||
skcipher_walk_done(walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
|
||||
{
|
||||
u8 *ctx = crypto_aead_ctx(aead);
|
||||
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
|
||||
return (void *)ctx;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS128L_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS128L_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS128L_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
|
||||
struct aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
struct aegis_state state;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, true);
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
|
||||
crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis128l_aesni_process_crypt(&state, &walk, ops);
|
||||
crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = crypto_aegis128l_aesni_enc,
|
||||
.crypt_tail = crypto_aegis128l_aesni_enc_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_block zeros = {};
|
||||
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = crypto_aegis128l_aesni_dec,
|
||||
.crypt_tail = crypto_aegis128l_aesni_dec_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/* Transform init hook: there is nothing to set up, so always succeed. */
static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
{
	return 0;
}
|
||||
|
||||
/* Transform exit hook: intentionally empty, nothing to release. */
static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
}
|
||||
|
||||
static struct aead_alg crypto_aegis128l_aesni_alg = {
|
||||
.setkey = crypto_aegis128l_aesni_setkey,
|
||||
.setauthsize = crypto_aegis128l_aesni_setauthsize,
|
||||
.encrypt = crypto_aegis128l_aesni_encrypt,
|
||||
.decrypt = crypto_aegis128l_aesni_decrypt,
|
||||
.init = crypto_aegis128l_aesni_init_tfm,
|
||||
.exit = crypto_aegis128l_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS128L_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS128L_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx) +
|
||||
__alignof__(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_priority = 400,
|
||||
|
||||
.cra_name = "__aegis128l",
|
||||
.cra_driver_name = "__aegis128l-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static struct simd_aead_alg *simd_alg;
|
||||
|
||||
/*
 * Module init: register the algorithm through the SIMD helper, but only
 * when the CPU offers SSE2 and AES-NI and SSE state is usable; otherwise
 * return -ENODEV.
 */
static int __init crypto_aegis128l_aesni_module_init(void)
{
	bool usable = boot_cpu_has(X86_FEATURE_XMM2) &&
		      boot_cpu_has(X86_FEATURE_AES) &&
		      cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL);

	if (!usable)
		return -ENODEV;

	return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1,
					  &simd_alg);
}
|
||||
|
||||
/* Module exit point: undo the registration performed at module init. */
static void __exit crypto_aegis128l_aesni_module_exit(void)
{
	simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg);
}
|
||||
|
||||
module_init(crypto_aegis128l_aesni_module_init);
|
||||
module_exit(crypto_aegis128l_aesni_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l");
|
||||
MODULE_ALIAS_CRYPTO("aegis128l-aesni");
|
@@ -1,700 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* AES-NI + SSE2 implementation of AEGIS-256
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
/* The six 128-bit state blocks, as loaded by state_load. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5

/* Message block and scratch vector registers. */
#define MSG	%xmm6
#define T0	%xmm7
#define T1	%xmm8
#define T2	%xmm9
#define T3	%xmm10

/* General-purpose registers carrying the first four arguments. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
|
||||
|
||||
/*
 * Two 16-byte constants that crypto_aegis256_aesni_init XORs into the two
 * key halves.  The first one starts with the Fibonacci sequence taken
 * modulo 256.
 */
.section	.rodata.cst16.aegis256_const, "aM", @progbits, 32
.align 16
.Laegis256_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis256_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15; crypto_aegis256_aesni_dec_tail compares them against
 * the byte count to build a mask for the valid bytes of a partial block.
 */
.section	.rodata.cst16.aegis256_counter, "aM", @progbits, 16
.align 16
.Laegis256_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Gathers a partial block from SRC into MSG, zero-padding the rest.
 * Each set bit among the low four bits of LEN selects one piece
 * (1, 2, 4 or 8 bytes); the pieces are read from the highest offset
 * downwards and shifted into place.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	xor	%r9d, %r9d
	pxor	MSG, MSG

	/* odd length: fetch the single trailing byte */
	mov	LEN, %r8
	and	$0x1, %r8
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	add	SRC, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* 2-byte piece: make room in %r9, then load the word */
	mov	LEN, %r8
	and	$0x2, %r8
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	add	SRC, %r8
	shl	$0x10, %r9
	mov	(%r8), %r9w

.Lld_partial_2:
	/* 4-byte piece: loaded via %r8d and merged with XOR */
	mov	LEN, %r8
	and	$0x4, %r8
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	add	SRC, %r8
	shl	$32, %r9
	mov	(%r8), %r8d
	xor	%r8, %r9

.Lld_partial_4:
	/* everything collected so far (at most 7 bytes) moves into MSG */
	movq	%r9, MSG

	/* 8-byte piece: shift MSG up one quadword and fill the low half */
	mov	LEN, %r8
	and	$0x8, %r8
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	add	SRC, %r8
	pslldq	$8, MSG
	movq	(%r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the low LEN bytes of T0 to DST in pieces of 8, 4, 2 and 1 bytes.
 *
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0  - message block (shifted right by 8 bytes when LEN >= 8)
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov	LEN, %r8
	mov	DST, %r9

	movq	T0, %r10

	/* at least 8 bytes left: store the low quadword, fetch the high one */
	cmp	$8, %r8
	jl	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	/* at least 4 bytes left */
	cmp	$4, %r8
	jl	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	/* at least 2 bytes left */
	cmp	$2, %r8
	jl	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	/* final single byte, if any */
	cmp	$1, %r8
	jl	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * One state-update round without message injection: every state register is
 * run through AESENC keyed by its neighbour.  T0 preserves the old STATE5 so
 * that it can key the final AESENC after STATE5 has been overwritten.
 */
.macro update
	movdqa	STATE5, T0
	aesenc	STATE0, STATE5
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	STATE4, STATE3
	aesenc	T0,     STATE4
.endm

/*
 * update0..update5: one round followed by XORing the block \m into a state
 * register; the target steps from STATE5 (update0) down to STATE0 (update5).
 */
.macro update0 m
	update
	pxor	\m, STATE5
.endm

.macro update1 m
	update
	pxor	\m, STATE4
.endm

.macro update2 m
	update
	pxor	\m, STATE3
.endm

.macro update3 m
	update
	pxor	\m, STATE2
.endm

.macro update4 m
	update
	pxor	\m, STATE1
.endm

.macro update5 m
	update
	pxor	\m, STATE0
.endm
|
||||
|
||||
/* Load the six state blocks from the buffer at STATEP (unaligned loads). */
.macro state_load
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4
	movdqu	0x50(STATEP), STATE5
.endm
|
||||
|
||||
/*
 * Write six registers back to the buffer at STATEP: \s5 lands at offset
 * 0x00, followed by \s0..\s4 at offsets 0x10..0x50.
 */
.macro state_store s0 s1 s2 s3 s4 s5
	movdqu	\s5, 0x00(STATEP)
	movdqu	\s0, 0x10(STATEP)
	movdqu	\s1, 0x20(STATEP)
	movdqu	\s2, 0x30(STATEP)
	movdqu	\s3, 0x40(STATEP)
	movdqu	\s4, 0x50(STATEP)
.endm

/*
 * state_store0..state_store5: state_store with the register list rotated by
 * one more position per variant; variant N is used after the last round
 * executed was updateN.
 */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store1
	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store2
	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store3
	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro state_store4
	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro state_store5
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the 32-byte key (%rsi, read with aligned
 * loads) and the 32-byte IV (%rdx), then stores it to the buffer at STATEP.
 */
ENTRY(crypto_aegis256_aesni_init)
	FRAME_BEGIN

	/* load key: */
	movdqa	0x00(%rsi), MSG
	movdqa	0x10(%rsi), T1
	movdqa	MSG, STATE4
	movdqa	T1, STATE5

	/* load IV and XOR it with the key: */
	movdqu	0x00(%rdx), T2
	movdqu	0x10(%rdx), T3
	pxor	MSG, T2
	pxor	T1, T3
	movdqa	T2, STATE0
	movdqa	T3, STATE1

	/* load the constants and fold them into the key copies: */
	movdqa	.Laegis256_const_0, STATE3
	movdqa	.Laegis256_const_1, STATE2
	pxor	STATE3, STATE4
	pxor	STATE2, STATE5

	/* update 16 times, cycling through KEY (MSG, T1) and KEY^IV (T2, T3): */
	update0 MSG
	update1 T1
	update2 T2
	update3 T3
	update4 MSG
	update5 T1
	update0 T2
	update1 T3
	update2 MSG
	update3 T1
	update4 T2
	update5 T3
	update0 MSG
	update1 T1
	update2 T2
	update3 T3

	state_store3

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_init)
|
||||
|
||||
/*
 * Absorb the \i-th 16-byte block at SRC into the state.
 *   \a - "a" for aligned or "u" for unaligned loads (movdqa/movdqu)
 *   \i - position within the six-block unrolled loop; also selects the
 *        update variant and the exit label taken once fewer than 16 bytes
 *        remain
 */
.macro ad_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	update\i MSG
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lad_out_\i
.endm
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs every complete 16-byte block of associated data into the state.
 * Trailing bytes beyond the last full block are not consumed; nothing is
 * done at all when length < 16.
 */
ENTRY(crypto_aegis256_aesni_ad)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero; clear it before LEN is used as 64 bits.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lad_out

	state_load

	/* pick the aligned or unaligned loop depending on SRC alignment */
	mov	SRC, %r8
	and	$0xf, %r8
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5

	add	$0x60, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5

	add	$0x60, SRC
	jmp	.Lad_u_loop

	/* exits: store the state with the rotation matching the last round */
.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_ad)
|
||||
|
||||
/*
 * XOR the keystream into \m:  \m ^= \s1 ^ \s4 ^ \s5 ^ (\s2 & \s3).
 * \s0 is accepted but unused; T3 is clobbered.
 */
.macro crypt m s0 s1 s2 s3 s4 s5
	pxor	\s1, \m
	pxor	\s4, \m
	pxor	\s5, \m
	movdqa	\s2, T3
	pand	\s3, T3
	pxor	T3, \m
.endm

/*
 * crypt0..crypt5: crypt with the state register list rotated by one more
 * position per variant, mirroring the rotation of the state_store variants.
 */
.macro crypt0 m
	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt1 m
	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt2 m
	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt3 m
	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro crypt4 m
	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro crypt5 m
	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
|
||||
|
||||
/*
 * Encrypt the \i-th 16-byte block: the ciphertext is produced in T0 and
 * written to DST, while the plaintext (kept in MSG) is what gets absorbed
 * into the state.  \a selects aligned ("a") or unaligned ("u") accesses.
 * Leaves through .Lenc_out_\i once fewer than 16 bytes remain.
 */
.macro encrypt_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	crypt\i	T0
	movdq\a	T0, (\i * 0x10)(DST)

	update\i MSG

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
|
||||
|
||||
/*
 * Decrypt the \i-th 16-byte block in place in MSG, write the plaintext to
 * DST and absorb that plaintext into the state.  \a selects aligned ("a")
 * or unaligned ("u") accesses.  Leaves through .Ldec_out_\i once fewer than
 * 16 bytes remain.
 */
.macro decrypt_block a i
	movdq\a	(\i * 0x10)(SRC), MSG
	crypt\i	MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	update\i MSG

	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts every complete 16-byte block from src to dst and updates the
 * state.  Does nothing when length < 16; a trailing partial block is left
 * for crypto_aegis256_aesni_enc_tail.
 */
ENTRY(crypto_aegis256_aesni_enc)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero; clear it before LEN is used as 64 bits.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Lenc_out

	state_load

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xf, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Lenc_u_loop

	/* exits: store the state with the rotation matching the last round */
.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc)
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts a final partial block: the zero-padded plaintext is absorbed
 * into the state and only the first `length` ciphertext bytes are written.
 */
ENTRY(crypto_aegis256_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero; clear it because __store_partial compares
	 * LEN as a 64-bit value.
	 */
	mov	%esi, %esi

	state_load

	/* encrypt message: */
	call	__load_partial

	movdqa	MSG, T0
	crypt0	T0

	call	__store_partial

	update0	MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts every complete 16-byte block from src to dst and updates the
 * state.  Does nothing when length < 16; a trailing partial block is left
 * for crypto_aegis256_aesni_dec_tail.
 */
ENTRY(crypto_aegis256_aesni_dec)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero; clear it before LEN is used as 64 bits.
	 */
	mov	%esi, %esi

	cmp	$0x10, LEN
	jb	.Ldec_out

	state_load

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov	SRC, %r8
	or	DST, %r8
	and	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5

	add	$0x60, SRC
	add	$0x60, DST
	jmp	.Ldec_u_loop

	/* exits: store the state with the rotation matching the last round */
.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec)
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts a final partial block, writes the first `length` plaintext bytes
 * and absorbs the plaintext, with the bytes past `length` forced to zero,
 * into the state.
 */
ENTRY(crypto_aegis256_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * length is a 32-bit argument, so the upper half of %rsi is not
	 * guaranteed to be zero; clear it because __store_partial compares
	 * LEN as a 64-bit value.
	 */
	mov	%esi, %esi

	state_load

	/* decrypt message: */
	call	__load_partial

	crypt0	MSG

	movdqa	MSG, T0
	call	__store_partial

	/*
	 * mask with byte count: replicate the low byte of LEN into all 16
	 * bytes of T0, then keep only the bytes whose index is below it
	 */
	movq	LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa	.Laegis256_counter, T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	update0	MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec_tail)
|
||||
|
||||
/*
 * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
 *                                  unsigned int assoclen,
 *                                  unsigned int cryptlen);
 *
 * Runs the finalisation rounds and XORs the resulting tag into the 16 bytes
 * at tag_xor.  The state buffer itself is not written back.
 */
ENTRY(crypto_aegis256_aesni_final)
	FRAME_BEGIN

	state_load

	/*
	 * prepare length block: both lengths are 32-bit arguments, so only
	 * %edx/%ecx are read (movd zero-extends into the vector register);
	 * the upper halves of %rdx/%rcx are not guaranteed to be zero
	 */
	movd	%edx, MSG
	movd	%ecx, T0
	pslldq	$8, T0
	pxor	T0, MSG
	psllq	$3, MSG /* multiply by 8 (to get bit count) */

	pxor	STATE3, MSG

	/* update state: */
	update0 MSG
	update1 MSG
	update2 MSG
	update3 MSG
	update4 MSG
	update5 MSG
	update0 MSG

	/* xor tag: */
	movdqu	(%rsi), MSG

	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG
	pxor	STATE5, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_final)
|
@@ -1,293 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The AEGIS-256 Authenticated-Encryption Algorithm
|
||||
* Glue for AES-NI + SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
/* Block geometry: alignment and size in bytes, and blocks per state. */
#define AEGIS256_BLOCK_ALIGN	16
#define AEGIS256_BLOCK_SIZE	16
#define AEGIS256_STATE_BLOCKS	6

/* Nonce and key sizes in bytes. */
#define AEGIS256_NONCE_SIZE	32
#define AEGIS256_KEY_SIZE	32

/* Bounds enforced on the authentication tag size, in bytes. */
#define AEGIS256_MIN_AUTH_SIZE	8
#define AEGIS256_MAX_AUTH_SIZE	16
|
||||
|
||||
/*
 * Entry points implemented in the AES-NI + SSE2 assembly.
 *
 * The prototypes follow the signatures documented next to the assembly
 * routines: the key and IV are read-only, and the final routine takes the
 * associated-data length before the message length, which is also the
 * order in which crypto_aegis256_aesni_crypt() passes them.
 */
asmlinkage void crypto_aegis256_aesni_init(void *state, const void *key,
					   const void *iv);

/* Absorb the complete 16-byte blocks found in @data. */
asmlinkage void crypto_aegis256_aesni_ad(
		void *state, unsigned int length, const void *data);

/* Process the complete 16-byte blocks of @src into @dst. */
asmlinkage void crypto_aegis256_aesni_enc(
		void *state, unsigned int length, const void *src, void *dst);

asmlinkage void crypto_aegis256_aesni_dec(
		void *state, unsigned int length, const void *src, void *dst);

/* Process a trailing partial block of @length bytes. */
asmlinkage void crypto_aegis256_aesni_enc_tail(
		void *state, unsigned int length, const void *src, void *dst);

asmlinkage void crypto_aegis256_aesni_dec_tail(
		void *state, unsigned int length, const void *src, void *dst);

/* XOR the computed tag into the 16 bytes at @tag_xor. */
asmlinkage void crypto_aegis256_aesni_final(
		void *state, void *tag_xor, unsigned int assoclen,
		unsigned int cryptlen);
|
||||
|
||||
struct aegis_block {
|
||||
u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
|
||||
};
|
||||
|
||||
struct aegis_state {
|
||||
struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct aegis_ctx {
|
||||
struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct aegis_crypt_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
void (*crypt_tail)(void *state, unsigned int length, const void *src,
|
||||
void *dst);
|
||||
};
|
||||
|
||||
static void crypto_aegis256_aesni_process_ad(
|
||||
struct aegis_state *state, struct scatterlist *sg_src,
|
||||
unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct aegis_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= AEGIS256_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
crypto_aegis256_aesni_ad(state,
|
||||
AEGIS256_BLOCK_SIZE,
|
||||
buf.bytes);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
crypto_aegis256_aesni_ad(state, left, src);
|
||||
|
||||
src += left & ~(AEGIS256_BLOCK_SIZE - 1);
|
||||
left &= AEGIS256_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
|
||||
crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_aegis256_aesni_process_crypt(
|
||||
struct aegis_state *state, struct skcipher_walk *walk,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
while (walk->nbytes >= AEGIS256_BLOCK_SIZE) {
|
||||
ops->crypt_blocks(state,
|
||||
round_down(walk->nbytes, AEGIS256_BLOCK_SIZE),
|
||||
walk->src.virt.addr, walk->dst.virt.addr);
|
||||
skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk->nbytes) {
|
||||
ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
|
||||
walk->dst.virt.addr);
|
||||
skcipher_walk_done(walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
|
||||
{
|
||||
u8 *ctx = crypto_aead_ctx(aead);
|
||||
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
|
||||
return (void *)ctx;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
|
||||
|
||||
if (keylen != AEGIS256_KEY_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
if (authsize > AEGIS256_MAX_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
if (authsize < AEGIS256_MIN_AUTH_SIZE)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_aegis256_aesni_crypt(struct aead_request *req,
|
||||
struct aegis_block *tag_xor,
|
||||
unsigned int cryptlen,
|
||||
const struct aegis_crypt_ops *ops)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
struct aegis_state state;
|
||||
|
||||
ops->skcipher_walk_init(&walk, req, true);
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
|
||||
crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
|
||||
crypto_aegis256_aesni_process_crypt(&state, &walk, ops);
|
||||
crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = crypto_aegis256_aesni_enc,
|
||||
.crypt_tail = crypto_aegis256_aesni_enc_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const struct aegis_block zeros = {};
|
||||
|
||||
static const struct aegis_crypt_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = crypto_aegis256_aesni_dec,
|
||||
.crypt_tail = crypto_aegis256_aesni_dec_tail,
|
||||
};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aegis_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
|
||||
/*
 * Per-transform constructor for the internal AEGIS-256 AEAD.
 * No per-transform setup is performed; it always reports success.
 */
static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
	return 0;
}
|
||||
|
||||
/*
 * Per-transform destructor for the internal AEGIS-256 AEAD.
 * Intentionally empty: init_tfm acquires nothing that needs releasing.
 */
static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
}
|
||||
|
||||
static struct aead_alg crypto_aegis256_aesni_alg = {
|
||||
.setkey = crypto_aegis256_aesni_setkey,
|
||||
.setauthsize = crypto_aegis256_aesni_setauthsize,
|
||||
.encrypt = crypto_aegis256_aesni_encrypt,
|
||||
.decrypt = crypto_aegis256_aesni_decrypt,
|
||||
.init = crypto_aegis256_aesni_init_tfm,
|
||||
.exit = crypto_aegis256_aesni_exit_tfm,
|
||||
|
||||
.ivsize = AEGIS256_NONCE_SIZE,
|
||||
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
|
||||
.chunksize = AEGIS256_BLOCK_SIZE,
|
||||
|
||||
.base = {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aegis_ctx) +
|
||||
__alignof__(struct aegis_ctx),
|
||||
.cra_alignmask = 0,
|
||||
.cra_priority = 400,
|
||||
|
||||
.cra_name = "__aegis256",
|
||||
.cra_driver_name = "__aegis256-aesni",
|
||||
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * SIMD wrapper handle: filled in by simd_register_aeads_compat() at module
 * init and handed back to simd_unregister_aeads() at module exit.
 */
static struct simd_aead_alg *simd_alg;
|
||||
|
||||
/*
 * Module entry point.  Bails out with -ENODEV unless the boot CPU reports
 * X86_FEATURE_XMM2 and X86_FEATURE_AES and the SSE xfeature is available;
 * otherwise registers the single internal AEAD through the SIMD helper.
 */
static int __init crypto_aegis256_aesni_module_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_XMM2))
		return -ENODEV;
	if (!boot_cpu_has(X86_FEATURE_AES))
		return -ENODEV;
	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
		return -ENODEV;

	return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1,
					  &simd_alg);
}
|
||||
|
||||
/* Module exit point: undo the registration performed at module init. */
static void __exit crypto_aegis256_aesni_module_exit(void)
{
	simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg);
}
|
||||
|
||||
module_init(crypto_aegis256_aesni_module_init);
|
||||
module_exit(crypto_aegis256_aesni_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("aegis256");
|
||||
MODULE_ALIAS_CRYPTO("aegis256-aesni");
|
@@ -1,362 +0,0 @@
|
||||
// -------------------------------------------------------------------------
|
||||
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
|
||||
// All rights reserved.
|
||||
//
|
||||
// LICENSE TERMS
|
||||
//
|
||||
// The free distribution and use of this software in both source and binary
|
||||
// form is allowed (with or without changes) provided that:
|
||||
//
|
||||
// 1. distributions of this source code include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer//
|
||||
//
|
||||
// 2. distributions in binary form include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other associated materials//
|
||||
//
|
||||
// 3. the copyright holder's name is not used to endorse products
|
||||
// built using this software without specific written permission.
|
||||
//
|
||||
//
|
||||
// ALTERNATIVELY, provided that this notice is retained in full, this product
|
||||
// may be distributed under the terms of the GNU General Public License (GPL),
|
||||
// in which case the provisions of the GPL apply INSTEAD OF those given above.
|
||||
//
|
||||
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
|
||||
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
|
||||
|
||||
// DISCLAIMER
|
||||
//
|
||||
// This software is provided 'as is' with no explicit or implied warranties
|
||||
// in respect of its properties including, but not limited to, correctness
|
||||
// and fitness for purpose.
|
||||
// -------------------------------------------------------------------------
|
||||
// Issue Date: 29/07/2002
|
||||
|
||||
.file "aes-i586-asm.S"
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define tlen	1024	// length of each of 4 'xor' arrays (256 32-bit words)

/* offsets to parameters with one register pushed onto stack */
#define ctx	8
#define out_blk	12
#define in_blk	16

/* offsets in crypto_aes_ctx structure */
#define klen	(480)
#define ekey	(0)
#define dkey	(240)
|
||||
|
||||
// register mapping for encrypt and decrypt subroutines

#define r0	eax
#define r1	ebx
#define r2	ecx
#define r3	edx
#define r4	esi
#define r5	edi

// names of the low and high byte of the four byte-addressable registers,
// formed by appending 'l' or 'h' to the 32-bit register name
#define eaxl	al
#define eaxh	ah
#define ebxl	bl
#define ebxh	bh
#define ecxl	cl
#define ecxh	ch
#define edxl	dl
#define edxh	dh

// h(reg)/l(reg) expand reg (itself possibly an r0..r3 alias) and then paste
// the suffix, e.g. h(r0) -> eaxh -> ah; the extra level forces the expansion
#define _h(reg)	reg##h
#define h(reg)	_h(reg)

#define _l(reg)	reg##l
#define l(reg)	_l(reg)
|
||||
|
||||
// This macro takes a 32-bit word representing a column and uses each of
// its four bytes, lowest byte first, to index into four consecutive tables
// of 256 32-bit words (tlen bytes apart). The values obtained are xored
// into the four output registers in turn.

// Parameters:
//	table		base address of the first table
//	a1,a2,a3,a4	output registers, xored with the lookups for
//			bytes 0,1,2,3 of idx respectively
//	idx		input register for the round (destroyed)
//	tmp		scratch register for the round

#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
	movzx	%l(idx),%tmp;			\
	xor	table(,%tmp,4),%a1;		\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+2*tlen(,%tmp,4),%a3;	\
	xor	table+3*tlen(,%idx,4),%a4;
|
||||
|
||||
// Forward first column: like do_col, but the output registers are first
// loaded from the key schedule instead of being accumulated into.
// a1,a2,a3,a4 are initialised from offsets 0,12,8,4 of sched.
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	12 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	4 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
// Inverse first column: like do_col, but the output registers are first
// loaded from the key schedule instead of being accumulated into.
// a1,a2,a3,a4 are initialised from offsets 0,4,8,12 of sched.
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	4 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	12 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
|
||||
// original Gladman had conditional saves to MMX regs.
// save(n, reg): spill reg into 32-bit stack slot n (at 4*n(%esp))
#define save(a1, a2)		\
	mov	%a2,4*a1(%esp)
|
||||
|
||||
// restore(reg, n): reload reg from 32-bit stack slot n (at 4*n(%esp))
#define restore(a1, a2)		\
	mov	4*a2(%esp),%a1
|
||||
|
||||
// These macros perform a forward encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage.
|
||||
|
||||
// forward round, first of a pair; arg is the round-key operand
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
// r1 and r5 are spilled to stack slots 0 and 1 because their registers are
// overwritten with output before they have been consumed as input
#define fwd_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */
|
||||
|
||||
// forward round, second of a pair; arg is the round-key operand
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
// r1 and r5 are spilled to stack slots 0 and 1 because their registers are
// overwritten with output before they have been consumed as input
#define fwd_rnd2(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */
|
||||
|
||||
// These macros perform an inverse encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage
|
||||
|
||||
// inverse round, first of a pair; arg is the round-key operand
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
// r1 and r5 are spilled to stack slots 0 and 1 because their registers are
// overwritten with output before they have been consumed as input
#define inv_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */
|
||||
|
||||
// round column values
|
||||
// on entry: r2,r1,r4,r5
|
||||
// on exit: r0,r1,r4,r5
|
||||
// inv_rnd2: same sequence as inv_rnd1 with the roles of r0 and r2 swapped:
// the first column is indexed through r2 and the first result lands in r0.
//   arg   - round-key operand handed to do_icol (callers pass N(%ebp))
//   table - lookup table symbol handed to do_icol and do_col
// r1 and r5 are stored in save slots 0 and 1 and restored into r2 before the
// columns that index with them.
// NOTE(review): save/restore/do_icol/do_col are defined outside this excerpt.
#define inv_rnd2(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
|
||||
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
|
||||
restore(r2,0); \
|
||||
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
|
||||
restore(r2,1); \
|
||||
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
|
||||
|
||||
// AES (Rijndael) Encryption Subroutine
|
||||
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.extern crypto_ft_tab
|
||||
.extern crypto_fl_tab
|
||||
|
||||
// aes_enc_blk: encrypt one block. Loads four 32-bit columns from in_blk,
// XORs in the first round key, runs 10, 12 or 14 table-driven rounds chosen
// by the key size, and stores the four result columns to out_blk.
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
// NOTE(review): ctx, in_blk, out_blk, klen, ekey and the r0..r5 register
// mappings are defined outside this excerpt.
ENTRY(aes_enc_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
// The in_blk offset gets +4 here; presumably this accounts for the extra
// push of %ebx since ctx was read -- confirm against the offset definitions.
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov klen(%ebp),%r3 // key size
|
||||
push %edi
|
||||
#if ekey != 0
|
||||
lea ekey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
// Round-count dispatch: key size below 24 jumps to label 4, equal to 24
// jumps to label 3, anything larger falls through to label 2. lea leaves
// EFLAGS untouched, so the je still tests the result of the cmp.
sub $8,%esp // space for register saves on stack
|
||||
add $16,%ebp // increment to next round key
|
||||
cmp $24,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
|
||||
// %ebp has been advanced by 0, 32 or 64 bytes above, so the negative
// offsets at labels 2 and 3 address the same round keys that follow the
// first one; every round consumes the next 16 bytes of the schedule.
2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
|
||||
fwd_rnd2( -48(%ebp), crypto_ft_tab)
|
||||
3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
|
||||
fwd_rnd2( -16(%ebp), crypto_ft_tab)
|
||||
4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
|
||||
fwd_rnd2( +16(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +32(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2( +48(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +64(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2( +80(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1( +96(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2(+112(%ebp), crypto_ft_tab)
|
||||
fwd_rnd1(+128(%ebp), crypto_ft_tab)
|
||||
fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns relies on the register mappings
|
||||
|
||||
// The 8 bytes of save space are released first; the stores to out_blk are
// interleaved with the pops that restore %edi, %esi, %ebx and %ebp.
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_enc_blk)
|
||||
|
||||
// AES (Rijndael) Decryption Subroutine
|
||||
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.extern crypto_it_tab
|
||||
.extern crypto_il_tab
|
||||
|
||||
// aes_dec_blk: decrypt one block. Same structure as the encryption routine,
// but the key pointer is taken at offset dkey in the context and the rounds
// use inv_rnd1/inv_rnd2 with crypto_it_tab (crypto_il_tab for the last one).
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
// NOTE(review): ctx, in_blk, out_blk, klen, dkey and the r0..r5 register
// mappings are defined outside this excerpt.
ENTRY(aes_dec_blk)
|
||||
push %ebp
|
||||
mov ctx(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
// The in_blk offset gets +4 here; presumably this accounts for the extra
// push of %ebx since ctx was read -- confirm against the offset definitions.
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov klen(%ebp),%r3 // key size
|
||||
push %edi
|
||||
#if dkey != 0
|
||||
lea dkey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
// Round-count dispatch: key size below 24 jumps to label 4, equal to 24
// jumps to label 3, anything larger falls through to label 2. lea leaves
// EFLAGS untouched, so the je still tests the result of the cmp.
sub $8,%esp // space for register saves on stack
|
||||
add $16,%ebp // increment to next round key
|
||||
cmp $24,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
|
||||
// %ebp has been advanced by 0, 32 or 64 bytes above, so the negative
// offsets at labels 2 and 3 address the same round keys that follow the
// first one; every round consumes the next 16 bytes of the schedule.
2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
|
||||
inv_rnd2( -48(%ebp), crypto_it_tab)
|
||||
3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
|
||||
inv_rnd2( -16(%ebp), crypto_it_tab)
|
||||
4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
|
||||
inv_rnd2( +16(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +32(%ebp), crypto_it_tab)
|
||||
inv_rnd2( +48(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +64(%ebp), crypto_it_tab)
|
||||
inv_rnd2( +80(%ebp), crypto_it_tab)
|
||||
inv_rnd1( +96(%ebp), crypto_it_tab)
|
||||
inv_rnd2(+112(%ebp), crypto_it_tab)
|
||||
inv_rnd1(+128(%ebp), crypto_it_tab)
|
||||
inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns relies on the register mappings
|
||||
|
||||
// The 8 bytes of save space are released first; the stores to out_blk are
// interleaved with the pops that restore %edi, %esi, %ebx and %ebp.
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
ret
|
||||
ENDPROC(aes_dec_blk)
|
@@ -1,185 +0,0 @@
|
||||
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
|
||||
*
|
||||
* Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
|
||||
*
|
||||
* License:
|
||||
* This code can be distributed under the terms of the GNU General Public
|
||||
* License (GPL) Version 2 provided that the above header down to and
|
||||
* including this sentence is retained in full.
|
||||
*/
|
||||
|
||||
.extern crypto_ft_tab
|
||||
.extern crypto_it_tab
|
||||
.extern crypto_fl_tab
|
||||
.extern crypto_il_tab
|
||||
|
||||
.text
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define R1 %rax
|
||||
#define R1E %eax
|
||||
#define R1X %ax
|
||||
#define R1H %ah
|
||||
#define R1L %al
|
||||
#define R2 %rbx
|
||||
#define R2E %ebx
|
||||
#define R2X %bx
|
||||
#define R2H %bh
|
||||
#define R2L %bl
|
||||
#define R3 %rcx
|
||||
#define R3E %ecx
|
||||
#define R3X %cx
|
||||
#define R3H %ch
|
||||
#define R3L %cl
|
||||
#define R4 %rdx
|
||||
#define R4E %edx
|
||||
#define R4X %dx
|
||||
#define R4H %dh
|
||||
#define R4L %dl
|
||||
#define R5 %rsi
|
||||
#define R5E %esi
|
||||
#define R6 %rdi
|
||||
#define R6E %edi
|
||||
#define R7 %r9 /* don't use %rbp; it breaks stack traces */
|
||||
#define R7E %r9d
|
||||
#define R8 %r8
|
||||
#define R10 %r10
|
||||
#define R11 %r11
|
||||
|
||||
/*
 * prologue: open function FUNC and set up the cipher state.
 *  - copies r1 into r2 and r10 into r11, freeing r1 and r10 for reuse
 *  - points r9 at KEY+48 bytes past r8 (callers pass KEY = 0 or 240)
 *  - loads four 32-bit words from (r7) into r5, r1, r6, r7 and XORs in the
 *    four key words at -48(r9) .. -36(r9)
 *  - loads the 32-bit value at 480(r8) into r10 and compares it with 24:
 *    below -> jump to B128; equal -> advance r9 by 32 and jump to B192;
 *    above -> advance r9 by 64 and fall through.
 * leaq does not modify flags, so the je still tests the cmpl result.
 * NOTE(review): 480(r8) is presumably the key length in bytes -- confirm
 * against the layout of the context structure.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
|
||||
ENTRY(FUNC); \
|
||||
movq r1,r2; \
|
||||
leaq KEY+48(r8),r9; \
|
||||
movq r10,r11; \
|
||||
movl (r7),r5 ## E; \
|
||||
movl 4(r7),r1 ## E; \
|
||||
movl 8(r7),r6 ## E; \
|
||||
movl 12(r7),r7 ## E; \
|
||||
movl 480(r8),r10 ## E; \
|
||||
xorl -48(r9),r5 ## E; \
|
||||
xorl -44(r9),r1 ## E; \
|
||||
xorl -40(r9),r6 ## E; \
|
||||
xorl -36(r9),r7 ## E; \
|
||||
cmpl $24,r10 ## E; \
|
||||
jb B128; \
|
||||
leaq 32(r9),r9; \
|
||||
je B192; \
|
||||
leaq 32(r9),r9;
|
||||
|
||||
/*
 * epilogue: copy r1 back into r2, store the 32-bit halves of r5, r6, r7 and
 * r8 to offsets 0, 4, 8 and 12 from r9, return, and close FUNC with ENDPROC.
 */
#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
|
||||
movq r1,r2; \
|
||||
movl r5 ## E,(r9); \
|
||||
movl r6 ## E,4(r9); \
|
||||
movl r7 ## E,8(r9); \
|
||||
movl r8 ## E,12(r9); \
|
||||
ret; \
|
||||
ENDPROC(FUNC);
|
||||
|
||||
/*
 * round: one table-driven round over the four state words in r1..r4.
 * Each state byte (extracted with movzbl, after roll/shrl by 16 to reach the
 * upper half) selects a 4-byte entry in one of four sub-tables spaced 1024
 * bytes apart: TAB, TAB+1024, TAB+2048 and TAB+3072. The entries are XORed
 * together into r5, r6, r3 and r4; r7 only ever holds a byte index.
 * The four round-key words at OFFSET .. OFFSET+12 from r8 are XORed into
 * ra, rb, rc and rd.
 * NOTE(review): lookups and key XORs are interleaved and r1..r4 are reused as
 * scratch part-way through, so the statement order matters.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
|
||||
movzbl r2 ## H,r5 ## E; \
|
||||
movzbl r2 ## L,r6 ## E; \
|
||||
movl TAB+1024(,r5,4),r5 ## E;\
|
||||
movw r4 ## X,r2 ## X; \
|
||||
movl TAB(,r6,4),r6 ## E; \
|
||||
roll $16,r2 ## E; \
|
||||
shrl $16,r4 ## E; \
|
||||
movzbl r4 ## L,r7 ## E; \
|
||||
movzbl r4 ## H,r4 ## E; \
|
||||
xorl OFFSET(r8),ra ## E; \
|
||||
xorl OFFSET+4(r8),rb ## E; \
|
||||
xorl TAB+3072(,r4,4),r5 ## E;\
|
||||
xorl TAB+2048(,r7,4),r6 ## E;\
|
||||
movzbl r1 ## L,r7 ## E; \
|
||||
movzbl r1 ## H,r4 ## E; \
|
||||
movl TAB+1024(,r4,4),r4 ## E;\
|
||||
movw r3 ## X,r1 ## X; \
|
||||
roll $16,r1 ## E; \
|
||||
shrl $16,r3 ## E; \
|
||||
xorl TAB(,r7,4),r5 ## E; \
|
||||
movzbl r3 ## L,r7 ## E; \
|
||||
movzbl r3 ## H,r3 ## E; \
|
||||
xorl TAB+3072(,r3,4),r4 ## E;\
|
||||
xorl TAB+2048(,r7,4),r5 ## E;\
|
||||
movzbl r1 ## L,r7 ## E; \
|
||||
movzbl r1 ## H,r3 ## E; \
|
||||
shrl $16,r1 ## E; \
|
||||
xorl TAB+3072(,r3,4),r6 ## E;\
|
||||
movl TAB+2048(,r7,4),r3 ## E;\
|
||||
movzbl r1 ## L,r7 ## E; \
|
||||
movzbl r1 ## H,r1 ## E; \
|
||||
xorl TAB+1024(,r1,4),r6 ## E;\
|
||||
xorl TAB(,r7,4),r3 ## E; \
|
||||
movzbl r2 ## H,r1 ## E; \
|
||||
movzbl r2 ## L,r7 ## E; \
|
||||
shrl $16,r2 ## E; \
|
||||
xorl TAB+3072(,r1,4),r3 ## E;\
|
||||
xorl TAB+2048(,r7,4),r4 ## E;\
|
||||
movzbl r2 ## H,r1 ## E; \
|
||||
movzbl r2 ## L,r2 ## E; \
|
||||
xorl OFFSET+8(r8),rc ## E; \
|
||||
xorl OFFSET+12(r8),rd ## E; \
|
||||
xorl TAB+1024(,r1,4),r3 ## E;\
|
||||
xorl TAB(,r2,4),r4 ## E;
|
||||
|
||||
/* move_regs: copy the 32-bit halves of r3 and r4 into r1 and r2. */
#define move_regs(r1,r2,r3,r4) \
|
||||
movl r3 ## E,r1 ## E; \
|
||||
movl r4 ## E,r2 ## E;
|
||||
|
||||
/*
 * entry: prologue with this file's fixed register assignment. With it, the
 * prologue copies %rbx to %r8 and %rsi to %r11, reads the input words through
 * %rdx, reads the context through %rdi and uses %r10 as the round-key pointer.
 */
#define entry(FUNC,KEY,B128,B192) \
|
||||
prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
|
||||
|
||||
/*
 * return: epilogue with this file's fixed register assignment: copies %r8 back
 * to %rbx and stores %esi, %edi, %ecx, %edx to offsets 0..12 from %r11.
 */
#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
|
||||
|
||||
/*
 * encrypt_round: one round() with the round key at OFFSET(%r10), followed by
 * move_regs copying R5/R6 back into R1/R2 so the next round can start from
 * R1..R4 again.
 */
#define encrypt_round(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
|
||||
move_regs(R1,R2,R5,R6)
|
||||
|
||||
/*
 * encrypt_final: same round() invocation as encrypt_round but without the
 * trailing move_regs, leaving the result in R5, R6, R3, R4.
 */
#define encrypt_final(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
|
||||
|
||||
/*
 * decrypt_round: round() with the register pairs R1/R2, R3/R4 and R5/R6
 * swapped relative to encrypt_round, followed by the same move_regs that
 * copies R5/R6 back into R1/R2.
 */
#define decrypt_round(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
|
||||
move_regs(R1,R2,R5,R6)
|
||||
|
||||
/*
 * decrypt_final: same round() invocation as decrypt_round but without the
 * trailing move_regs, leaving the result in R5, R6, R3, R4.
 */
#define decrypt_final(TAB,OFFSET) \
|
||||
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
|
||||
|
||||
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
|
||||
|
||||
/*
 * Encryption body. The key schedule is read from offset 0 of the context.
 * The prologue jumps to .Le128 or .Le192 for the two shorter key sizes and
 * falls through for the longest, so 10, 12 or 14 rounds are executed; the
 * prologue advances the key pointer to match, which is why the offsets here
 * start at -96. The last round uses crypto_fl_tab instead of crypto_ft_tab.
 */
entry(aes_enc_blk,0,.Le128,.Le192)
|
||||
encrypt_round(crypto_ft_tab,-96)
|
||||
encrypt_round(crypto_ft_tab,-80)
|
||||
.Le192: encrypt_round(crypto_ft_tab,-64)
|
||||
encrypt_round(crypto_ft_tab,-48)
|
||||
.Le128: encrypt_round(crypto_ft_tab,-32)
|
||||
encrypt_round(crypto_ft_tab,-16)
|
||||
encrypt_round(crypto_ft_tab, 0)
|
||||
encrypt_round(crypto_ft_tab, 16)
|
||||
encrypt_round(crypto_ft_tab, 32)
|
||||
encrypt_round(crypto_ft_tab, 48)
|
||||
encrypt_round(crypto_ft_tab, 64)
|
||||
encrypt_round(crypto_ft_tab, 80)
|
||||
encrypt_round(crypto_ft_tab, 96)
|
||||
encrypt_final(crypto_fl_tab,112)
|
||||
return(aes_enc_blk)
|
||||
|
||||
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
|
||||
|
||||
/*
 * Decryption body. The key schedule is read from offset 240 of the context.
 * The prologue jumps to .Ld128 or .Ld192 for the two shorter key sizes and
 * falls through for the longest, so 10, 12 or 14 rounds are executed; the
 * prologue advances the key pointer to match, which is why the offsets here
 * start at -96. The last round uses crypto_il_tab instead of crypto_it_tab.
 */
entry(aes_dec_blk,240,.Ld128,.Ld192)
|
||||
decrypt_round(crypto_it_tab,-96)
|
||||
decrypt_round(crypto_it_tab,-80)
|
||||
.Ld192: decrypt_round(crypto_it_tab,-64)
|
||||
decrypt_round(crypto_it_tab,-48)
|
||||
.Ld128: decrypt_round(crypto_it_tab,-32)
|
||||
decrypt_round(crypto_it_tab,-16)
|
||||
decrypt_round(crypto_it_tab, 0)
|
||||
decrypt_round(crypto_it_tab, 16)
|
||||
decrypt_round(crypto_it_tab, 32)
|
||||
decrypt_round(crypto_it_tab, 48)
|
||||
decrypt_round(crypto_it_tab, 64)
|
||||
decrypt_round(crypto_it_tab, 80)
|
||||
decrypt_round(crypto_it_tab, 96)
|
||||
decrypt_final(crypto_il_tab,112)
|
||||
return(aes_dec_blk)
|
@@ -1,71 +1 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Glue Code for the asm optimized version of the AES Cipher Algorithm
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <asm/crypto/aes.h>
|
||||
|
||||
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
|
||||
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
|
||||
|
||||
/*
 * crypto_aes_encrypt_x86 - encrypt via the assembler routine aes_enc_blk.
 * @ctx: AES context handed through unchanged
 * @dst: output buffer
 * @src: input buffer
 *
 * Thin wrapper exported (GPL-only) so other code can call the asm routine.
 */
void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_enc_blk(ctx, dst, src);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
|
||||
|
||||
/*
 * crypto_aes_decrypt_x86 - decrypt via the assembler routine aes_dec_blk.
 * @ctx: AES context handed through unchanged
 * @dst: output buffer
 * @src: input buffer
 *
 * Thin wrapper exported (GPL-only) so other code can call the asm routine.
 */
void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_dec_blk(ctx, dst, src);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
|
||||
|
||||
/*
 * cia_encrypt hook for aes_alg: fetch the context stored in @tfm with
 * crypto_tfm_ctx() and pass it to the assembler routine aes_enc_blk.
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
|
||||
}
|
||||
|
||||
/*
 * cia_decrypt hook for aes_alg: fetch the context stored in @tfm with
 * crypto_tfm_ctx() and pass it to the assembler routine aes_dec_blk.
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
|
||||
}
|
||||
|
||||
/*
 * Algorithm descriptor registered by aes_init(): a single-block cipher named
 * "aes" with driver name "aes-asm" and priority 200. The per-transform
 * context is a struct crypto_aes_ctx; key setup uses crypto_aes_set_key and
 * the block operations are the aes_encrypt/aes_decrypt wrappers in this file.
 */
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-asm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = crypto_aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* Module init: register aes_alg and return crypto_register_alg()'s result. */
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
/* Module exit: unregister aes_alg. */
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
||||
MODULE_ALIAS_CRYPTO("aes-asm");
|
||||
|
@@ -26,7 +26,6 @@
|
||||
#include <crypto/gcm.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/crypto/aes.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
@@ -329,7 +328,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
|
||||
}
|
||||
|
||||
if (!crypto_simd_usable())
|
||||
err = crypto_aes_expand_key(ctx, in_key, key_len);
|
||||
err = aes_expandkey(ctx, in_key, key_len);
|
||||
else {
|
||||
kernel_fpu_begin();
|
||||
err = aesni_set_key(ctx, in_key, key_len);
|
||||
@@ -345,26 +344,26 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
|
||||
}
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
|
||||
|
||||
if (!crypto_simd_usable())
|
||||
crypto_aes_encrypt_x86(ctx, dst, src);
|
||||
else {
|
||||
if (!crypto_simd_usable()) {
|
||||
aes_encrypt(ctx, dst, src);
|
||||
} else {
|
||||
kernel_fpu_begin();
|
||||
aesni_enc(ctx, dst, src);
|
||||
kernel_fpu_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
|
||||
|
||||
if (!crypto_simd_usable())
|
||||
crypto_aes_decrypt_x86(ctx, dst, src);
|
||||
else {
|
||||
if (!crypto_simd_usable()) {
|
||||
aes_decrypt(ctx, dst, src);
|
||||
} else {
|
||||
kernel_fpu_begin();
|
||||
aesni_dec(ctx, dst, src);
|
||||
kernel_fpu_end();
|
||||
@@ -610,7 +609,8 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
return glue_xts_req_128bit(&aesni_enc_xts, req,
|
||||
XTS_TWEAK_CAST(aesni_xts_tweak),
|
||||
aes_ctx(ctx->raw_tweak_ctx),
|
||||
aes_ctx(ctx->raw_crypt_ctx));
|
||||
aes_ctx(ctx->raw_crypt_ctx),
|
||||
false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -621,32 +621,28 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
return glue_xts_req_128bit(&aesni_dec_xts, req,
|
||||
XTS_TWEAK_CAST(aesni_xts_tweak),
|
||||
aes_ctx(ctx->raw_tweak_ctx),
|
||||
aes_ctx(ctx->raw_crypt_ctx));
|
||||
aes_ctx(ctx->raw_crypt_ctx),
|
||||
true);
|
||||
}
|
||||
|
||||
static int
|
||||
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
|
||||
{
|
||||
struct crypto_cipher *tfm;
|
||||
struct crypto_aes_ctx ctx;
|
||||
int ret;
|
||||
|
||||
tfm = crypto_alloc_cipher("aes", 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
return PTR_ERR(tfm);
|
||||
|
||||
ret = crypto_cipher_setkey(tfm, key, key_len);
|
||||
ret = aes_expandkey(&ctx, key, key_len);
|
||||
if (ret)
|
||||
goto out_free_cipher;
|
||||
return ret;
|
||||
|
||||
/* Clear the data in the hash sub key container to zero.*/
|
||||
/* We want to cipher all zeros to create the hash sub key. */
|
||||
memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
|
||||
|
||||
crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey);
|
||||
aes_encrypt(&ctx, hash_subkey, hash_subkey);
|
||||
|
||||
out_free_cipher:
|
||||
crypto_free_cipher(tfm);
|
||||
return ret;
|
||||
memzero_explicit(&ctx, sizeof(ctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
|
||||
@@ -919,8 +915,8 @@ static struct crypto_alg aesni_cipher_alg = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
.cia_encrypt = aesni_encrypt,
|
||||
.cia_decrypt = aesni_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@@ -182,7 +182,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&camellia_enc_xts, req,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -192,7 +192,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&camellia_dec_xts, req,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg camellia_algs[] = {
|
||||
|
@@ -208,7 +208,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&camellia_enc_xts, req,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -218,7 +218,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&camellia_dec_xts, req,
|
||||
XTS_TWEAK_CAST(camellia_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg camellia_algs[] = {
|
||||
|
@@ -201,7 +201,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&cast6_enc_xts, req,
|
||||
XTS_TWEAK_CAST(__cast6_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -211,7 +211,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&cast6_dec_xts, req,
|
||||
XTS_TWEAK_CAST(__cast6_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg cast6_algs[] = {
|
||||
|
@@ -19,8 +19,8 @@
|
||||
#include <linux/types.h>
|
||||
|
||||
struct des3_ede_x86_ctx {
|
||||
u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
|
||||
u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
|
||||
struct des3_ede_ctx enc;
|
||||
struct des3_ede_ctx dec;
|
||||
};
|
||||
|
||||
/* regular block cipher functions */
|
||||
@@ -34,7 +34,7 @@ asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
|
||||
static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
u32 *enc_ctx = ctx->enc_expkey;
|
||||
u32 *enc_ctx = ctx->enc.expkey;
|
||||
|
||||
des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
|
||||
}
|
||||
@@ -42,7 +42,7 @@ static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
u32 *dec_ctx = ctx->dec_expkey;
|
||||
u32 *dec_ctx = ctx->dec.expkey;
|
||||
|
||||
des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
|
||||
}
|
||||
@@ -50,7 +50,7 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
u32 *enc_ctx = ctx->enc_expkey;
|
||||
u32 *enc_ctx = ctx->enc.expkey;
|
||||
|
||||
des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
|
||||
}
|
||||
@@ -58,7 +58,7 @@ static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
|
||||
const u8 *src)
|
||||
{
|
||||
u32 *dec_ctx = ctx->dec_expkey;
|
||||
u32 *dec_ctx = ctx->dec.expkey;
|
||||
|
||||
des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
|
||||
}
|
||||
@@ -122,7 +122,7 @@ static int ecb_encrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_crypt(req, ctx->enc_expkey);
|
||||
return ecb_crypt(req, ctx->enc.expkey);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct skcipher_request *req)
|
||||
@@ -130,7 +130,7 @@ static int ecb_decrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
return ecb_crypt(req, ctx->dec_expkey);
|
||||
return ecb_crypt(req, ctx->dec.expkey);
|
||||
}
|
||||
|
||||
static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
|
||||
@@ -348,20 +348,28 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
u32 i, j, tmp;
|
||||
int err;
|
||||
|
||||
/* Generate encryption context using generic implementation. */
|
||||
err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
|
||||
if (err < 0)
|
||||
err = des3_ede_expand_key(&ctx->enc, key, keylen);
|
||||
if (err == -ENOKEY) {
|
||||
if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
|
||||
err = -EINVAL;
|
||||
else
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (err) {
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Fix encryption context for this implementation and form decryption
|
||||
* context. */
|
||||
j = DES3_EDE_EXPKEY_WORDS - 2;
|
||||
for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
|
||||
tmp = ror32(ctx->enc_expkey[i + 1], 4);
|
||||
ctx->enc_expkey[i + 1] = tmp;
|
||||
tmp = ror32(ctx->enc.expkey[i + 1], 4);
|
||||
ctx->enc.expkey[i + 1] = tmp;
|
||||
|
||||
ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
|
||||
ctx->dec_expkey[j + 1] = tmp;
|
||||
ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0];
|
||||
ctx->dec.expkey[j + 1] = tmp;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@@ -357,6 +357,5 @@ module_init(ghash_pclmulqdqni_mod_init);
|
||||
module_exit(ghash_pclmulqdqni_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
|
||||
"accelerated by PCLMULQDQ-NI");
|
||||
MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
|
||||
MODULE_ALIAS_CRYPTO("ghash");
|
||||
|
@@ -14,6 +14,7 @@
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <asm/crypto/glue_helper.h>
|
||||
|
||||
@@ -259,17 +260,36 @@ done:
|
||||
int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
|
||||
struct skcipher_request *req,
|
||||
common_glue_func_t tweak_fn, void *tweak_ctx,
|
||||
void *crypt_ctx)
|
||||
void *crypt_ctx, bool decrypt)
|
||||
{
|
||||
const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
|
||||
const unsigned int bsize = 128 / 8;
|
||||
struct skcipher_request subreq;
|
||||
struct skcipher_walk walk;
|
||||
bool fpu_enabled = false;
|
||||
unsigned int nbytes;
|
||||
unsigned int nbytes, tail;
|
||||
int err;
|
||||
|
||||
if (req->cryptlen < XTS_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(cts)) {
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
|
||||
tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
|
||||
|
||||
skcipher_request_set_tfm(&subreq, tfm);
|
||||
skcipher_request_set_callback(&subreq,
|
||||
crypto_skcipher_get_flags(tfm),
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
||||
req->cryptlen - tail, req->iv);
|
||||
req = &subreq;
|
||||
}
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
nbytes = walk.nbytes;
|
||||
if (!nbytes)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* set minimum length to bsize, for tweak_fn */
|
||||
@@ -287,6 +307,47 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
|
||||
nbytes = walk.nbytes;
|
||||
}
|
||||
|
||||
if (unlikely(cts)) {
|
||||
u8 *next_tweak, *final_tweak = req->iv;
|
||||
struct scatterlist *src, *dst;
|
||||
struct scatterlist s[2], d[2];
|
||||
le128 b[2];
|
||||
|
||||
dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
|
||||
|
||||
if (decrypt) {
|
||||
next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
|
||||
gf128mul_x_ble(b, b);
|
||||
} else {
|
||||
next_tweak = req->iv;
|
||||
}
|
||||
|
||||
skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
|
||||
next_tweak);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false) ?:
|
||||
skcipher_walk_done(&walk,
|
||||
__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
|
||||
memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
|
||||
scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
|
||||
tail - XTS_BLOCK_SIZE, 0);
|
||||
scatterwalk_map_and_copy(b, dst, 0, tail, 1);
|
||||
|
||||
skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
|
||||
final_tweak);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false) ?:
|
||||
skcipher_walk_done(&walk,
|
||||
__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
|
||||
}
|
||||
|
||||
out:
|
||||
glue_fpu_end(fpu_enabled);
|
||||
|
||||
return err;
|
||||
|
@@ -1,619 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* AVX2 implementation of MORUS-1280
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
/*
 * SHUFFLE_MASK: pack four selectors into one immediate, i0 in bits 0-1,
 * i1 in bits 2-3, i2 in bits 4-5 and i3 in bits 6-7. The arguments are not
 * parenthesized, so pass plain constants only.
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
|
||||
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
|
||||
|
||||
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
|
||||
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
|
||||
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
|
||||
|
||||
#define STATE0 %ymm0
|
||||
#define STATE0_LOW %xmm0
|
||||
#define STATE1 %ymm1
|
||||
#define STATE2 %ymm2
|
||||
#define STATE3 %ymm3
|
||||
#define STATE4 %ymm4
|
||||
#define KEY %ymm5
|
||||
#define MSG %ymm5
|
||||
#define MSG_LOW %xmm5
|
||||
#define T0 %ymm6
|
||||
#define T0_LOW %xmm6
|
||||
#define T1 %ymm7
|
||||
|
||||
/* 32-byte constant loaded into STATE4 by crypto_morus1280_avx2_init. */
.section	.rodata.cst32.morus1280_const, "aM", @progbits, 32
.align 32
.Lmorus1280_const:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..31; crypto_morus1280_avx2_dec_tail compares the byte
 * count against these to build a per-byte mask.
 */
.section	.rodata.cst32.morus1280_counter, "aM", @progbits, 32
.align 32
.Lmorus1280_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * One round step on 256-bit state words:
 *   s0 ^= s1 & s2
 *   s0 ^= s3
 *   every 64-bit lane of s0 is rotated left by \b bits
 *   the 64-bit lanes of s3 are permuted by the immediate \w
 * \s4 is accepted but never referenced. Clobbers T0.
 */
.macro morus1280_round s0, s1, s2, s3, s4, b, w
	vpand	\s1, \s2, T0
	vpxor	T0, \s0, \s0
	vpxor	\s3, \s0, \s0
	/* rotate left by \b: (s0 << b) ^ (s0 >> (64 - b)) per lane */
	vpsllq	$\b, \s0, T0
	vpsrlq	$(64 - \b), \s0, \s0
	vpxor	T0, \s0, \s0
	vpermq	$\w, \s3, \s3
.endm
|
||||
|
||||
/*
 * __morus1280_update: internal ABI
 *
 * Runs the five round steps (rotation amounts 13, 46, 38, 7, 4) and XORs
 * the message block into STATE1..STATE4 between consecutive steps.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update:
	morus1280_round	STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	vpxor		MSG, STATE1, STATE1
	morus1280_round	STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	vpxor		MSG, STATE2, STATE2
	morus1280_round	STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	vpxor		MSG, STATE3, STATE3
	morus1280_round	STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	vpxor		MSG, STATE4, STATE4
	morus1280_round	STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update)
|
||||
|
||||
/*
 * __morus1280_update_zero: internal ABI
 *
 * Same five round steps as __morus1280_update, but with no message block
 * XORed in between them (i.e. an update with an all-zero message).
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update_zero:
	morus1280_round	STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	morus1280_round	STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	morus1280_round	STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	morus1280_round	STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	morus1280_round	STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update_zero)
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Loads the low five bits' worth of %rcx bytes from src into MSG and
 * zero-fills the rest. The block is assembled from its tail towards its
 * head: for each set bit of the count (1, 2, 4, 8, 16) the matching
 * 1/2/4/8/16-byte piece is read at the offset given by the higher count
 * bits, after the data gathered so far has been moved up to make room.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   %r8
 *   %r9
 */
__load_partial:
	xor	%r9d, %r9d
	vpxor	MSG, MSG, MSG

	/* bit 0: one trailing byte at offset (bytes & 0x1E) */
	mov	%rcx, %r8
	and	$0x1, %r8
	jz	.Lld_partial_1

	mov	%rcx, %r8
	and	$0x1E, %r8
	add	%rsi, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (bytes & 0x1C) */
	mov	%rcx, %r8
	and	$0x2, %r8
	jz	.Lld_partial_2

	mov	%rcx, %r8
	and	$0x1C, %r8
	add	%rsi, %r8
	shl	$16, %r9
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (bytes & 0x18) */
	mov	%rcx, %r8
	and	$0x4, %r8
	jz	.Lld_partial_4

	mov	%rcx, %r8
	and	$0x18, %r8
	add	%rsi, %r8
	shl	$32, %r9
	mov	(%r8), %r8d
	xor	%r8, %r9

.Lld_partial_4:
	/* up to seven gathered bytes become the lowest quadword of MSG */
	movq	%r9, MSG_LOW

	/* bit 3: eight bytes at offset (bytes & 0x10) */
	mov	%rcx, %r8
	and	$0x8, %r8
	jz	.Lld_partial_8

	mov	%rcx, %r8
	and	$0x10, %r8
	add	%rsi, %r8
	/* swap the two quadwords of MSG_LOW, then fill the low one */
	pshufd	$MASK2, MSG_LOW, MSG_LOW
	pinsrq	$0, (%r8), MSG_LOW

.Lld_partial_8:
	/* bit 4: the first sixteen bytes of src */
	mov	%rcx, %r8
	and	$0x10, %r8
	jz	.Lld_partial_16

	/* move the low 128 bits of MSG into the high 128 bits */
	vpermq	$MASK2, MSG, MSG
	/*
	 * The non-VEX movdqu is intentional: it overwrites only the low 128
	 * bits and leaves the high half placed by vpermq untouched.
	 */
	movdqu	(%rsi), MSG_LOW

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the first %rcx bytes of T0 to dst in 16/8/4/2/1-byte pieces,
 * advancing the destination pointer after each piece.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes
 *   T0   - message block to store (consumed, see "changed")
 * changed:
 *   %r8
 *   %r9
 *   %r10
 *   T0   - its 128-bit halves are swapped when bytes >= 16
 */
__store_partial:
	mov	%rcx, %r8		/* %r8: bytes still to store */
	mov	%rdx, %r9		/* %r9: write cursor */

	cmp	$16, %r8
	jl	.Lst_partial_16

	movdqu	T0_LOW, (%r9)
	/* bring the high 128 bits down so T0_LOW holds the next bytes */
	vpermq	$MASK2, T0, T0

	sub	$16, %r8
	add	$16, %r9

.Lst_partial_16:
	movq	T0_LOW, %r10

	cmp	$8, %r8
	jl	.Lst_partial_8

	mov	%r10, (%r9)
	pextrq	$1, T0_LOW, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jl	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jl	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$16, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jl	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_init(void *state, const void *key,
 *                                 const void *iv);
 *
 * Builds the initial state and writes it to *state (5 x 32 bytes):
 *   STATE0 = 16 bytes read from iv, upper 16 bytes zero
 *   STATE1 = 32 bytes read from key
 *   STATE2 = all ones
 *   STATE3 = all zeros
 *   STATE4 = .Lmorus1280_const
 * followed by 16 zero-message updates and a final XOR of the key into
 * STATE1.
 *
 * %rdi - state, %rsi - key, %rdx - iv
 */
ENTRY(crypto_morus1280_avx2_init)
	FRAME_BEGIN

	/* load IV: */
	vpxor	STATE0, STATE0, STATE0
	movdqu	(%rdx), STATE0_LOW
	/* load key: */
	vmovdqu	(%rsi), KEY
	vmovdqa	KEY, STATE1
	/* load all ones: */
	vpcmpeqd STATE2, STATE2, STATE2
	/* load all zeros: */
	vpxor	STATE3, STATE3, STATE3
	/* load the constant (RIP-relative, so no absolute relocation): */
	vmovdqa	.Lmorus1280_const(%rip), STATE4

	/* update 16 times with zero: */
	.rept 16
	call	__morus1280_update_zero
	.endr

	/* xor-in the key again after updates: */
	vpxor	KEY, STATE1, STATE1

	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_init)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_ad(void *state, const void *data,
 *                               unsigned int length);
 *
 * Absorbs every complete 32-byte block of data into the state. Returns
 * without touching the state when length < 32; a trailing partial block
 * is not processed here.
 *
 * %rdi - state, %rsi - data, %rdx - length
 *
 * NOTE(review): the prototype declares length as unsigned int while the
 * full %rdx is compared; this presumably relies on the caller having
 * zero-extended the argument - confirm.
 */
ENTRY(crypto_morus1280_avx2_ad)
	FRAME_BEGIN

	cmp	$32, %rdx
	jb	.Lad_out

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* choose the aligned-load loop only for a 32-byte aligned source */
	mov	%rsi, %r8
	and	$0x1F, %r8
	jnz	.Lad_u_loop

.align 4
.Lad_a_loop:
	vmovdqa	(%rsi), MSG
	call	__morus1280_update
	sub	$32, %rdx
	add	$32, %rsi
	cmp	$32, %rdx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lad_a_loop

	jmp	.Lad_cont
.align 4
.Lad_u_loop:
	vmovdqu	(%rsi), MSG
	call	__morus1280_update
	sub	$32, %rdx
	add	$32, %rsi
	cmp	$32, %rdx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lad_u_loop

.Lad_cont:
	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_ad)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Encrypts every complete 32-byte block of src into dst. For each block
 * the output is
 *   MSG ^ STATE0 ^ vpermq(STATE1, MASK3) ^ (STATE2 & STATE3)
 * and the plaintext block is then absorbed with __morus1280_update.
 * Returns without touching the state when length < 32; a trailing
 * partial block is not processed here.
 *
 * %rdi - state, %rsi - src, %rdx - dst, %rcx - length
 */
ENTRY(crypto_morus1280_avx2_enc)
	FRAME_BEGIN

	cmp	$32, %rcx
	jb	.Lenc_out

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* aligned loop only when both src and dst are 32-byte aligned */
	mov	%rsi, %r8
	or	%rdx, %r8
	and	$0x1F, %r8
	jnz	.Lenc_u_loop

.align 4
.Lenc_a_loop:
	vmovdqa	(%rsi), MSG
	vmovdqa	MSG, T0
	vpxor	STATE0, T0, T0
	vpermq	$MASK3, STATE1, T1
	vpxor	T1, T0, T0
	vpand	STATE2, STATE3, T1
	vpxor	T1, T0, T0
	vmovdqa	T0, (%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lenc_a_loop

	jmp	.Lenc_cont
.align 4
.Lenc_u_loop:
	vmovdqu	(%rsi), MSG
	vmovdqa	MSG, T0
	vpxor	STATE0, T0, T0
	vpermq	$MASK3, STATE1, T1
	vpxor	T1, T0, T0
	vpand	STATE2, STATE3, T1
	vpxor	T1, T0, T0
	vmovdqu	T0, (%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Encrypts one partial block: the source bytes are read zero-padded via
 * __load_partial, the keystream is XORed in, only `length` output bytes
 * are written via __store_partial, and the zero-padded plaintext block is
 * absorbed into the state.
 *
 * %rdi - state, %rsi - src, %rdx - dst, %rcx - length
 */
ENTRY(crypto_morus1280_avx2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* encrypt message: */
	call	__load_partial

	/* T0 = MSG ^ STATE0 ^ vpermq(STATE1, MASK3) ^ (STATE2 & STATE3) */
	vmovdqa	MSG, T0
	vpxor	STATE0, T0, T0
	vpermq	$MASK3, STATE1, T1
	vpxor	T1, T0, T0
	vpand	STATE2, STATE3, T1
	vpxor	T1, T0, T0

	call	__store_partial

	/* MSG still holds the padded plaintext */
	call	__morus1280_update

	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Decrypts every complete 32-byte block of src into dst. Each block is
 * XORed with STATE0 ^ vpermq(STATE1, MASK3) ^ (STATE2 & STATE3); the
 * resulting plaintext is written out and then absorbed with
 * __morus1280_update. Returns without touching the state when
 * length < 32; a trailing partial block is not processed here.
 *
 * %rdi - state, %rsi - src, %rdx - dst, %rcx - length
 */
ENTRY(crypto_morus1280_avx2_dec)
	FRAME_BEGIN

	cmp	$32, %rcx
	jb	.Ldec_out

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* aligned loop only when both src and dst are 32-byte aligned */
	mov	%rsi, %r8
	or	%rdx, %r8
	and	$0x1F, %r8
	jnz	.Ldec_u_loop

.align 4
.Ldec_a_loop:
	vmovdqa	(%rsi), MSG
	vpxor	STATE0, MSG, MSG
	vpermq	$MASK3, STATE1, T0
	vpxor	T0, MSG, MSG
	vpand	STATE2, STATE3, T0
	vpxor	T0, MSG, MSG
	vmovdqa	MSG, (%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Ldec_a_loop

	jmp	.Ldec_cont
.align 4
.Ldec_u_loop:
	vmovdqu	(%rsi), MSG
	vpxor	STATE0, MSG, MSG
	vpermq	$MASK3, STATE1, T0
	vpxor	T0, MSG, MSG
	vpand	STATE2, STATE3, T0
	vpxor	T0, MSG, MSG
	vmovdqu	MSG, (%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 *
 * Decrypts one partial block. The ciphertext is read zero-padded, the
 * keystream is XORed in and `length` plaintext bytes are written out.
 * Before the block is absorbed, every byte at index >= length is cleared
 * so that keystream bytes in the padding do not enter the state.
 *
 * %rdi - state, %rsi - src, %rdx - dst, %rcx - length
 */
ENTRY(crypto_morus1280_avx2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* decrypt message: */
	call	__load_partial

	vpxor	STATE0, MSG, MSG
	vpermq	$MASK3, STATE1, T0
	vpxor	T0, MSG, MSG
	vpand	STATE2, STATE3, T0
	vpxor	T0, MSG, MSG
	/* __store_partial takes its data in T0 and clobbers it */
	vmovdqa	MSG, T0

	call	__store_partial

	/* mask with byte count: byte i is kept iff length > i */
	movq	%rcx, T0_LOW
	vpbroadcastb T0_LOW, T0
	/* RIP-relative, so no absolute relocation is needed */
	vmovdqa	.Lmorus1280_counter(%rip), T1
	vpcmpgtb T1, T0, T0
	vpand	T0, MSG, MSG

	call	__morus1280_update

	/* store the state: */
	vmovdqu	STATE0, (0 * 32)(%rdi)
	vmovdqu	STATE1, (1 * 32)(%rdi)
	vmovdqu	STATE2, (2 * 32)(%rdi)
	vmovdqu	STATE3, (3 * 32)(%rdi)
	vmovdqu	STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec_tail)
|
||||
|
||||
/*
 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Finalization: XORs STATE0 into STATE4, runs ten updates with the
 * length block { assoclen * 8, cryptlen * 8, 0, 0 } and XORs
 *   STATE0 ^ vpermq(STATE1, MASK3) ^ (STATE2 & STATE3)
 * into the 32 bytes at tag_xor. The state itself is not written back.
 *
 * %rdi - state, %rsi - tag_xor, %rdx - assoclen, %rcx - cryptlen
 */
ENTRY(crypto_morus1280_avx2_final)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu	(0 * 32)(%rdi), STATE0
	vmovdqu	(1 * 32)(%rdi), STATE1
	vmovdqu	(2 * 32)(%rdi), STATE2
	vmovdqu	(3 * 32)(%rdi), STATE3
	vmovdqu	(4 * 32)(%rdi), STATE4

	/* xor state[0] into state[4]: */
	vpxor	STATE0, STATE4, STATE4

	/* prepare length block: */
	vpxor	MSG, MSG, MSG
	vpinsrq	$0, %rdx, MSG_LOW, MSG_LOW
	vpinsrq	$1, %rcx, MSG_LOW, MSG_LOW
	vpsllq	$3, MSG, MSG /* multiply by 8 (to get bit count) */

	/* update state ten times with the length block: */
	.rept 10
	call	__morus1280_update
	.endr

	/* xor tag: */
	vmovdqu	(%rsi), MSG

	vpxor	STATE0, MSG, MSG
	vpermq	$MASK3, STATE1, T0
	vpxor	T0, MSG, MSG
	vpand	STATE2, STATE3, T0
	vpxor	T0, MSG, MSG
	vmovdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_final)
|
@@ -1,62 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Glue for AVX2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400);
|
||||
|
||||
static struct simd_aead_alg *simd_alg;
|
||||
|
||||
/*
 * Module entry point: registers the AVX2 MORUS-1280 AEAD together with
 * its SIMD wrapper.
 *
 * Returns -ENODEV unless the CPU reports AVX2 and OSXSAVE and the SSE and
 * YMM xfeatures are enabled; otherwise returns the result of
 * simd_register_aeads_compat().
 */
static int __init crypto_morus1280_avx2_module_init(void)
{
	/* Checks run in the same order as before, stopping at the first miss. */
	if (!boot_cpu_has(X86_FEATURE_AVX2))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_OSXSAVE))
		return -ENODEV;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		return -ENODEV;

	return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1,
					  &simd_alg);
}
|
||||
|
||||
/*
 * Module exit point: unregisters the single AEAD algorithm and its SIMD
 * wrapper, i.e. the same (alg, count, simd_alg) triple passed to
 * simd_register_aeads_compat() in the init function.
 */
static void __exit crypto_morus1280_avx2_module_exit(void)
|
||||
{
|
||||
simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg);
|
||||
}
|
||||
|
||||
module_init(crypto_morus1280_avx2_module_init);
|
||||
module_exit(crypto_morus1280_avx2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus1280");
|
||||
MODULE_ALIAS_CRYPTO("morus1280-avx2");
|
@@ -1,893 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* SSE2 implementation of MORUS-1280
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
/*
 * Pack four 2-bit lane selectors into one 8-bit shuffle immediate.
 * Every argument is parenthesized so that expression arguments expand
 * with the intended precedence.
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
	((i0) | ((i1) << 2) | ((i2) << 4) | ((i3) << 6))

/* pshufd immediate used by rol1/rol3 on the high register half. */
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)

/* Each 256-bit state word is held as a LO/HI pair of 128-bit registers. */
#define STATE0_LO	%xmm0
#define STATE0_HI	%xmm1
#define STATE1_LO	%xmm2
#define STATE1_HI	%xmm3
#define STATE2_LO	%xmm4
#define STATE2_HI	%xmm5
#define STATE3_LO	%xmm6
#define STATE3_HI	%xmm7
#define STATE4_LO	%xmm8
#define STATE4_HI	%xmm9

/* KEY_* and MSG_* deliberately name the same registers. */
#define KEY_LO		%xmm10
#define KEY_HI		%xmm11
#define MSG_LO		%xmm10
#define MSG_HI		%xmm11

/* Scratch registers. */
#define T0_LO		%xmm12
#define T0_HI		%xmm13
#define T1_LO		%xmm14
#define T1_HI		%xmm15
|
||||
|
||||
/*
 * Two 16-byte halves of the constant loaded into STATE4_LO/STATE4_HI by
 * crypto_morus1280_sse2_init.
 *
 * NOTE(review): the section and label names say "morus640" although this
 * is the MORUS-1280 file; they are left as-is because other code refers
 * to these labels.
 */
.section	.rodata.cst16.morus640_const, "aM", @progbits, 16
.align 16
.Lmorus640_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Lmorus640_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/* Byte indices 0..15 and 16..31. */
.section	.rodata.cst16.morus640_counter, "aM", @progbits, 16
.align 16
.Lmorus640_counter_0:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.Lmorus640_counter_1:
	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * Rotate the 256-bit value held in \hi:\lo by one 64-bit lane towards
 * the high end. Written as quadwords, highest first:
 *
 *   HI_1 | HI_0 || LO_1 | LO_0       (input)
 *   HI_0 | HI_1 || LO_1 | LO_0       (after swapping the halves of \hi)
 *   HI_0 | LO_1 || LO_0 | HI_1       (result)
 *
 * Clobbers T0_LO.
 */
.macro rol1 hi, lo
	pshufd		$MASK2, \hi, \hi
	movdqa		\hi, T0_LO
	punpcklqdq	\lo, T0_LO	/* T0_LO = LO_0 | HI_1 */
	punpckhqdq	\hi, \lo	/* \lo   = HI_0 | LO_1 */
	movdqa		\lo, \hi
	movdqa		T0_LO, \lo
.endm
|
||||
|
||||
/*
 * Rotate the 256-bit value held in \hi:\lo by two 64-bit lanes, which is
 * simply an exchange of the two 128-bit registers. Clobbers T0_LO.
 */
.macro rol2 hi, lo
	movdqa	\lo, T0_LO
	movdqa	\hi, \lo
	movdqa	T0_LO, \hi
.endm
|
||||
|
||||
/*
 * Rotate the 256-bit value held in \hi:\lo by three 64-bit lanes towards
 * the high end. Written as quadwords, highest first:
 *
 *   HI_1 | HI_0 || LO_1 | LO_0       (input)
 *   HI_0 | HI_1 || LO_1 | LO_0       (after swapping the halves of \hi)
 *   LO_0 | HI_1 || HI_0 | LO_1       (result)
 *
 * Clobbers T0_LO.
 */
.macro rol3 hi, lo
	pshufd		$MASK2, \hi, \hi
	movdqa		\lo, T0_LO
	punpckhqdq	\hi, T0_LO	/* T0_LO = HI_0 | LO_1 */
	punpcklqdq	\lo, \hi	/* \hi   = LO_0 | HI_1 */
	movdqa		T0_LO, \lo
.endm
|
||||
|
||||
/*
 * One round step on 256-bit state words held as _l/_h register pairs:
 *   s0 ^= s1 & s2
 *   s0 ^= s3
 *   every 64-bit lane of s0 is rotated left by \b bits
 *   s3 is lane-rotated by the macro named in \w (rol1, rol2 or rol3)
 * \s4_l and \s4_h are accepted but never referenced. Clobbers T0_LO.
 */
.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
	/* s0 ^= s1 & s2, low half then high half */
	movdqa	\s1_l, T0_LO
	pand	\s2_l, T0_LO
	pxor	T0_LO, \s0_l

	movdqa	\s1_h, T0_LO
	pand	\s2_h, T0_LO
	pxor	T0_LO, \s0_h

	/* s0 ^= s3 */
	pxor	\s3_l, \s0_l
	pxor	\s3_h, \s0_h

	/* rotate left by \b: (s0 << b) ^ (s0 >> (64 - b)) per lane */
	movdqa	\s0_l, T0_LO
	psllq	$\b, T0_LO
	psrlq	$(64 - \b), \s0_l
	pxor	T0_LO, \s0_l

	movdqa	\s0_h, T0_LO
	psllq	$\b, T0_LO
	psrlq	$(64 - \b), \s0_h
	pxor	T0_LO, \s0_h

	\w	\s3_h, \s3_l
.endm
|
||||
|
||||
/*
 * __morus1280_update: internal ABI
 *
 * Runs the five round steps (rotation amounts 13, 46, 38, 7, 4) and XORs
 * the message block into STATE1..STATE4 between consecutive steps.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update:
	morus1280_round \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		13, rol1
	pxor	MSG_LO, STATE1_LO
	pxor	MSG_HI, STATE1_HI
	morus1280_round \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		46, rol2
	pxor	MSG_LO, STATE2_LO
	pxor	MSG_HI, STATE2_HI
	morus1280_round \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		38, rol3
	pxor	MSG_LO, STATE3_LO
	pxor	MSG_HI, STATE3_HI
	morus1280_round \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		7, rol2
	pxor	MSG_LO, STATE4_LO
	pxor	MSG_HI, STATE4_HI
	morus1280_round \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		4, rol1
	ret
ENDPROC(__morus1280_update)
|
||||
|
||||
/*
 * __morus1280_update_zero: internal ABI
 *
 * Same five round steps as __morus1280_update, but with no message block
 * XORed in between them (i.e. an update with an all-zero message).
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus1280_update_zero:
	morus1280_round \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		13, rol1
	morus1280_round \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		46, rol2
	morus1280_round \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		38, rol3
	morus1280_round \
		STATE3_LO, STATE3_HI, \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		7, rol2
	morus1280_round \
		STATE4_LO, STATE4_HI, \
		STATE0_LO, STATE0_HI, \
		STATE1_LO, STATE1_HI, \
		STATE2_LO, STATE2_HI, \
		STATE3_LO, STATE3_HI, \
		4, rol1
	ret
ENDPROC(__morus1280_update_zero)
|
||||
|
||||
/*
 * __load_partial: internal ABI
 *
 * Loads the low five bits' worth of %rcx bytes from src into
 * MSG_HI:MSG_LO and zero-fills the rest. The block is assembled from its
 * tail towards its head: for each set bit of the count (1, 2, 4, 8, 16)
 * the matching 1/2/4/8/16-byte piece is read at the offset given by the
 * higher count bits, after the data gathered so far has been moved up to
 * make room.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   %r8
 *   %r9
 *   T0_LO (only when bit 3 of bytes is set)
 */
__load_partial:
	xor	%r9d, %r9d
	pxor	MSG_LO, MSG_LO
	pxor	MSG_HI, MSG_HI

	/* bit 0: one trailing byte at offset (bytes & 0x1E) */
	mov	%rcx, %r8
	and	$0x1, %r8
	jz	.Lld_partial_1

	mov	%rcx, %r8
	and	$0x1E, %r8
	add	%rsi, %r8
	mov	(%r8), %r9b

.Lld_partial_1:
	/* bit 1: two bytes at offset (bytes & 0x1C) */
	mov	%rcx, %r8
	and	$0x2, %r8
	jz	.Lld_partial_2

	mov	%rcx, %r8
	and	$0x1C, %r8
	add	%rsi, %r8
	shl	$16, %r9
	mov	(%r8), %r9w

.Lld_partial_2:
	/* bit 2: four bytes at offset (bytes & 0x18) */
	mov	%rcx, %r8
	and	$0x4, %r8
	jz	.Lld_partial_4

	mov	%rcx, %r8
	and	$0x18, %r8
	add	%rsi, %r8
	shl	$32, %r9
	mov	(%r8), %r8d
	xor	%r8, %r9

.Lld_partial_4:
	/* up to seven gathered bytes become the lowest quadword of MSG_LO */
	movq	%r9, MSG_LO

	/* bit 3: eight bytes at offset (bytes & 0x10) */
	mov	%rcx, %r8
	and	$0x8, %r8
	jz	.Lld_partial_8

	mov	%rcx, %r8
	and	$0x10, %r8
	add	%rsi, %r8
	/* shift the gathered quadword up, then merge the new one below it */
	pslldq	$8, MSG_LO
	movq	(%r8), T0_LO
	pxor	T0_LO, MSG_LO

.Lld_partial_8:
	/* bit 4: the first sixteen bytes of src */
	mov	%rcx, %r8
	and	$0x10, %r8
	jz	.Lld_partial_16

	/* what was gathered so far becomes the high half of the block */
	movdqa	MSG_LO, MSG_HI
	movdqu	(%rsi), MSG_LO

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 *
 * Writes the first %rcx bytes of T0_HI:T0_LO to dst in 16/8/4/2/1-byte
 * pieces, advancing the destination pointer after each piece.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes
 *   T0   - message block to store (consumed, see "changed")
 * changed:
 *   %r8
 *   %r9
 *   %r10
 *   T0_LO - overwritten with T0_HI when bytes >= 16 and shifted right
 *           when at least 8 bytes remain after that
 */
__store_partial:
	mov	%rcx, %r8		/* %r8: bytes still to store */
	mov	%rdx, %r9		/* %r9: write cursor */

	cmp	$16, %r8
	jl	.Lst_partial_16

	movdqu	T0_LO, (%r9)
	/* continue with the high half of the block */
	movdqa	T0_HI, T0_LO

	sub	$16, %r8
	add	$16, %r9

.Lst_partial_16:
	movq	T0_LO, %r10

	cmp	$8, %r8
	jl	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0_LO
	movq	T0_LO, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jl	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jl	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$16, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jl	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_init(void *state, const void *key,
 *                                 const void *iv);
 *
 * Builds the initial state and writes it to *state (10 x 16 bytes):
 *   STATE0 = 16 bytes read from iv in the low half, high half zero
 *   STATE1 = 32 bytes read from key
 *   STATE2 = all ones
 *   STATE3 = all zeros
 *   STATE4 = .Lmorus640_const_0 / .Lmorus640_const_1
 * followed by 16 zero-message updates and a final XOR of the key into
 * STATE1.
 *
 * %rdi - state, %rsi - key, %rdx - iv
 */
ENTRY(crypto_morus1280_sse2_init)
	FRAME_BEGIN

	/* load IV: */
	pxor	STATE0_HI, STATE0_HI
	movdqu	(%rdx), STATE0_LO
	/* load key: */
	movdqu	0(%rsi), KEY_LO
	movdqu	16(%rsi), KEY_HI
	movdqa	KEY_LO, STATE1_LO
	movdqa	KEY_HI, STATE1_HI
	/* load all ones: */
	pcmpeqd	STATE2_LO, STATE2_LO
	pcmpeqd	STATE2_HI, STATE2_HI
	/* load all zeros: */
	pxor	STATE3_LO, STATE3_LO
	pxor	STATE3_HI, STATE3_HI
	/* load the constant (RIP-relative, so no absolute relocation): */
	movdqa	.Lmorus640_const_0(%rip), STATE4_LO
	movdqa	.Lmorus640_const_1(%rip), STATE4_HI

	/* update 16 times with zero: */
	.rept 16
	call	__morus1280_update_zero
	.endr

	/* xor-in the key again after updates: */
	pxor	KEY_LO, STATE1_LO
	pxor	KEY_HI, STATE1_HI

	/* store the state: */
	movdqu	STATE0_LO, (0 * 16)(%rdi)
	movdqu	STATE0_HI, (1 * 16)(%rdi)
	movdqu	STATE1_LO, (2 * 16)(%rdi)
	movdqu	STATE1_HI, (3 * 16)(%rdi)
	movdqu	STATE2_LO, (4 * 16)(%rdi)
	movdqu	STATE2_HI, (5 * 16)(%rdi)
	movdqu	STATE3_LO, (6 * 16)(%rdi)
	movdqu	STATE3_HI, (7 * 16)(%rdi)
	movdqu	STATE4_LO, (8 * 16)(%rdi)
	movdqu	STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_init)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_ad(void *state, const void *data,
 *                               unsigned int length);
 *
 * Absorbs every complete 32-byte block of data into the state. Returns
 * without touching the state when length < 32; a trailing partial block
 * is not processed here.
 *
 * %rdi - state, %rsi - data, %rdx - length
 *
 * NOTE(review): the signature above declares length as unsigned int
 * while the full %rdx is compared; this presumably relies on the caller
 * having zero-extended the argument - confirm.
 */
ENTRY(crypto_morus1280_sse2_ad)
	FRAME_BEGIN

	cmp	$32, %rdx
	jb	.Lad_out

	/* load the state: */
	movdqu	(0 * 16)(%rdi), STATE0_LO
	movdqu	(1 * 16)(%rdi), STATE0_HI
	movdqu	(2 * 16)(%rdi), STATE1_LO
	movdqu	(3 * 16)(%rdi), STATE1_HI
	movdqu	(4 * 16)(%rdi), STATE2_LO
	movdqu	(5 * 16)(%rdi), STATE2_HI
	movdqu	(6 * 16)(%rdi), STATE3_LO
	movdqu	(7 * 16)(%rdi), STATE3_HI
	movdqu	(8 * 16)(%rdi), STATE4_LO
	movdqu	(9 * 16)(%rdi), STATE4_HI

	/* choose the aligned-load loop only for a 16-byte aligned source */
	mov	%rsi, %r8
	and	$0xF, %r8
	jnz	.Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa	0(%rsi), MSG_LO
	movdqa	16(%rsi), MSG_HI
	call	__morus1280_update
	sub	$32, %rdx
	add	$32, %rsi
	cmp	$32, %rdx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lad_a_loop

	jmp	.Lad_cont
.align 4
.Lad_u_loop:
	movdqu	0(%rsi), MSG_LO
	movdqu	16(%rsi), MSG_HI
	call	__morus1280_update
	sub	$32, %rdx
	add	$32, %rsi
	cmp	$32, %rdx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lad_u_loop

.Lad_cont:
	/* store the state: */
	movdqu	STATE0_LO, (0 * 16)(%rdi)
	movdqu	STATE0_HI, (1 * 16)(%rdi)
	movdqu	STATE1_LO, (2 * 16)(%rdi)
	movdqu	STATE1_HI, (3 * 16)(%rdi)
	movdqu	STATE2_LO, (4 * 16)(%rdi)
	movdqu	STATE2_HI, (5 * 16)(%rdi)
	movdqu	STATE3_LO, (6 * 16)(%rdi)
	movdqu	STATE3_HI, (7 * 16)(%rdi)
	movdqu	STATE4_LO, (8 * 16)(%rdi)
	movdqu	STATE4_HI, (9 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_ad)
|
||||
|
||||
/*
 * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 *
 * Encrypts every complete 32-byte block of src into dst. For each block
 * the output is
 *   MSG ^ rol3(STATE1) ^ STATE0 ^ (STATE2 & STATE3)
 * and the plaintext block is then absorbed with __morus1280_update.
 * Returns without touching the state when length < 32; a trailing
 * partial block is not processed here.
 *
 * %rdi - state, %rsi - src, %rdx - dst, %rcx - length
 */
ENTRY(crypto_morus1280_sse2_enc)
	FRAME_BEGIN

	cmp	$32, %rcx
	jb	.Lenc_out

	/* load the state: */
	movdqu	(0 * 16)(%rdi), STATE0_LO
	movdqu	(1 * 16)(%rdi), STATE0_HI
	movdqu	(2 * 16)(%rdi), STATE1_LO
	movdqu	(3 * 16)(%rdi), STATE1_HI
	movdqu	(4 * 16)(%rdi), STATE2_LO
	movdqu	(5 * 16)(%rdi), STATE2_HI
	movdqu	(6 * 16)(%rdi), STATE3_LO
	movdqu	(7 * 16)(%rdi), STATE3_HI
	movdqu	(8 * 16)(%rdi), STATE4_LO
	movdqu	(9 * 16)(%rdi), STATE4_HI

	/* aligned loop only when both src and dst are 16-byte aligned */
	mov	%rsi, %r8
	or	%rdx, %r8
	and	$0xF, %r8
	jnz	.Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa	0(%rsi), MSG_LO
	movdqa	16(%rsi), MSG_HI
	movdqa	STATE1_LO, T1_LO
	movdqa	STATE1_HI, T1_HI
	/* rol3 clobbers T0_LO, so T0 is only filled in afterwards */
	rol3	T1_HI, T1_LO
	movdqa	MSG_LO, T0_LO
	movdqa	MSG_HI, T0_HI
	pxor	T1_LO, T0_LO
	pxor	T1_HI, T0_HI
	pxor	STATE0_LO, T0_LO
	pxor	STATE0_HI, T0_HI
	movdqa	STATE2_LO, T1_LO
	movdqa	STATE2_HI, T1_HI
	pand	STATE3_LO, T1_LO
	pand	STATE3_HI, T1_HI
	pxor	T1_LO, T0_LO
	pxor	T1_HI, T0_HI
	movdqa	T0_LO, 0(%rdx)
	movdqa	T0_HI, 16(%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lenc_a_loop

	jmp	.Lenc_cont
.align 4
.Lenc_u_loop:
	movdqu	0(%rsi), MSG_LO
	movdqu	16(%rsi), MSG_HI
	movdqa	STATE1_LO, T1_LO
	movdqa	STATE1_HI, T1_HI
	/* rol3 clobbers T0_LO, so T0 is only filled in afterwards */
	rol3	T1_HI, T1_LO
	movdqa	MSG_LO, T0_LO
	movdqa	MSG_HI, T0_HI
	pxor	T1_LO, T0_LO
	pxor	T1_HI, T0_HI
	pxor	STATE0_LO, T0_LO
	pxor	STATE0_HI, T0_HI
	movdqa	STATE2_LO, T1_LO
	movdqa	STATE2_HI, T1_HI
	pand	STATE3_LO, T1_LO
	pand	STATE3_HI, T1_HI
	pxor	T1_LO, T0_LO
	pxor	T1_HI, T0_HI
	movdqu	T0_LO, 0(%rdx)
	movdqu	T0_HI, 16(%rdx)

	call	__morus1280_update
	sub	$32, %rcx
	add	$32, %rsi
	add	$32, %rdx
	cmp	$32, %rcx
	/* unsigned comparison, matching the jb guard on entry */
	jae	.Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	movdqu	STATE0_LO, (0 * 16)(%rdi)
	movdqu	STATE0_HI, (1 * 16)(%rdi)
	movdqu	STATE1_LO, (2 * 16)(%rdi)
	movdqu	STATE1_HI, (3 * 16)(%rdi)
	movdqu	STATE2_LO, (4 * 16)(%rdi)
	movdqu	STATE2_HI, (5 * 16)(%rdi)
	movdqu	STATE3_LO, (6 * 16)(%rdi)
	movdqu	STATE3_HI, (7 * 16)(%rdi)
	movdqu	STATE4_LO, (8 * 16)(%rdi)
	movdqu	STATE4_HI, (9 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_enc)
|
||||
|
||||
/*
|
||||
* void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus1280_sse2_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper 32 bits of %rcx are
	 * unspecified on entry.  Zero-extend it once here, because the
	 * partial load/store helpers below take the byte count in %rcx.
	 */
	mov %ecx, %ecx

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* encrypt message: */
	call __load_partial

	/* T0 = MSG ^ (STATE1 after rol3) ^ STATE0 ^ (STATE2 & STATE3) */
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI
	pxor STATE0_LO, T0_LO
	pxor STATE0_HI, T0_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, T0_LO
	pxor T1_HI, T0_HI

	call __store_partial

	/* the state is updated with the block loaded by __load_partial */
	call __morus1280_update

	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_enc_tail)
|
||||
|
||||
/*
|
||||
* void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus1280_sse2_dec)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so only %ecx holds a defined value;
	 * the upper 32 bits of %rcx are unspecified by the calling
	 * convention.  All length arithmetic is therefore done on %ecx and
	 * uses unsigned branches.
	 */
	cmp $32, %ecx
	jb .Ldec_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* take the movdqa path only if src and dst are both 16-byte aligned */
	mov %rsi, %r8
	or %rdx, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa 0(%rsi), MSG_LO
	movdqa 16(%rsi), MSG_HI
	/* MSG ^= STATE0 ^ (STATE1 after rol3) ^ (STATE2 & STATE3) */
	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa MSG_LO, 0(%rdx)
	movdqa MSG_HI, 16(%rdx)

	/* MSG now holds the decrypted block when the state is updated */
	call __morus1280_update
	sub $32, %ecx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %ecx
	jae .Ldec_a_loop

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	/* same as the aligned loop, but with unaligned loads and stores */
	movdqu 0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI
	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqu MSG_LO, 0(%rdx)
	movdqu MSG_HI, 16(%rdx)

	call __morus1280_update
	sub $32, %ecx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %ecx
	jae .Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_dec)
|
||||
|
||||
/*
|
||||
* void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus1280_sse2_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper 32 bits of %rcx are
	 * unspecified on entry.  Zero-extend it once here, because the
	 * partial load/store helpers below take the byte count in %rcx.
	 */
	mov %ecx, %ecx

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* decrypt message: */
	call __load_partial

	/* MSG ^= STATE0 ^ (STATE1 after rol3) ^ (STATE2 & STATE3) */
	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T1_LO
	movdqa STATE1_HI, T1_HI
	rol3 T1_HI, T1_LO
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	movdqa STATE2_LO, T1_LO
	movdqa STATE2_HI, T1_HI
	pand STATE3_LO, T1_LO
	pand STATE3_HI, T1_HI
	pxor T1_LO, MSG_LO
	pxor T1_HI, MSG_HI
	/* hand a copy to __store_partial in T0; MSG is still needed below */
	movdqa MSG_LO, T0_LO
	movdqa MSG_HI, T0_HI

	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of the count to all
	 * 16 byte lanes, then compare against the per-byte counters so that
	 * only lanes whose counter is below the count keep their MSG bytes.
	 */
	movq %rcx, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	punpcklbw T0_LO, T0_LO
	movdqa T0_LO, T0_HI
	movdqa .Lmorus640_counter_0, T1_LO
	movdqa .Lmorus640_counter_1, T1_HI
	pcmpgtb T1_LO, T0_LO
	pcmpgtb T1_HI, T0_HI
	pand T0_LO, MSG_LO
	pand T0_HI, MSG_HI

	call __morus1280_update

	/* store the state: */
	movdqu STATE0_LO, (0 * 16)(%rdi)
	movdqu STATE0_HI, (1 * 16)(%rdi)
	movdqu STATE1_LO, (2 * 16)(%rdi)
	movdqu STATE1_HI, (3 * 16)(%rdi)
	movdqu STATE2_LO, (4 * 16)(%rdi)
	movdqu STATE2_HI, (5 * 16)(%rdi)
	movdqu STATE3_LO, (6 * 16)(%rdi)
	movdqu STATE3_HI, (7 * 16)(%rdi)
	movdqu STATE4_LO, (8 * 16)(%rdi)
	movdqu STATE4_HI, (9 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_dec_tail)
|
||||
|
||||
/*
|
||||
* void crypto_morus1280_sse2_final(void *state, void *tag_xor,
|
||||
* u64 assoclen, u64 cryptlen);
|
||||
*/
|
||||
ENTRY(crypto_morus1280_sse2_final)
	FRAME_BEGIN

	/* bring all five state words (two xmm halves each) into registers */
	movdqu (0 * 16)(%rdi), STATE0_LO
	movdqu (1 * 16)(%rdi), STATE0_HI
	movdqu (2 * 16)(%rdi), STATE1_LO
	movdqu (3 * 16)(%rdi), STATE1_HI
	movdqu (4 * 16)(%rdi), STATE2_LO
	movdqu (5 * 16)(%rdi), STATE2_HI
	movdqu (6 * 16)(%rdi), STATE3_LO
	movdqu (7 * 16)(%rdi), STATE3_HI
	movdqu (8 * 16)(%rdi), STATE4_LO
	movdqu (9 * 16)(%rdi), STATE4_HI

	/* state[4] ^= state[0] */
	pxor STATE0_LO, STATE4_LO
	pxor STATE0_HI, STATE4_HI

	/*
	 * Length block: MSG_LO = { assoclen * 8, cryptlen * 8 } as two
	 * 64-bit lanes (psllq $3 turns byte counts into bit counts),
	 * MSG_HI = 0.
	 */
	movq %rdx, MSG_LO
	movq %rcx, T0_LO
	pslldq $8, T0_LO
	pxor T0_LO, MSG_LO
	psllq $3, MSG_LO
	pxor MSG_HI, MSG_HI

	/* run ten state updates with the length block */
.rept 10
	call __morus1280_update
.endr

	/*
	 * tag_xor ^= STATE0 ^ (STATE1 after rol3) ^ (STATE2 & STATE3),
	 * read from and written back to the 32 bytes at %rsi.
	 */
	movdqu 0(%rsi), MSG_LO
	movdqu 16(%rsi), MSG_HI

	pxor STATE0_LO, MSG_LO
	pxor STATE0_HI, MSG_HI
	movdqa STATE1_LO, T0_LO
	movdqa STATE1_HI, T0_HI
	rol3 T0_HI, T0_LO
	pxor T0_LO, MSG_LO
	pxor T0_HI, MSG_HI
	movdqa STATE2_LO, T0_LO
	movdqa STATE2_HI, T0_HI
	pand STATE3_LO, T0_LO
	pand STATE3_HI, T0_HI
	pxor T0_LO, MSG_LO
	pxor T0_HI, MSG_HI

	movdqu MSG_LO, 0(%rsi)
	movdqu MSG_HI, 16(%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_sse2_final)
|
@@ -1,61 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Glue for SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350);
|
||||
|
||||
static struct simd_aead_alg *simd_alg;
|
||||
|
||||
/*
 * Register the SSE2 MORUS-1280 AEAD, but only when the boot CPU advertises
 * SSE2 and the SSE xfeature is available; otherwise report -ENODEV.
 */
static int __init crypto_morus1280_sse2_module_init(void)
{
	if (boot_cpu_has(X86_FEATURE_XMM2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
		return simd_register_aeads_compat(&crypto_morus1280_sse2_alg,
						  1, &simd_alg);

	return -ENODEV;
}
|
||||
|
||||
/* Undo the registration performed by crypto_morus1280_sse2_module_init(). */
static void __exit crypto_morus1280_sse2_module_exit(void)
{
	simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg);
}
|
||||
|
||||
module_init(crypto_morus1280_sse2_module_init);
|
||||
module_exit(crypto_morus1280_sse2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus1280");
|
||||
MODULE_ALIAS_CRYPTO("morus1280-sse2");
|
@@ -1,205 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The MORUS-1280 Authenticated-Encryption Algorithm
|
||||
* Common x86 SIMD glue skeleton
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus1280_glue.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct morus1280_state {
|
||||
struct morus1280_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus1280_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
void (*crypt_tail)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
};
|
||||
|
||||
static void crypto_morus1280_glue_process_ad(
|
||||
struct morus1280_state *state,
|
||||
const struct morus1280_glue_ops *ops,
|
||||
struct scatterlist *sg_src, unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus1280_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS1280_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
ops->ad(state, src, left);
|
||||
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
|
||||
left &= MORUS1280_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
|
||||
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
|
||||
struct morus1280_ops ops,
|
||||
struct skcipher_walk *walk)
|
||||
{
|
||||
while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
|
||||
ops.crypt_blocks(state, walk->src.virt.addr,
|
||||
walk->dst.virt.addr,
|
||||
round_down(walk->nbytes,
|
||||
MORUS1280_BLOCK_SIZE));
|
||||
skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk->nbytes) {
|
||||
ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
|
||||
walk->nbytes);
|
||||
skcipher_walk_done(walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen == MORUS1280_BLOCK_SIZE) {
|
||||
memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
|
||||
} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
|
||||
memcpy(ctx->key.bytes, key, keylen);
|
||||
memcpy(ctx->key.bytes + keylen, key, keylen);
|
||||
} else {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
|
||||
|
||||
int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
|
||||
|
||||
static void crypto_morus1280_glue_crypt(struct aead_request *req,
|
||||
struct morus1280_ops ops,
|
||||
unsigned int cryptlen,
|
||||
struct morus1280_block *tag_xor)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_state state;
|
||||
struct skcipher_walk walk;
|
||||
|
||||
ops.skcipher_walk_init(&walk, req, true);
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
ctx->ops->init(&state, &ctx->key, req->iv);
|
||||
crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
|
||||
crypto_morus1280_glue_process_crypt(&state, ops, &walk);
|
||||
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
int crypto_morus1280_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = ctx->ops->enc,
|
||||
.crypt_tail = ctx->ops->enc_tail,
|
||||
};
|
||||
|
||||
struct morus1280_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
|
||||
|
||||
int crypto_morus1280_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus1280_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = ctx->ops->dec,
|
||||
.crypt_tail = ctx->ops->dec_tail,
|
||||
};
|
||||
|
||||
struct morus1280_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
|
||||
|
||||
void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
|
||||
const struct morus1280_glue_ops *ops)
|
||||
{
|
||||
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
|
||||
ctx->ops = ops;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
|
@@ -1,612 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* SSE2 implementation of MORUS-640
|
||||
*
|
||||
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define SHUFFLE_MASK(i0, i1, i2, i3) \
|
||||
(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
|
||||
|
||||
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
|
||||
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
|
||||
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
|
||||
|
||||
#define STATE0 %xmm0
|
||||
#define STATE1 %xmm1
|
||||
#define STATE2 %xmm2
|
||||
#define STATE3 %xmm3
|
||||
#define STATE4 %xmm4
|
||||
#define KEY %xmm5
|
||||
#define MSG %xmm5
|
||||
#define T0 %xmm6
|
||||
#define T1 %xmm7
|
||||
|
||||
.section .rodata.cst16.morus640_const, "aM", @progbits, 32
|
||||
.align 16
|
||||
.Lmorus640_const_0:
|
||||
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
||||
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
||||
.Lmorus640_const_1:
|
||||
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
||||
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
||||
|
||||
.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lmorus640_counter:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
||||
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
||||
|
||||
.text
|
||||
|
||||
/*
 * One MORUS-640 round step on the register operands:
 *   s0 ^= (s1 & s2) ^ s3
 *   each 32-bit lane of s0 is rotated left by \b bits
 *   the 32-bit lanes of s3 are permuted by pshufd with selector \w
 * T0 is used as scratch.  The s4 argument is accepted but not referenced.
 */
.macro morus640_round s0, s1, s2, s3, s4, b, w
	/* s0 ^= (s1 & s2) ^ s3 */
	movdqa \s1, T0
	pand \s2, T0
	pxor T0, \s0
	pxor \s3, \s0
	/* rotate left by \b: (s0 << b) combined with (s0 >> (32 - b)) */
	movdqa \s0, T0
	pslld $\b, T0
	psrld $(32 - \b), \s0
	pxor T0, \s0
	/* lane permutation of s3 */
	pshufd $\w, \s3, \s3
.endm
|
||||
|
||||
/*
|
||||
* __morus640_update: internal ABI
|
||||
* input:
|
||||
* STATE[0-4] - input state
|
||||
* MSG - message block
|
||||
* output:
|
||||
* STATE[0-4] - output state
|
||||
* changed:
|
||||
* T0
|
||||
*/
|
||||
__morus640_update:
	/*
	 * Five rounds, each on the state registers rotated by one position;
	 * the message block is XORed into STATE1..STATE4 between rounds.
	 */
	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	pxor MSG, STATE1
	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	pxor MSG, STATE2
	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	pxor MSG, STATE3
	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	pxor MSG, STATE4
	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
	ret
ENDPROC(__morus640_update)
|
||||
|
||||
|
||||
/*
|
||||
* __morus640_update_zero: internal ABI
|
||||
* input:
|
||||
* STATE[0-4] - input state
|
||||
* output:
|
||||
* STATE[0-4] - output state
|
||||
* changed:
|
||||
* T0
|
||||
*/
|
||||
__morus640_update_zero:
	/*
	 * Same five rounds as __morus640_update, but without the message
	 * XORs; the MSG/KEY register is neither read nor written here.
	 */
	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
	ret
ENDPROC(__morus640_update_zero)
|
||||
|
||||
/*
|
||||
* __load_partial: internal ABI
|
||||
* input:
|
||||
* %rsi - src
|
||||
* %rcx - bytes
|
||||
* output:
|
||||
* MSG - message block
|
||||
* changed:
|
||||
* T0
|
||||
* %r8
|
||||
* %r9
|
||||
*/
|
||||
__load_partial:
	/* start from an all-zero accumulator and message block */
	xor %r9d, %r9d
	pxor MSG, MSG

	/* count bit 0: fetch the single byte at offset (count & 0x1E) */
	mov %rcx, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	mov %rcx, %r8
	and $0x1E, %r8
	add %rsi, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	/* count bit 1: make room and fetch 2 bytes at offset (count & 0x1C) */
	mov %rcx, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	mov %rcx, %r8
	and $0x1C, %r8
	add %rsi, %r8
	shl $16, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	/* count bit 2: make room and fetch 4 bytes at offset (count & 0x18) */
	mov %rcx, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	mov %rcx, %r8
	and $0x18, %r8
	add %rsi, %r8
	shl $32, %r9
	mov (%r8), %r8d
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 bytes gathered so far go into the low half of MSG */
	movq %r9, MSG

	/*
	 * count bit 3: move the gathered bytes to the high half and fetch
	 * 8 bytes at offset (count & 0x10) into the low half
	 */
	mov %rcx, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	mov %rcx, %r8
	and $0x10, %r8
	add %rsi, %r8
	pslldq $8, MSG
	movq (%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
|
||||
|
||||
/*
 * __store_partial: internal ABI
 * input:
 *   %rdx - dst
 *   %ecx - bytes (only the low 32 bits of %rcx are read)
 *   T0   - message block to store
 * changed:
 *   T0 (shifted right by 8 bytes when at least 8 bytes are stored)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	/*
	 * The count originates from an 'unsigned int' argument, so the
	 * upper half of %rcx is not guaranteed to be zero; the 32-bit move
	 * zero-extends into %r8 and keeps the signed compares below valid.
	 */
	mov %ecx, %r8d
	mov %rdx, %r9

	/* %r10 always holds the next (up to 8) bytes still to be written */
	movq T0, %r10

	cmp $8, %r8
	jl .Lst_partial_8

	/* write 8 bytes, then continue with the high half of T0 */
	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jl .Lst_partial_4

	/* write 4 bytes */
	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jl .Lst_partial_2

	/* write 2 bytes */
	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jl .Lst_partial_1

	/* write the last byte */
	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_init)
	FRAME_BEGIN

	/* STATE0 = IV */
	movdqu (%rdx), STATE0
	/* STATE1 = key; a copy stays in KEY for the final XOR */
	movdqu (%rsi), KEY
	movdqa KEY, STATE1
	/* STATE2 = all ones */
	pcmpeqd STATE2, STATE2
	/* STATE3, STATE4 = the two 16-byte constants */
	movdqa .Lmorus640_const_0, STATE3
	movdqa .Lmorus640_const_1, STATE4

	/* 16 message-less updates; these leave KEY untouched */
.rept 16
	call __morus640_update_zero
.endr

	/* xor-in the key again after updates: */
	pxor KEY, STATE1

	/* write the initialised state out to *state */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_init)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_ad(void *state, const void *data,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_ad)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so only %edx holds a defined value;
	 * the upper 32 bits of %rdx are unspecified by the calling
	 * convention.  All length arithmetic is therefore done on %edx and
	 * uses unsigned branches.  Fewer than 16 bytes: nothing to do.
	 */
	cmp $16, %edx
	jb .Lad_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* take the movdqa path only if the data pointer is 16-byte aligned */
	mov %rsi, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa (%rsi), MSG
	call __morus640_update
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lad_a_loop

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	movdqu (%rsi), MSG
	call __morus640_update
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lad_u_loop

.Lad_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_ad)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_enc)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so only %ecx holds a defined value;
	 * the upper 32 bits of %rcx are unspecified by the calling
	 * convention.  All length arithmetic is therefore done on %ecx and
	 * uses unsigned branches.
	 */
	cmp $16, %ecx
	jb .Lenc_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* take the movdqa path only if src and dst are both 16-byte aligned */
	mov %rsi, %r8
	or %rdx, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa (%rsi), MSG
	/* T0 = MSG ^ STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0
	movdqa T0, (%rdx)

	/* the state is updated with the input block still held in MSG */
	call __morus640_update
	sub $16, %ecx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %ecx
	jae .Lenc_a_loop

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	/* same as the aligned loop, but with unaligned load and store */
	movdqu (%rsi), MSG
	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0
	movdqu T0, (%rdx)

	call __morus640_update
	sub $16, %ecx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %ecx
	jae .Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper 32 bits of %rcx are
	 * unspecified on entry.  Zero-extend it once here, because the
	 * partial load/store helpers below take the byte count in %rcx.
	 */
	mov %ecx, %ecx

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* encrypt message: the partial block arrives zero-padded in MSG */
	call __load_partial

	/* T0 = MSG ^ STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE0, T0
	pshufd $MASK3, STATE1, T1
	pxor T1, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	/* only the first 'length' bytes of T0 are written to dst */
	call __store_partial

	call __morus640_update

	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc_tail)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_dec)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so only %ecx holds a defined value;
	 * the upper 32 bits of %rcx are unspecified by the calling
	 * convention.  All length arithmetic is therefore done on %ecx and
	 * uses unsigned branches.
	 */
	cmp $16, %ecx
	jb .Ldec_out

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* take the movdqa path only if src and dst are both 16-byte aligned */
	mov %rsi, %r8
	or %rdx, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa (%rsi), MSG
	/* MSG ^= STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3) */
	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	movdqa MSG, (%rdx)

	/* the state is updated with the decrypted block now held in MSG */
	call __morus640_update
	sub $16, %ecx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %ecx
	jae .Ldec_a_loop

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	/* same as the aligned loop, but with unaligned load and store */
	movdqu (%rsi), MSG
	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	movdqu MSG, (%rdx)

	call __morus640_update
	sub $16, %ecx
	add $16, %rsi
	add $16, %rdx
	cmp $16, %ecx
	jae .Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
|
||||
* unsigned int length);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper 32 bits of %rcx are
	 * unspecified on entry.  Zero-extend it once here, because the
	 * partial load/store helpers below take the byte count in %rcx.
	 */
	mov %ecx, %ecx

	/* load the state: */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* decrypt message: the partial block arrives zero-padded in MSG */
	call __load_partial

	/* MSG ^= STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3) */
	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG
	/* __store_partial takes its data in T0; MSG is still needed below */
	movdqa MSG, T0

	call __store_partial

	/*
	 * mask with byte count: broadcast the low byte of the count to all
	 * 16 byte lanes, then compare against the byte indices 0..15 so
	 * that only lanes with index < count keep their MSG bytes.
	 */
	movq %rcx, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Lmorus640_counter, T1
	pcmpgtb T1, T0
	pand T0, MSG

	call __morus640_update

	/* store the state: */
	movdqu STATE0, (0 * 16)(%rdi)
	movdqu STATE1, (1 * 16)(%rdi)
	movdqu STATE2, (2 * 16)(%rdi)
	movdqu STATE3, (3 * 16)(%rdi)
	movdqu STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec_tail)
|
||||
|
||||
/*
|
||||
* void crypto_morus640_sse2_final(void *state, void *tag_xor,
|
||||
* u64 assoclen, u64 cryptlen);
|
||||
*/
|
||||
ENTRY(crypto_morus640_sse2_final)
	FRAME_BEGIN

	/* bring the five state words into registers */
	movdqu (0 * 16)(%rdi), STATE0
	movdqu (1 * 16)(%rdi), STATE1
	movdqu (2 * 16)(%rdi), STATE2
	movdqu (3 * 16)(%rdi), STATE3
	movdqu (4 * 16)(%rdi), STATE4

	/* state[4] ^= state[0] */
	pxor STATE0, STATE4

	/*
	 * Length block: MSG = { assoclen * 8, cryptlen * 8 } as two 64-bit
	 * lanes (psllq $3 turns byte counts into bit counts).
	 */
	movq %rdx, MSG
	movq %rcx, T0
	pslldq $8, T0
	pxor T0, MSG
	psllq $3, MSG

	/* run ten state updates with the length block */
.rept 10
	call __morus640_update
.endr

	/*
	 * tag_xor ^= STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3),
	 * read from and written back to the 16 bytes at %rsi.
	 */
	movdqu (%rsi), MSG

	pxor STATE0, MSG
	pshufd $MASK3, STATE1, T0
	pxor T0, MSG
	movdqa STATE2, T0
	pand STATE3, T0
	pxor T0, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_final)
|
@@ -1,61 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The MORUS-640 Authenticated-Encryption Algorithm
|
||||
* Glue for SSE2 implementation
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/morus640_glue.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
|
||||
const void *iv);
|
||||
asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
|
||||
unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
|
||||
void *dst, unsigned int length);
|
||||
|
||||
asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
|
||||
u64 assoclen, u64 cryptlen);
|
||||
|
||||
MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400);
|
||||
|
||||
static struct simd_aead_alg *simd_alg;
|
||||
|
||||
static int __init crypto_morus640_sse2_module_init(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
|
||||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
|
||||
return -ENODEV;
|
||||
|
||||
return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1,
|
||||
&simd_alg);
|
||||
}
|
||||
|
||||
static void __exit crypto_morus640_sse2_module_exit(void)
|
||||
{
|
||||
simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg);
|
||||
}
|
||||
|
||||
module_init(crypto_morus640_sse2_module_init);
|
||||
module_exit(crypto_morus640_sse2_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
|
||||
MODULE_ALIAS_CRYPTO("morus640");
|
||||
MODULE_ALIAS_CRYPTO("morus640-sse2");
|
@@ -1,200 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* The MORUS-640 Authenticated-Encryption Algorithm
|
||||
* Common x86 SIMD glue skeleton
|
||||
*
|
||||
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
||||
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/morus640_glue.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct morus640_state {
|
||||
struct morus640_block s[MORUS_STATE_BLOCKS];
|
||||
};
|
||||
|
||||
struct morus640_ops {
|
||||
int (*skcipher_walk_init)(struct skcipher_walk *walk,
|
||||
struct aead_request *req, bool atomic);
|
||||
|
||||
void (*crypt_blocks)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
void (*crypt_tail)(void *state, const void *src, void *dst,
|
||||
unsigned int length);
|
||||
};
|
||||
|
||||
static void crypto_morus640_glue_process_ad(
|
||||
struct morus640_state *state,
|
||||
const struct morus640_glue_ops *ops,
|
||||
struct scatterlist *sg_src, unsigned int assoclen)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
struct morus640_block buf;
|
||||
unsigned int pos = 0;
|
||||
|
||||
scatterwalk_start(&walk, sg_src);
|
||||
while (assoclen != 0) {
|
||||
unsigned int size = scatterwalk_clamp(&walk, assoclen);
|
||||
unsigned int left = size;
|
||||
void *mapped = scatterwalk_map(&walk);
|
||||
const u8 *src = (const u8 *)mapped;
|
||||
|
||||
if (pos + size >= MORUS640_BLOCK_SIZE) {
|
||||
if (pos > 0) {
|
||||
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
|
||||
memcpy(buf.bytes + pos, src, fill);
|
||||
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
|
||||
pos = 0;
|
||||
left -= fill;
|
||||
src += fill;
|
||||
}
|
||||
|
||||
ops->ad(state, src, left);
|
||||
src += left & ~(MORUS640_BLOCK_SIZE - 1);
|
||||
left &= MORUS640_BLOCK_SIZE - 1;
|
||||
}
|
||||
|
||||
memcpy(buf.bytes + pos, src, left);
|
||||
|
||||
pos += left;
|
||||
assoclen -= size;
|
||||
scatterwalk_unmap(mapped);
|
||||
scatterwalk_advance(&walk, size);
|
||||
scatterwalk_done(&walk, 0, assoclen);
|
||||
}
|
||||
|
||||
if (pos > 0) {
|
||||
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
|
||||
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
|
||||
struct morus640_ops ops,
|
||||
struct skcipher_walk *walk)
|
||||
{
|
||||
while (walk->nbytes >= MORUS640_BLOCK_SIZE) {
|
||||
ops.crypt_blocks(state, walk->src.virt.addr,
|
||||
walk->dst.virt.addr,
|
||||
round_down(walk->nbytes, MORUS640_BLOCK_SIZE));
|
||||
skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk->nbytes) {
|
||||
ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
|
||||
walk->nbytes);
|
||||
skcipher_walk_done(walk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
|
||||
|
||||
if (keylen != MORUS640_BLOCK_SIZE) {
|
||||
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
|
||||
|
||||
int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
|
||||
unsigned int authsize)
|
||||
{
|
||||
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
|
||||
|
||||
static void crypto_morus640_glue_crypt(struct aead_request *req,
|
||||
struct morus640_ops ops,
|
||||
unsigned int cryptlen,
|
||||
struct morus640_block *tag_xor)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_state state;
|
||||
struct skcipher_walk walk;
|
||||
|
||||
ops.skcipher_walk_init(&walk, req, true);
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
ctx->ops->init(&state, &ctx->key, req->iv);
|
||||
crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
|
||||
crypto_morus640_glue_process_crypt(&state, ops, &walk);
|
||||
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
|
||||
|
||||
kernel_fpu_end();
|
||||
}
|
||||
|
||||
int crypto_morus640_glue_encrypt(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_encrypt,
|
||||
.crypt_blocks = ctx->ops->enc,
|
||||
.crypt_tail = ctx->ops->enc_tail,
|
||||
};
|
||||
|
||||
struct morus640_block tag = {};
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
|
||||
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->dst,
|
||||
req->assoclen + cryptlen, authsize, 1);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
|
||||
|
||||
int crypto_morus640_glue_decrypt(struct aead_request *req)
|
||||
{
|
||||
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
|
||||
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct morus640_ops OPS = {
|
||||
.skcipher_walk_init = skcipher_walk_aead_decrypt,
|
||||
.crypt_blocks = ctx->ops->dec,
|
||||
.crypt_tail = ctx->ops->dec_tail,
|
||||
};
|
||||
|
||||
struct morus640_block tag;
|
||||
unsigned int authsize = crypto_aead_authsize(tfm);
|
||||
unsigned int cryptlen = req->cryptlen - authsize;
|
||||
|
||||
scatterwalk_map_and_copy(tag.bytes, req->src,
|
||||
req->assoclen + cryptlen, authsize, 0);
|
||||
|
||||
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
|
||||
|
||||
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
|
||||
|
||||
void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
|
||||
const struct morus640_glue_ops *ops)
|
||||
{
|
||||
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
|
||||
ctx->ops = ops;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
|
||||
MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
|
@@ -167,7 +167,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&serpent_enc_xts, req,
|
||||
XTS_TWEAK_CAST(__serpent_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -177,7 +177,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&serpent_dec_xts, req,
|
||||
XTS_TWEAK_CAST(__serpent_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg serpent_algs[] = {
|
||||
|
@@ -207,7 +207,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&serpent_enc_xts, req,
|
||||
XTS_TWEAK_CAST(__serpent_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -217,7 +217,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&serpent_dec_xts, req,
|
||||
XTS_TWEAK_CAST(__serpent_encrypt),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg serpent_algs[] = {
|
||||
|
@@ -45,8 +45,8 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
|
||||
u64 rounds);
|
||||
typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
|
||||
|
||||
static int sha256_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, sha256_transform_fn *sha256_xform)
|
||||
static int _sha256_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, sha256_transform_fn *sha256_xform)
|
||||
{
|
||||
struct sha256_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
@@ -84,7 +84,7 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data,
|
||||
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
return sha256_update(desc, data, len, sha256_transform_ssse3);
|
||||
return _sha256_update(desc, data, len, sha256_transform_ssse3);
|
||||
}
|
||||
|
||||
static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
|
||||
@@ -151,7 +151,7 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
|
||||
static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
return sha256_update(desc, data, len, sha256_transform_avx);
|
||||
return _sha256_update(desc, data, len, sha256_transform_avx);
|
||||
}
|
||||
|
||||
static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
|
||||
@@ -233,7 +233,7 @@ asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
|
||||
static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
return sha256_update(desc, data, len, sha256_transform_rorx);
|
||||
return _sha256_update(desc, data, len, sha256_transform_rorx);
|
||||
}
|
||||
|
||||
static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
|
||||
@@ -313,7 +313,7 @@ asmlinkage void sha256_ni_transform(u32 *digest, const char *data,
|
||||
static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
return sha256_update(desc, data, len, sha256_ni_transform);
|
||||
return _sha256_update(desc, data, len, sha256_ni_transform);
|
||||
}
|
||||
|
||||
static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
|
||||
|
@@ -210,7 +210,7 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&twofish_enc_xts, req,
|
||||
XTS_TWEAK_CAST(twofish_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, false);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
@@ -220,7 +220,7 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
|
||||
return glue_xts_req_128bit(&twofish_dec_xts, req,
|
||||
XTS_TWEAK_CAST(twofish_enc_blk),
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx);
|
||||
&ctx->tweak_ctx, &ctx->crypt_ctx, true);
|
||||
}
|
||||
|
||||
static struct skcipher_alg twofish_algs[] = {
|
||||
|
@@ -1,12 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef ASM_X86_AES_H
|
||||
#define ASM_X86_AES_H
|
||||
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/aes.h>
|
||||
|
||||
void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
|
||||
const u8 *src);
|
||||
#endif
|
@@ -114,7 +114,7 @@ extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
|
||||
extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
|
||||
struct skcipher_request *req,
|
||||
common_glue_func_t tweak_fn, void *tweak_ctx,
|
||||
void *crypt_ctx);
|
||||
void *crypt_ctx, bool decrypt);
|
||||
|
||||
extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
|
||||
le128 *iv, common_glue_func_t fn);
|
||||
|
@@ -9,9 +9,11 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
|
||||
$(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
|
||||
$(call if_changed_rule,cc_o_c)
|
||||
|
||||
$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
|
||||
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
|
||||
$(call if_changed_rule,cc_o_c)
|
||||
|
||||
CFLAGS_sha256.o := -D__DISABLE_EXPORTS
|
||||
|
||||
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
|
||||
targets += purgatory.ro
|
||||
|
||||
|
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/bug.h>
|
||||
#include <linux/sha256.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/purgatory.h>
|
||||
|
||||
#include "../boot/string.h"
|
||||
|
Reference in New Issue
Block a user