i386: move crypto
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
5
arch/x86/crypto/Makefile
Normal file
5
arch/x86/crypto/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
# Include the 32-bit crypto Makefile when CONFIG_X86_32 is set to y;
# otherwise include the 64-bit one, which lives under arch/x86_64.
ifeq ($(CONFIG_X86_32),y)
|
||||
include ${srctree}/arch/x86/crypto/Makefile_32
|
||||
else
|
||||
include ${srctree}/arch/x86_64/crypto/Makefile_64
|
||||
endif
|
12
arch/x86/crypto/Makefile_32
Normal file
12
arch/x86/crypto/Makefile_32
Normal file
@@ -0,0 +1,12 @@
|
||||
#
|
||||
# x86/crypto/Makefile
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
# Composite objects, added to the obj-y / obj-m list chosen by each config symbol.
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
|
||||
|
||||
# Component objects that make up each composite object above.
aes-i586-y := aes-i586-asm_32.o aes_32.o
|
||||
twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
|
||||
|
373
arch/x86/crypto/aes-i586-asm_32.S
Normal file
373
arch/x86/crypto/aes-i586-asm_32.S
Normal file
@@ -0,0 +1,373 @@
|
||||
// -------------------------------------------------------------------------
|
||||
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
|
||||
// All rights reserved.
|
||||
//
|
||||
// LICENSE TERMS
|
||||
//
|
||||
// The free distribution and use of this software in both source and binary
|
||||
// form is allowed (with or without changes) provided that:
|
||||
//
|
||||
// 1. distributions of this source code include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer;
|
||||
//
|
||||
// 2. distributions in binary form include the above copyright
|
||||
// notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other associated materials;
|
||||
//
|
||||
// 3. the copyright holder's name is not used to endorse products
|
||||
// built using this software without specific written permission.
|
||||
//
|
||||
//
|
||||
// ALTERNATIVELY, provided that this notice is retained in full, this product
|
||||
// may be distributed under the terms of the GNU General Public License (GPL),
|
||||
// in which case the provisions of the GPL apply INSTEAD OF those given above.
|
||||
//
|
||||
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
|
||||
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
|
||||
|
||||
// DISCLAIMER
|
||||
//
|
||||
// This software is provided 'as is' with no explicit or implied warranties
|
||||
// in respect of its properties including, but not limited to, correctness
|
||||
// and fitness for purpose.
|
||||
// -------------------------------------------------------------------------
|
||||
// Issue Date: 29/07/2002
|
||||
|
||||
.file "aes-i586-asm.S"
|
||||
.text
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
|
||||
|
||||
/* offsets to parameters with one register pushed onto stack */
|
||||
#define tfm 8
|
||||
#define out_blk 12
|
||||
#define in_blk 16
|
||||
|
||||
/* offsets in crypto_tfm structure */
|
||||
#define ekey (crypto_tfm_ctx_offset + 0)
|
||||
#define nrnd (crypto_tfm_ctx_offset + 256)
|
||||
#define dkey (crypto_tfm_ctx_offset + 260)
|
||||
|
||||
// register mapping for encrypt and decrypt subroutines
|
||||
|
||||
#define r0 eax
|
||||
#define r1 ebx
|
||||
#define r2 ecx
|
||||
#define r3 edx
|
||||
#define r4 esi
|
||||
#define r5 edi
|
||||
|
||||
#define eaxl al
|
||||
#define eaxh ah
|
||||
#define ebxl bl
|
||||
#define ebxh bh
|
||||
#define ecxl cl
|
||||
#define ecxh ch
|
||||
#define edxl dl
|
||||
#define edxh dh
|
||||
|
||||
#define _h(reg) reg##h
|
||||
#define h(reg) _h(reg)
|
||||
|
||||
#define _l(reg) reg##l
|
||||
#define l(reg) _l(reg)
|
||||
|
||||
// This macro takes a 32-bit word representing a column and uses
|
||||
// each of its four bytes to index into four tables of 256 32-bit
|
||||
// words to obtain values that are then xored into the appropriate
|
||||
// output registers r0, r1, r4 or r5.
|
||||
|
||||
// Parameters:
|
||||
// table table base address
|
||||
// %1 out_state[0]
|
||||
// %2 out_state[1]
|
||||
// %3 out_state[2]
|
||||
// %4 out_state[3]
|
||||
// idx input register for the round (destroyed)
|
||||
// tmp scratch register for the round
|
||||
// sched key schedule
|
||||
|
||||
// do_col: fold one input column into the four output registers.
// The four bytes of idx, lowest first, each index one of four consecutive
// tlen-byte tables starting at 'table'; the words found are xored into
// a1, a2, a3 and a4 in that order. idx is shifted and then overwritten by
// its own top byte; tmp carries the other byte indices.
#define do_col(table, a1,a2,a3,a4, idx, tmp) \
|
||||
movzx %l(idx),%tmp; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+2*tlen(,%tmp,4),%a3; \
|
||||
xor table+3*tlen(,%idx,4),%a4;
|
||||
|
||||
// initialise output registers from the key schedule
|
||||
// NB1: original value of a3 is in idx on exit
|
||||
// NB2: original values of a1,a2,a4 aren't used
|
||||
// do_fcol: first column step of a forward round.
// Loads the outputs from the round key at 'sched' (a1 from offset 0,
// a2 from offset 12, a4 from offset 4, a3 from offset 8) and xors in the
// four table words selected by the bytes of idx, as do_col does.
// The incoming a3 is copied into idx before a3 is reloaded, so it becomes
// the index register for the following do_col.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
|
||||
mov 0 sched,%a1; \
|
||||
movzx %l(idx),%tmp; \
|
||||
mov 12 sched,%a2; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
mov 4 sched,%a4; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+3*tlen(,%idx,4),%a4; \
|
||||
mov %a3,%idx; \
|
||||
mov 8 sched,%a3; \
|
||||
xor table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
// initialise output registers from the key schedule
|
||||
// NB1: original value of a3 is in idx on exit
|
||||
// NB2: original values of a1,a2,a4 aren't used
|
||||
// do_icol: first column step of an inverse round.
// Same instruction sequence as do_fcol except for the round-key offsets:
// a1 is loaded from offset 0, a2 from offset 4, a4 from offset 12 and
// a3 from offset 8 of 'sched'. The incoming a3 is left in idx on exit.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
|
||||
mov 0 sched,%a1; \
|
||||
movzx %l(idx),%tmp; \
|
||||
mov 4 sched,%a2; \
|
||||
xor table(,%tmp,4),%a1; \
|
||||
mov 12 sched,%a4; \
|
||||
movzx %h(idx),%tmp; \
|
||||
shr $16,%idx; \
|
||||
xor table+tlen(,%tmp,4),%a2; \
|
||||
movzx %l(idx),%tmp; \
|
||||
movzx %h(idx),%idx; \
|
||||
xor table+3*tlen(,%idx,4),%a4; \
|
||||
mov %a3,%idx; \
|
||||
mov 8 sched,%a3; \
|
||||
xor table+2*tlen(,%tmp,4),%a3;
|
||||
|
||||
|
||||
// original Gladman had conditional saves to MMX regs.
|
||||
// save(slot, reg): store register 'reg' in 4-byte stack slot 'slot' at (%esp).
#define save(a1, a2) \
|
||||
mov %a2,4*a1(%esp)
|
||||
|
||||
// restore(reg, slot): load register 'reg' from 4-byte stack slot 'slot' at (%esp).
#define restore(a1, a2) \
|
||||
mov 4*a2(%esp),%a1
|
||||
|
||||
// These macros perform a forward encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage.
|
||||
|
||||
// round column values
|
||||
// on entry: r0,r1,r4,r5
|
||||
// on exit: r2,r1,r4,r5
|
||||
// fwd_rnd1: forward round with r0 as index register and r3 as scratch.
// r1 and r5 are parked in stack slots 0 and 1 because their registers are
// reused as outputs; they are reloaded into r0 when their turn as input
// column comes. 'arg' is the round-key operand handed to do_fcol.
#define fwd_rnd1(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
|
||||
do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
|
||||
restore(r0,0); \
|
||||
do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
|
||||
restore(r0,1); \
|
||||
do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
|
||||
|
||||
// round column values
|
||||
// on entry: r2,r1,r4,r5
|
||||
// on exit: r0,r1,r4,r5
|
||||
// fwd_rnd2: forward round with the roles of r0 and r2 swapped relative to
// fwd_rnd1: r2 is the index register and r0 receives the first output.
// r1 and r5 are parked in stack slots 0 and 1 and reloaded into r2.
#define fwd_rnd2(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
|
||||
do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
|
||||
restore(r2,0); \
|
||||
do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
|
||||
restore(r2,1); \
|
||||
do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
|
||||
|
||||
// These macros perform an inverse encryption cycle. They are entered with
|
||||
// the first previous round column values in r0,r1,r4,r5 and
|
||||
// exit with the final values in the same registers, using stack
|
||||
// for temporary storage
|
||||
|
||||
// round column values
|
||||
// on entry: r0,r1,r4,r5
|
||||
// on exit: r2,r1,r4,r5
|
||||
// inv_rnd1: inverse round with r0 as index register and r3 as scratch.
// Mirrors fwd_rnd1 but uses do_icol and a different assignment of output
// registers to the do_col calls. r1 and r5 are parked in stack slots 0 and 1
// and reloaded into r0 when they are consumed.
#define inv_rnd1(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
|
||||
do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
|
||||
restore(r0,0); \
|
||||
do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
|
||||
restore(r0,1); \
|
||||
do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
|
||||
|
||||
// round column values
|
||||
// on entry: r2,r1,r4,r5
|
||||
// on exit: r0,r1,r4,r5
|
||||
// inv_rnd2: inverse round with the roles of r0 and r2 swapped relative to
// inv_rnd1: r2 is the index register and r0 receives the first output.
// r1 and r5 are parked in stack slots 0 and 1 and reloaded into r2.
#define inv_rnd2(arg, table) \
|
||||
save (0,r1); \
|
||||
save (1,r5); \
|
||||
\
|
||||
/* compute new column values */ \
|
||||
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
|
||||
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
|
||||
restore(r2,0); \
|
||||
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
|
||||
restore(r2,1); \
|
||||
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
|
||||
|
||||
// AES (Rijndael) Encryption Subroutine
|
||||
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.global aes_enc_blk
|
||||
|
||||
.extern ft_tab
|
||||
.extern fl_tab
|
||||
|
||||
.align 4
|
||||
|
||||
// Encrypt one 16-byte block. The three arguments (tfm, out_blk, in_blk) are
// read from the stack; %ebx, %esi, %edi and %ebp are saved and restored.
aes_enc_blk:
|
||||
push %ebp
|
||||
mov tfm(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
// The parameter offsets assume one pushed register; in_blk is read after a
// second push, hence the +4.
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov nrnd(%ebp),%r3 // number of rounds
|
||||
push %edi
|
||||
#if ekey != 0
|
||||
lea ekey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
// The round macros use two stack slots at (%esp) and 4(%esp).
// lea leaves the flags untouched, so the je below still tests the result
// of the cmp; %ebp is advanced by 32 bytes for each extra pair of rounds so
// that the negative offsets at labels 2 and 3 address the earlier round keys.
sub $8,%esp // space for register saves on stack
|
||||
add $16,%ebp // increment to next round key
|
||||
cmp $12,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea 32(%ebp),%ebp
|
||||
|
||||
2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
|
||||
fwd_rnd2( -48(%ebp) ,ft_tab)
|
||||
3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
|
||||
fwd_rnd2( -16(%ebp) ,ft_tab)
|
||||
4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
|
||||
fwd_rnd2( +16(%ebp) ,ft_tab)
|
||||
fwd_rnd1( +32(%ebp) ,ft_tab)
|
||||
fwd_rnd2( +48(%ebp) ,ft_tab)
|
||||
fwd_rnd1( +64(%ebp) ,ft_tab)
|
||||
fwd_rnd2( +80(%ebp) ,ft_tab)
|
||||
fwd_rnd1( +96(%ebp) ,ft_tab)
|
||||
fwd_rnd2(+112(%ebp) ,ft_tab)
|
||||
fwd_rnd1(+128(%ebp) ,ft_tab)
|
||||
fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns relies on the register mappings
|
||||
|
||||
// out_blk is read with four registers on the stack instead of one, hence +12.
// The stores are interleaved with the pops that restore the saved registers.
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
// %eax is set to 1 on exit.
mov $1,%eax
|
||||
ret
|
||||
|
||||
// AES (Rijndael) Decryption Subroutine
|
||||
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
|
||||
|
||||
.global aes_dec_blk
|
||||
|
||||
.extern it_tab
|
||||
.extern il_tab
|
||||
|
||||
.align 4
|
||||
|
||||
// Decrypt one 16-byte block. The three arguments (tfm, out_blk, in_blk) are
// read from the stack; %ebx, %esi, %edi and %ebp are saved and restored.
aes_dec_blk:
|
||||
push %ebp
|
||||
mov tfm(%esp),%ebp
|
||||
|
||||
// CAUTION: the order and the values used in these assigns
|
||||
// rely on the register mappings
|
||||
|
||||
// The parameter offsets assume one pushed register; in_blk is read after a
// second push, hence the +4.
1: push %ebx
|
||||
mov in_blk+4(%esp),%r2
|
||||
push %esi
|
||||
mov nrnd(%ebp),%r3 // number of rounds
|
||||
push %edi
|
||||
#if dkey != 0
|
||||
lea dkey(%ebp),%ebp // key pointer
|
||||
#endif
|
||||
// Point %ebp at dkey + 16 * rounds: the schedule is consumed from its
// highest round key downwards.
mov %r3,%r0
|
||||
shl $4,%r0
|
||||
add %r0,%ebp
|
||||
|
||||
// input four columns and xor in first round key
|
||||
|
||||
mov (%r2),%r0
|
||||
mov 4(%r2),%r1
|
||||
mov 8(%r2),%r4
|
||||
mov 12(%r2),%r5
|
||||
xor (%ebp),%r0
|
||||
xor 4(%ebp),%r1
|
||||
xor 8(%ebp),%r4
|
||||
xor 12(%ebp),%r5
|
||||
|
||||
// The round macros use two stack slots at (%esp) and 4(%esp).
// lea leaves the flags untouched, so the je below still tests the result
// of the cmp; %ebp is moved back by 32 bytes for each extra pair of rounds so
// that the positive offsets at labels 2 and 3 address the later round keys.
sub $8,%esp // space for register saves on stack
|
||||
sub $16,%ebp // step back to next round key
|
||||
cmp $12,%r3
|
||||
jb 4f // 10 rounds for 128-bit key
|
||||
lea -32(%ebp),%ebp
|
||||
je 3f // 12 rounds for 192-bit key
|
||||
lea -32(%ebp),%ebp
|
||||
|
||||
2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
|
||||
inv_rnd2( +48(%ebp), it_tab)
|
||||
3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
|
||||
inv_rnd2( +16(%ebp), it_tab)
|
||||
4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
|
||||
inv_rnd2( -16(%ebp), it_tab)
|
||||
inv_rnd1( -32(%ebp), it_tab)
|
||||
inv_rnd2( -48(%ebp), it_tab)
|
||||
inv_rnd1( -64(%ebp), it_tab)
|
||||
inv_rnd2( -80(%ebp), it_tab)
|
||||
inv_rnd1( -96(%ebp), it_tab)
|
||||
inv_rnd2(-112(%ebp), it_tab)
|
||||
inv_rnd1(-128(%ebp), it_tab)
|
||||
inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
|
||||
|
||||
// move final values to the output array. CAUTION: the
|
||||
// order of these assigns relies on the register mappings
|
||||
|
||||
// out_blk is read with four registers on the stack instead of one, hence +12.
// The stores are interleaved with the pops that restore the saved registers.
add $8,%esp
|
||||
mov out_blk+12(%esp),%ebp
|
||||
mov %r5,12(%ebp)
|
||||
pop %edi
|
||||
mov %r4,8(%ebp)
|
||||
pop %esi
|
||||
mov %r1,4(%ebp)
|
||||
pop %ebx
|
||||
mov %r0,(%ebp)
|
||||
pop %ebp
|
||||
// %eax is set to 1 on exit.
mov $1,%eax
|
||||
ret
|
||||
|
515
arch/x86/crypto/aes_32.c
Normal file
515
arch/x86/crypto/aes_32.c
Normal file
@@ -0,0 +1,515 @@
|
||||
/*
|
||||
*
|
||||
* Glue Code for optimized 586 assembler version of AES
|
||||
*
|
||||
* Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
|
||||
* All rights reserved.
|
||||
*
|
||||
* LICENSE TERMS
|
||||
*
|
||||
* The free distribution and use of this software in both source and binary
|
||||
* form is allowed (with or without changes) provided that:
|
||||
*
|
||||
* 1. distributions of this source code include the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
*
|
||||
* 2. distributions in binary form include the above copyright
|
||||
* notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other associated materials;
|
||||
*
|
||||
* 3. the copyright holder's name is not used to endorse products
|
||||
* built using this software without specific written permission.
|
||||
*
|
||||
* ALTERNATIVELY, provided that this notice is retained in full, this product
|
||||
* may be distributed under the terms of the GNU General Public License (GPL),
|
||||
* in which case the provisions of the GPL apply INSTEAD OF those given above.
|
||||
*
|
||||
* DISCLAIMER
|
||||
*
|
||||
* This software is provided 'as is' with no explicit or implied warranties
|
||||
* in respect of its properties, including, but not limited to, correctness
|
||||
* and/or fitness for purpose.
|
||||
*
|
||||
* Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
|
||||
* 2.5 API).
|
||||
* Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
|
||||
* Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <asm/byteorder.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/linkage.h>
|
||||
|
||||
asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
|
||||
asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
|
||||
|
||||
#define AES_MIN_KEY_SIZE 16
|
||||
#define AES_MAX_KEY_SIZE 32
|
||||
#define AES_BLOCK_SIZE 16
|
||||
/* Number of u32 words in one key schedule; parenthesized so it expands safely inside larger expressions. */
#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE)
|
||||
#define RC_LENGTH 29
|
||||
|
||||
/*
 * Per-transform AES state: encryption schedule, round count, decryption
 * schedule. aes_set_key() fills all three. The assembler file in this
 * directory addresses them at byte offsets 0, 256 and 260 from the context
 * start, so the order and size of these fields must not change.
 */
struct aes_ctx {
|
||||
u32 ekey[AES_KS_LENGTH];
|
||||
u32 rounds;
|
||||
u32 dkey[AES_KS_LENGTH];
|
||||
};
|
||||
|
||||
#define WPOLY 0x011b
|
||||
/*
 * Pack four byte values into a 32-bit word, b0 in the least significant
 * byte. Every operand is converted to u32 so the result type does not
 * depend on the type of the b0 argument.
 */
#define bytes2word(b0, b1, b2, b3) \
	(((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (u32)(b0))
|
||||
|
||||
/* define the finite field multiplies required for Rijndael */
|
||||
/*
 * Each macro adds a fixed offset to log[x] and looks the sum up in pow[];
 * fi() instead looks up pow[255 - log[x]]. An argument of 0 yields 0
 * without touching the tables. They expand to references to arrays named
 * pow and log, which must be in scope at the point of use, and evaluate
 * x more than once.
 */
#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
|
||||
#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
|
||||
#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
|
||||
#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
|
||||
#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
|
||||
#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
|
||||
#define fi(x) ((x) ? pow[255 - log[x]]: 0)
|
||||
|
||||
/*
 * Rotate the 32-bit word x left by n bytes.
 *
 * The byte count is reduced modulo 4, so a rotation by 0 (or any multiple
 * of 4) returns x unchanged instead of evaluating a 32-bit shift by 32,
 * which is undefined behaviour in C. Results for n = 1, 2, 3 are unchanged.
 */
static inline u32 upr(u32 x, int n)
{
	const unsigned int bits = 8 * ((unsigned int)n & 3);

	if (!bits)
		return x;

	return (x << bits) | (x >> (32 - bits));
}
|
||||
|
||||
/* Return byte number n of x, where byte 0 is the least significant one. */
static inline u8 bval(u32 x, int n)
{
	const int shift = 8 * n;

	return (u8)(x >> shift);
}
|
||||
|
||||
/* The forward and inverse affine transformations used in the S-box */
|
||||
/*
 * Forward affine transformation of the S-box. Uses a u32 variable named w
 * from the expansion site as scratch and leaves it modified. The argument
 * is parenthesized so the cast applies to the whole expression.
 */
#define fwd_affine(x) \
	(w = (u32)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
|
||||
|
||||
/*
 * Inverse affine transformation of the S-box. Uses a u32 variable named w
 * from the expansion site as scratch and leaves it modified. The argument
 * is parenthesized so the cast applies to the whole expression.
 */
#define inv_affine(x) \
	(w = (u32)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
|
||||
|
||||
static u32 rcon_tab[RC_LENGTH];
|
||||
|
||||
u32 ft_tab[4][256];
|
||||
u32 fl_tab[4][256];
|
||||
static u32 im_tab[4][256];
|
||||
u32 il_tab[4][256];
|
||||
u32 it_tab[4][256];
|
||||
|
||||
static void gen_tabs(void)
|
||||
{
|
||||
u32 i, w;
|
||||
u8 pow[512], log[256];
|
||||
|
||||
/*
|
||||
* log and power tables for GF(2^8) finite field with
|
||||
* WPOLY as modular polynomial - the simplest primitive
|
||||
* root is 0x03, used here to generate the tables.
|
||||
*/
|
||||
i = 0; w = 1;
|
||||
|
||||
do {
|
||||
pow[i] = (u8)w;
|
||||
pow[i + 255] = (u8)w;
|
||||
log[w] = (u8)i++;
|
||||
w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
|
||||
} while (w != 1);
|
||||
|
||||
for(i = 0, w = 1; i < RC_LENGTH; ++i) {
|
||||
rcon_tab[i] = bytes2word(w, 0, 0, 0);
|
||||
w = f2(w);
|
||||
}
|
||||
|
||||
for(i = 0; i < 256; ++i) {
|
||||
u8 b;
|
||||
|
||||
b = fwd_affine(fi((u8)i));
|
||||
w = bytes2word(f2(b), b, b, f3(b));
|
||||
|
||||
/* tables for a normal encryption round */
|
||||
ft_tab[0][i] = w;
|
||||
ft_tab[1][i] = upr(w, 1);
|
||||
ft_tab[2][i] = upr(w, 2);
|
||||
ft_tab[3][i] = upr(w, 3);
|
||||
w = bytes2word(b, 0, 0, 0);
|
||||
|
||||
/*
|
||||
* tables for last encryption round
|
||||
* (may also be used in the key schedule)
|
||||
*/
|
||||
fl_tab[0][i] = w;
|
||||
fl_tab[1][i] = upr(w, 1);
|
||||
fl_tab[2][i] = upr(w, 2);
|
||||
fl_tab[3][i] = upr(w, 3);
|
||||
|
||||
b = fi(inv_affine((u8)i));
|
||||
w = bytes2word(fe(b), f9(b), fd(b), fb(b));
|
||||
|
||||
/* tables for the inverse mix column operation */
|
||||
im_tab[0][b] = w;
|
||||
im_tab[1][b] = upr(w, 1);
|
||||
im_tab[2][b] = upr(w, 2);
|
||||
im_tab[3][b] = upr(w, 3);
|
||||
|
||||
/* tables for a normal decryption round */
|
||||
it_tab[0][i] = w;
|
||||
it_tab[1][i] = upr(w,1);
|
||||
it_tab[2][i] = upr(w,2);
|
||||
it_tab[3][i] = upr(w,3);
|
||||
|
||||
w = bytes2word(b, 0, 0, 0);
|
||||
|
||||
/* tables for last decryption round */
|
||||
il_tab[0][i] = w;
|
||||
il_tab[1][i] = upr(w,1);
|
||||
il_tab[2][i] = upr(w,2);
|
||||
il_tab[3][i] = upr(w,3);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * XOR together one entry from each of tab[0..3]. For row r the index is
 * byte number rf(r,c) of the word vf(x,r,c); vf and rf are macro names
 * supplied by the caller and c is passed through to both.
 */
#define four_tables(x,tab,vf,rf,c) \
|
||||
( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
|
||||
tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
|
||||
tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
|
||||
tab[3][bval(vf(x,3,c),rf(3,c))] \
|
||||
)
|
||||
|
||||
/*
 * Selector macros for four_tables(): vf1 passes the word through unchanged,
 * rf1 selects byte r for row r, rf2 selects byte (r - c) mod 4 for row r.
 */
#define vf1(x,r,c) (x)
|
||||
#define rf1(r,c) (r)
|
||||
#define rf2(r,c) ((r-c)&3)
|
||||
|
||||
/* inv_mcol: four_tables() over im_tab with bytes taken in natural order. */
#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
|
||||
/* ls_box: four_tables() over fl_tab with the byte selection offset by c. */
#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
|
||||
|
||||
/* ff: shorthand for inv_mcol, used by the decryption key schedule macros. */
#define ff(x) inv_mcol(x)
|
||||
|
||||
/*
 * ke4: one step of the encryption key schedule for a 16-byte key.
 * Updates the running state ss[0..3] (a u32 array in the caller's scope)
 * and writes words 4*i+4 .. 4*i+7 of k. Wrapped in do { } while (0) so it
 * behaves as a single statement; arguments are parenthesized.
 */
#define ke4(k,i) \
do { \
	(k)[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[(i)]; \
	(k)[4*(i)+5] = ss[1] ^= ss[0]; \
	(k)[4*(i)+6] = ss[2] ^= ss[1]; \
	(k)[4*(i)+7] = ss[3] ^= ss[2]; \
} while (0)
|
||||
|
||||
/*
 * kel4: final step of the encryption key schedule for a 16-byte key.
 * Performs the same updates as ke4: ss[0..3] in the caller's scope are
 * advanced and words 4*i+4 .. 4*i+7 of k are written. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define kel4(k,i) \
do { \
	(k)[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[(i)]; \
	(k)[4*(i)+5] = ss[1] ^= ss[0]; \
	(k)[4*(i)+6] = ss[2] ^= ss[1]; \
	(k)[4*(i)+7] = ss[3] ^= ss[2]; \
} while (0)
|
||||
|
||||
/*
 * ke6: one step of the encryption key schedule for a 24-byte key.
 * Updates ss[0..5] in the caller's scope and writes words
 * 6*i+6 .. 6*i+11 of k. Wrapped in do { } while (0) so it behaves as a
 * single statement; arguments are parenthesized.
 */
#define ke6(k,i) \
do { \
	(k)[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[(i)]; \
	(k)[6*(i)+ 7] = ss[1] ^= ss[0]; \
	(k)[6*(i)+ 8] = ss[2] ^= ss[1]; \
	(k)[6*(i)+ 9] = ss[3] ^= ss[2]; \
	(k)[6*(i)+10] = ss[4] ^= ss[3]; \
	(k)[6*(i)+11] = ss[5] ^= ss[4]; \
} while (0)
|
||||
|
||||
/*
 * kel6: final step of the encryption key schedule for a 24-byte key.
 * Only four words are produced: ss[0..3] in the caller's scope are updated
 * and words 6*i+6 .. 6*i+9 of k are written. Wrapped in do { } while (0)
 * so it behaves as a single statement.
 */
#define kel6(k,i) \
do { \
	(k)[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[(i)]; \
	(k)[6*(i)+ 7] = ss[1] ^= ss[0]; \
	(k)[6*(i)+ 8] = ss[2] ^= ss[1]; \
	(k)[6*(i)+ 9] = ss[3] ^= ss[2]; \
} while (0)
|
||||
|
||||
/*
 * ke8: one step of the encryption key schedule for a 32-byte key.
 * Updates ss[0..7] in the caller's scope and writes words
 * 8*i+8 .. 8*i+15 of k. The first word mixes in the round constant; the
 * fifth word applies ls_box with offset 0 and no round constant. Wrapped
 * in do { } while (0) so it behaves as a single statement.
 */
#define ke8(k,i) \
do { \
	(k)[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[(i)]; \
	(k)[8*(i)+ 9] = ss[1] ^= ss[0]; \
	(k)[8*(i)+10] = ss[2] ^= ss[1]; \
	(k)[8*(i)+11] = ss[3] ^= ss[2]; \
	(k)[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
	(k)[8*(i)+13] = ss[5] ^= ss[4]; \
	(k)[8*(i)+14] = ss[6] ^= ss[5]; \
	(k)[8*(i)+15] = ss[7] ^= ss[6]; \
} while (0)
|
||||
|
||||
/*
 * kel8: final step of the encryption key schedule for a 32-byte key.
 * Only four words are produced: ss[0..3] in the caller's scope are updated
 * and words 8*i+8 .. 8*i+11 of k are written. Wrapped in do { } while (0)
 * so it behaves as a single statement.
 */
#define kel8(k,i) \
do { \
	(k)[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[(i)]; \
	(k)[8*(i)+ 9] = ss[1] ^= ss[0]; \
	(k)[8*(i)+10] = ss[2] ^= ss[1]; \
	(k)[8*(i)+11] = ss[3] ^= ss[2]; \
} while (0)
|
||||
|
||||
/*
 * kdf4: first step of the decryption key schedule for a 16-byte key.
 * Re-mixes ss[0..2] with the other state words, derives the step value in
 * ss[4], and writes the ff()-transformed words 4*i+4 .. 4*i+7 of k, each
 * built from the running XOR of ss[4] with k[4*i] .. k[4*i+3].
 * ss is a u32 array in the caller's scope with at least five elements.
 * The original no-op "ss[3] = ss[3]" is omitted. Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define kdf4(k,i) \
do { \
	ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
	ss[1] = ss[1] ^ ss[3]; \
	ss[2] = ss[2] ^ ss[3]; \
	ss[4] = ls_box(ss[((i)+3) % 4], 3) ^ rcon_tab[(i)]; \
	ss[(i) % 4] ^= ss[4]; \
	ss[4] ^= (k)[4*(i)]; \
	(k)[4*(i)+4] = ff(ss[4]); \
	ss[4] ^= (k)[4*(i)+1]; \
	(k)[4*(i)+5] = ff(ss[4]); \
	ss[4] ^= (k)[4*(i)+2]; \
	(k)[4*(i)+6] = ff(ss[4]); \
	ss[4] ^= (k)[4*(i)+3]; \
	(k)[4*(i)+7] = ff(ss[4]); \
} while (0)
|
||||
|
||||
/*
 * kd4: middle step of the decryption key schedule for a 16-byte key.
 * Derives the step value in ss[4], folds it into ss[i % 4], transforms it
 * with ff(), and writes words 4*i+4 .. 4*i+7 of k as the running XOR of
 * that value with k[4*i] .. k[4*i+3]. ss is a u32 array in the caller's
 * scope. Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define kd4(k,i) \
do { \
	ss[4] = ls_box(ss[((i)+3) % 4], 3) ^ rcon_tab[(i)]; \
	ss[(i) % 4] ^= ss[4]; \
	ss[4] = ff(ss[4]); \
	(k)[4*(i)+4] = ss[4] ^= (k)[4*(i)]; \
	(k)[4*(i)+5] = ss[4] ^= (k)[4*(i)+1]; \
	(k)[4*(i)+6] = ss[4] ^= (k)[4*(i)+2]; \
	(k)[4*(i)+7] = ss[4] ^= (k)[4*(i)+3]; \
} while (0)
|
||||
|
||||
/*
 * kdl4: final step of the decryption key schedule for a 16-byte key.
 * Derives the step value in ss[4], folds it into ss[i % 4], and writes
 * words 4*i+4 .. 4*i+7 of k directly from combinations of ss[0..3]
 * (no ff() transform). ss[0] is modified as a side effect. ss is a u32
 * array in the caller's scope. Wrapped in do { } while (0) so it behaves
 * as a single statement.
 */
#define kdl4(k,i) \
do { \
	ss[4] = ls_box(ss[((i)+3) % 4], 3) ^ rcon_tab[(i)]; \
	ss[(i) % 4] ^= ss[4]; \
	(k)[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
	(k)[4*(i)+5] = ss[1] ^ ss[3]; \
	(k)[4*(i)+6] = ss[0]; \
	(k)[4*(i)+7] = ss[1]; \
} while (0)
|
||||
|
||||
/*
 * kdf6: first step of the decryption key schedule for a 24-byte key.
 * Advances ss[0..5] exactly as the encryption schedule does and writes the
 * ff()-transformed state to words 6*i+6 .. 6*i+11 of k. ss is a u32 array
 * in the caller's scope. Wrapped in do { } while (0) so it behaves as a
 * single statement.
 */
#define kdf6(k,i) \
do { \
	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[(i)]; \
	(k)[6*(i)+ 6] = ff(ss[0]); \
	ss[1] ^= ss[0]; \
	(k)[6*(i)+ 7] = ff(ss[1]); \
	ss[2] ^= ss[1]; \
	(k)[6*(i)+ 8] = ff(ss[2]); \
	ss[3] ^= ss[2]; \
	(k)[6*(i)+ 9] = ff(ss[3]); \
	ss[4] ^= ss[3]; \
	(k)[6*(i)+10] = ff(ss[4]); \
	ss[5] ^= ss[4]; \
	(k)[6*(i)+11] = ff(ss[5]); \
} while (0)
|
||||
|
||||
/*
 * kd6: middle step of the decryption key schedule for a 24-byte key.
 * Uses ss[6] as a temporary: the step value is folded into ss[0],
 * transformed with ff(), and then XOR-accumulated with k[6*i] .. k[6*i+5]
 * to produce words 6*i+6 .. 6*i+11 of k, while ss[1..5] are advanced in
 * step. ss is a u32 array in the caller's scope with at least seven
 * elements. Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define kd6(k,i) \
do { \
	ss[6] = ls_box(ss[5],3) ^ rcon_tab[(i)]; \
	ss[0] ^= ss[6]; \
	ss[6] = ff(ss[6]); \
	(k)[6*(i)+ 6] = ss[6] ^= (k)[6*(i)]; \
	ss[1] ^= ss[0]; \
	(k)[6*(i)+ 7] = ss[6] ^= (k)[6*(i)+ 1]; \
	ss[2] ^= ss[1]; \
	(k)[6*(i)+ 8] = ss[6] ^= (k)[6*(i)+ 2]; \
	ss[3] ^= ss[2]; \
	(k)[6*(i)+ 9] = ss[6] ^= (k)[6*(i)+ 3]; \
	ss[4] ^= ss[3]; \
	(k)[6*(i)+10] = ss[6] ^= (k)[6*(i)+ 4]; \
	ss[5] ^= ss[4]; \
	(k)[6*(i)+11] = ss[6] ^= (k)[6*(i)+ 5]; \
} while (0)
|
||||
|
||||
/*
 * kdl6: final step of the decryption key schedule for a 24-byte key.
 * Advances ss[0..3] and writes them untransformed to words
 * 6*i+6 .. 6*i+9 of k. ss is a u32 array in the caller's scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define kdl6(k,i) \
do { \
	ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[(i)]; \
	(k)[6*(i)+ 6] = ss[0]; \
	ss[1] ^= ss[0]; \
	(k)[6*(i)+ 7] = ss[1]; \
	ss[2] ^= ss[1]; \
	(k)[6*(i)+ 8] = ss[2]; \
	ss[3] ^= ss[2]; \
	(k)[6*(i)+ 9] = ss[3]; \
} while (0)
|
||||
|
||||
/*
 * kdf8: first step of the decryption key schedule for a 32-byte key.
 * Advances ss[0..7] exactly as the encryption schedule does and writes the
 * ff()-transformed state to words 8*i+8 .. 8*i+15 of k. ss is a u32 array
 * in the caller's scope. Wrapped in do { } while (0) so it behaves as a
 * single statement.
 */
#define kdf8(k,i) \
do { \
	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[(i)]; \
	(k)[8*(i)+ 8] = ff(ss[0]); \
	ss[1] ^= ss[0]; \
	(k)[8*(i)+ 9] = ff(ss[1]); \
	ss[2] ^= ss[1]; \
	(k)[8*(i)+10] = ff(ss[2]); \
	ss[3] ^= ss[2]; \
	(k)[8*(i)+11] = ff(ss[3]); \
	ss[4] ^= ls_box(ss[3],0); \
	(k)[8*(i)+12] = ff(ss[4]); \
	ss[5] ^= ss[4]; \
	(k)[8*(i)+13] = ff(ss[5]); \
	ss[6] ^= ss[5]; \
	(k)[8*(i)+14] = ff(ss[6]); \
	ss[7] ^= ss[6]; \
	(k)[8*(i)+15] = ff(ss[7]); \
} while (0)
|
||||
|
||||
/*
 * kd8: middle step of the decryption key schedule for a 32-byte key.
 * For each half of the step a value is derived with ls_box(), folded into
 * the running state (ss[0] or ss[4]), transformed with ff(), and then
 * XOR-accumulated with the previous eight words of k to produce words
 * 8*i+8 .. 8*i+15, while the remaining ss[] words are advanced in step.
 * ss is a u32 array in the caller's scope. The temporary is named
 * kd8_tmp rather than using a reserved double-underscore identifier.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define kd8(k,i) \
do { \
	u32 kd8_tmp = ls_box(ss[7],3) ^ rcon_tab[(i)]; \
	ss[0] ^= kd8_tmp; \
	kd8_tmp = ff(kd8_tmp); \
	(k)[8*(i)+ 8] = kd8_tmp ^= (k)[8*(i)]; \
	ss[1] ^= ss[0]; \
	(k)[8*(i)+ 9] = kd8_tmp ^= (k)[8*(i)+ 1]; \
	ss[2] ^= ss[1]; \
	(k)[8*(i)+10] = kd8_tmp ^= (k)[8*(i)+ 2]; \
	ss[3] ^= ss[2]; \
	(k)[8*(i)+11] = kd8_tmp ^= (k)[8*(i)+ 3]; \
	kd8_tmp = ls_box(ss[3],0); \
	ss[4] ^= kd8_tmp; \
	kd8_tmp = ff(kd8_tmp); \
	(k)[8*(i)+12] = kd8_tmp ^= (k)[8*(i)+ 4]; \
	ss[5] ^= ss[4]; \
	(k)[8*(i)+13] = kd8_tmp ^= (k)[8*(i)+ 5]; \
	ss[6] ^= ss[5]; \
	(k)[8*(i)+14] = kd8_tmp ^= (k)[8*(i)+ 6]; \
	ss[7] ^= ss[6]; \
	(k)[8*(i)+15] = kd8_tmp ^= (k)[8*(i)+ 7]; \
} while (0)
|
||||
|
||||
/*
 * kdl8: final step of the decryption key schedule for a 32-byte key.
 * Advances ss[0..3] and writes them untransformed to words
 * 8*i+8 .. 8*i+11 of k. ss is a u32 array in the caller's scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define kdl8(k,i) \
do { \
	ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[(i)]; \
	(k)[8*(i)+ 8] = ss[0]; \
	ss[1] ^= ss[0]; \
	(k)[8*(i)+ 9] = ss[1]; \
	ss[2] ^= ss[1]; \
	(k)[8*(i)+10] = ss[2]; \
	ss[3] ^= ss[2]; \
	(k)[8*(i)+11] = ss[3]; \
} while (0)
|
||||
|
||||
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
int i;
|
||||
u32 ss[8];
|
||||
struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const __le32 *key = (const __le32 *)in_key;
|
||||
u32 *flags = &tfm->crt_flags;
|
||||
|
||||
/* encryption schedule */
|
||||
|
||||
ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
|
||||
ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
|
||||
ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
|
||||
ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
|
||||
|
||||
switch(key_len) {
|
||||
case 16:
|
||||
for (i = 0; i < 9; i++)
|
||||
ke4(ctx->ekey, i);
|
||||
kel4(ctx->ekey, 9);
|
||||
ctx->rounds = 10;
|
||||
break;
|
||||
|
||||
case 24:
|
||||
ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
|
||||
ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
|
||||
for (i = 0; i < 7; i++)
|
||||
ke6(ctx->ekey, i);
|
||||
kel6(ctx->ekey, 7);
|
||||
ctx->rounds = 12;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
|
||||
ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
|
||||
ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
|
||||
ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
|
||||
for (i = 0; i < 6; i++)
|
||||
ke8(ctx->ekey, i);
|
||||
kel8(ctx->ekey, 6);
|
||||
ctx->rounds = 14;
|
||||
break;
|
||||
|
||||
default:
|
||||
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* decryption schedule */
|
||||
|
||||
ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
|
||||
ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
|
||||
ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
|
||||
ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
|
||||
|
||||
switch (key_len) {
|
||||
case 16:
|
||||
kdf4(ctx->dkey, 0);
|
||||
for (i = 1; i < 9; i++)
|
||||
kd4(ctx->dkey, i);
|
||||
kdl4(ctx->dkey, 9);
|
||||
break;
|
||||
|
||||
case 24:
|
||||
ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
|
||||
ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
|
||||
kdf6(ctx->dkey, 0);
|
||||
for (i = 1; i < 7; i++)
|
||||
kd6(ctx->dkey, i);
|
||||
kdl6(ctx->dkey, 7);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
|
||||
ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
|
||||
ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
|
||||
ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
|
||||
kdf8(ctx->dkey, 0);
|
||||
for (i = 1; i < 6; i++)
|
||||
kd8(ctx->dkey, i);
|
||||
kdl8(ctx->dkey, 6);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Cipher encrypt hook: hands tfm, dst and src straight to the assembler routine aes_enc_blk(). */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_enc_blk(tfm, dst, src);
|
||||
}
|
||||
|
||||
/* Cipher decrypt hook: hands tfm, dst and src straight to the assembler routine aes_dec_blk(). */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
aes_dec_blk(tfm, dst, src);
|
||||
}
|
||||
|
||||
/*
 * Algorithm descriptor passed to crypto_register_alg() in aes_init().
 * It is exposed under the generic name "aes" with driver name "aes-i586",
 * reserves sizeof(struct aes_ctx) bytes of per-transform context, and
 * routes the cipher operations to aes_set_key(), aes_encrypt() and
 * aes_decrypt().
 */
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-i586",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Module entry point: build the lookup tables, then register the
 * algorithm. The registration result is returned unchanged.
 */
static int __init aes_init(void)
{
	int err;

	gen_tabs();
	err = crypto_register_alg(&aes_alg);

	return err;
}
|
||||
|
||||
/* Module exit point: unregister the algorithm that aes_init() registered. */
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
|
||||
MODULE_ALIAS("aes");
|
335
arch/x86/crypto/twofish-i586-asm_32.S
Normal file
335
arch/x86/crypto/twofish-i586-asm_32.S
Normal file
@@ -0,0 +1,335 @@
|
||||
/***************************************************************************
|
||||
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
|
||||
* *
|
||||
* This program is free software; you can redistribute it and/or modify *
|
||||
* it under the terms of the GNU General Public License as published by *
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* This program is distributed in the hope that it will be useful, *
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||||
* GNU General Public License for more details. *
|
||||
* *
|
||||
* You should have received a copy of the GNU General Public License *
|
||||
* along with this program; if not, write to the *
|
||||
* Free Software Foundation, Inc., *
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
|
||||
***************************************************************************/
|
||||
|
||||
.file "twofish-i586-asm.S"
|
||||
.text
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
/* return address at 0 */
|
||||
|
||||
#define in_blk 12 /* input byte array address parameter*/
|
||||
#define out_blk 8 /* output byte array address parameter*/
|
||||
#define tfm 4 /* Twofish context structure */
|
||||
|
||||
#define a_offset 0
|
||||
#define b_offset 4
|
||||
#define c_offset 8
|
||||
#define d_offset 12
|
||||
|
||||
/* Structure of the crypto context struct*/
|
||||
|
||||
#define s0 0 /* S0 Array 256 Words each */
|
||||
#define s1 1024 /* S1 Array */
|
||||
#define s2 2048 /* S2 Array */
|
||||
#define s3 3072 /* S3 Array */
|
||||
#define w 4096 /* 8 whitening keys (word) */
|
||||
#define k 4128 /* key 1-32 ( word ) */
|
||||
|
||||
/* define a few register aliases to allow macro substitution */
|
||||
|
||||
#define R0D %eax
|
||||
#define R0B %al
|
||||
#define R0H %ah
|
||||
|
||||
#define R1D %ebx
|
||||
#define R1B %bl
|
||||
#define R1H %bh
|
||||
|
||||
#define R2D %ecx
|
||||
#define R2B %cl
|
||||
#define R2H %ch
|
||||
|
||||
#define R3D %edx
|
||||
#define R3B %dl
|
||||
#define R3H %dh
|
||||
|
||||
|
||||
/* performs input whitening */
|
||||
/* xors the 32-bit word at byte offset w+offset from 'context' into src */
#define input_whitening(src,context,offset)\
|
||||
xor w+offset(context), src;
|
||||
|
||||
/* performs output whitening */
|
||||
/* xors the 32-bit word at byte offset w+16+offset from 'context' into src, i.e. 16 bytes past the word input_whitening uses */
#define output_whitening(src,context,offset)\
|
||||
xor w+16+offset(context), src;
|
||||
|
||||
/*
|
||||
* a input register containing a (rotated 16)
|
||||
* b input register containing b
|
||||
* c input register containing c
|
||||
* d input register containing d (already rol $1)
|
||||
* operations on a and b are interleaved to increase performance
|
||||
*/
|
||||
#define encrypt_round(a,b,c,d,round)\
|
||||
push d ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
mov s1(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
mov s2(%ebp,%edi,4),%esi;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $16, b ## D;\
|
||||
xor s2(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $16, a ## D;\
|
||||
xor s3(%ebp,%edi,4),%esi;\
|
||||
movzx b ## B, %edi;\
|
||||
xor s3(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
xor (%ebp,%edi,4), %esi;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $15, b ## D;\
|
||||
xor (%ebp,%edi,4), d ## D;\
|
||||
movzx a ## H, %edi;\
|
||||
xor s1(%ebp,%edi,4),%esi;\
|
||||
pop %edi;\
|
||||
add d ## D, %esi;\
|
||||
add %esi, d ## D;\
|
||||
add k+round(%ebp), %esi;\
|
||||
xor %esi, c ## D;\
|
||||
rol $15, c ## D;\
|
||||
add k+4+round(%ebp),d ## D;\
|
||||
xor %edi, d ## D;
|
||||
|
||||
/*
|
||||
* a input register containing a (rotated 16)
|
||||
* b input register containing b
|
||||
* c input register containing c
|
||||
* d input register containing d (already rol $1)
|
||||
* operations on a and b are interleaved to increase performance
|
||||
* last round has different rotations for the output preparation
|
||||
*/
|
||||
#define encrypt_last_round(a,b,c,d,round)\
|
||||
push d ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
mov s1(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
mov s2(%ebp,%edi,4),%esi;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $16, b ## D;\
|
||||
xor s2(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $16, a ## D;\
|
||||
xor s3(%ebp,%edi,4),%esi;\
|
||||
movzx b ## B, %edi;\
|
||||
xor s3(%ebp,%edi,4),d ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
xor (%ebp,%edi,4), %esi;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $16, b ## D;\
|
||||
xor (%ebp,%edi,4), d ## D;\
|
||||
movzx a ## H, %edi;\
|
||||
xor s1(%ebp,%edi,4),%esi;\
|
||||
pop %edi;\
|
||||
add d ## D, %esi;\
|
||||
add %esi, d ## D;\
|
||||
add k+round(%ebp), %esi;\
|
||||
xor %esi, c ## D;\
|
||||
ror $1, c ## D;\
|
||||
add k+4+round(%ebp),d ## D;\
|
||||
xor %edi, d ## D;
|
||||
|
||||
/*
|
||||
* a input register containing a
|
||||
* b input register containing b (rotated 16)
|
||||
* c input register containing c
|
||||
* d input register containing d (already rol $1)
|
||||
* operations on a and b are interleaved to increase performance
|
||||
*/
|
||||
#define decrypt_round(a,b,c,d,round)\
|
||||
push c ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
mov (%ebp,%edi,4), c ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
mov s3(%ebp,%edi,4),%esi;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $16, a ## D;\
|
||||
xor s1(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $16, b ## D;\
|
||||
xor (%ebp,%edi,4), %esi;\
|
||||
movzx a ## B, %edi;\
|
||||
xor s2(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
xor s1(%ebp,%edi,4),%esi;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $15, a ## D;\
|
||||
xor s3(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## H, %edi;\
|
||||
xor s2(%ebp,%edi,4),%esi;\
|
||||
pop %edi;\
|
||||
add %esi, c ## D;\
|
||||
add c ## D, %esi;\
|
||||
add k+round(%ebp), c ## D;\
|
||||
xor %edi, c ## D;\
|
||||
add k+4+round(%ebp),%esi;\
|
||||
xor %esi, d ## D;\
|
||||
rol $15, d ## D;
|
||||
|
||||
/*
|
||||
* a input register containing a
|
||||
* b input register containing b (rotated 16)
|
||||
* c input register containing c
|
||||
* d input register containing d (already rol $1)
|
||||
* operations on a and b are interleaved to increase performance
|
||||
* last round has different rotations for the output preparation
|
||||
*/
|
||||
#define decrypt_last_round(a,b,c,d,round)\
|
||||
push c ## D;\
|
||||
movzx a ## B, %edi;\
|
||||
mov (%ebp,%edi,4), c ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
mov s3(%ebp,%edi,4),%esi;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $16, a ## D;\
|
||||
xor s1(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## H, %edi;\
|
||||
ror $16, b ## D;\
|
||||
xor (%ebp,%edi,4), %esi;\
|
||||
movzx a ## B, %edi;\
|
||||
xor s2(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## B, %edi;\
|
||||
xor s1(%ebp,%edi,4),%esi;\
|
||||
movzx a ## H, %edi;\
|
||||
ror $16, a ## D;\
|
||||
xor s3(%ebp,%edi,4),c ## D;\
|
||||
movzx b ## H, %edi;\
|
||||
xor s2(%ebp,%edi,4),%esi;\
|
||||
pop %edi;\
|
||||
add %esi, c ## D;\
|
||||
add c ## D, %esi;\
|
||||
add k+round(%ebp), c ## D;\
|
||||
xor %edi, c ## D;\
|
||||
add k+4+round(%ebp),%esi;\
|
||||
xor %esi, d ## D;\
|
||||
ror $1, d ## D;
|
||||
|
||||
.align 4
|
||||
.global twofish_enc_blk
|
||||
.global twofish_dec_blk
|
||||
|
||||
twofish_enc_blk:
|
||||
push %ebp /* save registers according to calling convention*/
|
||||
push %ebx
|
||||
push %esi
|
||||
push %edi
|
||||
|
||||
mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */
|
||||
add $crypto_tfm_ctx_offset, %ebp /* ctx adress */
|
||||
mov in_blk+16(%esp),%edi /* input adress in edi */
|
||||
|
||||
mov (%edi), %eax
|
||||
mov b_offset(%edi), %ebx
|
||||
mov c_offset(%edi), %ecx
|
||||
mov d_offset(%edi), %edx
|
||||
input_whitening(%eax,%ebp,a_offset)
|
||||
ror $16, %eax
|
||||
input_whitening(%ebx,%ebp,b_offset)
|
||||
input_whitening(%ecx,%ebp,c_offset)
|
||||
input_whitening(%edx,%ebp,d_offset)
|
||||
rol $1, %edx
|
||||
|
||||
encrypt_round(R0,R1,R2,R3,0);
|
||||
encrypt_round(R2,R3,R0,R1,8);
|
||||
encrypt_round(R0,R1,R2,R3,2*8);
|
||||
encrypt_round(R2,R3,R0,R1,3*8);
|
||||
encrypt_round(R0,R1,R2,R3,4*8);
|
||||
encrypt_round(R2,R3,R0,R1,5*8);
|
||||
encrypt_round(R0,R1,R2,R3,6*8);
|
||||
encrypt_round(R2,R3,R0,R1,7*8);
|
||||
encrypt_round(R0,R1,R2,R3,8*8);
|
||||
encrypt_round(R2,R3,R0,R1,9*8);
|
||||
encrypt_round(R0,R1,R2,R3,10*8);
|
||||
encrypt_round(R2,R3,R0,R1,11*8);
|
||||
encrypt_round(R0,R1,R2,R3,12*8);
|
||||
encrypt_round(R2,R3,R0,R1,13*8);
|
||||
encrypt_round(R0,R1,R2,R3,14*8);
|
||||
encrypt_last_round(R2,R3,R0,R1,15*8);
|
||||
|
||||
output_whitening(%eax,%ebp,c_offset)
|
||||
output_whitening(%ebx,%ebp,d_offset)
|
||||
output_whitening(%ecx,%ebp,a_offset)
|
||||
output_whitening(%edx,%ebp,b_offset)
|
||||
mov out_blk+16(%esp),%edi;
|
||||
mov %eax, c_offset(%edi)
|
||||
mov %ebx, d_offset(%edi)
|
||||
mov %ecx, (%edi)
|
||||
mov %edx, b_offset(%edi)
|
||||
|
||||
pop %edi
|
||||
pop %esi
|
||||
pop %ebx
|
||||
pop %ebp
|
||||
mov $1, %eax
|
||||
ret
|
||||
|
||||
twofish_dec_blk:
|
||||
push %ebp /* save registers according to calling convention*/
|
||||
push %ebx
|
||||
push %esi
|
||||
push %edi
|
||||
|
||||
|
||||
mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */
|
||||
add $crypto_tfm_ctx_offset, %ebp /* ctx adress */
|
||||
mov in_blk+16(%esp),%edi /* input adress in edi */
|
||||
|
||||
mov (%edi), %eax
|
||||
mov b_offset(%edi), %ebx
|
||||
mov c_offset(%edi), %ecx
|
||||
mov d_offset(%edi), %edx
|
||||
output_whitening(%eax,%ebp,a_offset)
|
||||
output_whitening(%ebx,%ebp,b_offset)
|
||||
ror $16, %ebx
|
||||
output_whitening(%ecx,%ebp,c_offset)
|
||||
output_whitening(%edx,%ebp,d_offset)
|
||||
rol $1, %ecx
|
||||
|
||||
decrypt_round(R0,R1,R2,R3,15*8);
|
||||
decrypt_round(R2,R3,R0,R1,14*8);
|
||||
decrypt_round(R0,R1,R2,R3,13*8);
|
||||
decrypt_round(R2,R3,R0,R1,12*8);
|
||||
decrypt_round(R0,R1,R2,R3,11*8);
|
||||
decrypt_round(R2,R3,R0,R1,10*8);
|
||||
decrypt_round(R0,R1,R2,R3,9*8);
|
||||
decrypt_round(R2,R3,R0,R1,8*8);
|
||||
decrypt_round(R0,R1,R2,R3,7*8);
|
||||
decrypt_round(R2,R3,R0,R1,6*8);
|
||||
decrypt_round(R0,R1,R2,R3,5*8);
|
||||
decrypt_round(R2,R3,R0,R1,4*8);
|
||||
decrypt_round(R0,R1,R2,R3,3*8);
|
||||
decrypt_round(R2,R3,R0,R1,2*8);
|
||||
decrypt_round(R0,R1,R2,R3,1*8);
|
||||
decrypt_last_round(R2,R3,R0,R1,0);
|
||||
|
||||
input_whitening(%eax,%ebp,c_offset)
|
||||
input_whitening(%ebx,%ebp,d_offset)
|
||||
input_whitening(%ecx,%ebp,a_offset)
|
||||
input_whitening(%edx,%ebp,b_offset)
|
||||
mov out_blk+16(%esp),%edi;
|
||||
mov %eax, c_offset(%edi)
|
||||
mov %ebx, d_offset(%edi)
|
||||
mov %ecx, (%edi)
|
||||
mov %edx, b_offset(%edi)
|
||||
|
||||
pop %edi
|
||||
pop %esi
|
||||
pop %ebx
|
||||
pop %ebp
|
||||
mov $1, %eax
|
||||
ret
|
97
arch/x86/crypto/twofish_32.c
Normal file
97
arch/x86/crypto/twofish_32.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Glue Code for optimized 586 assembler version of TWOFISH
|
||||
*
|
||||
* Originally Twofish for GPG
|
||||
* By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
|
||||
* 256-bit key length added March 20, 1999
|
||||
* Some modifications to reduce the text size by Werner Koch, April, 1998
|
||||
* Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
|
||||
* Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
|
||||
*
|
||||
* The original author has disclaimed all copyright interest in this
|
||||
* code and thus put it in the public domain. The subsequent authors
|
||||
* have put this under the GNU General Public License.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
* USA
|
||||
*
|
||||
* This code is a "clean room" implementation, written from the paper
|
||||
* _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
|
||||
* Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
|
||||
* through http://www.counterpane.com/twofish.html
|
||||
*
|
||||
* For background information on multiplication in finite fields, used for
|
||||
* the matrix operations in the key schedule, see the book _Contemporary
|
||||
* Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
|
||||
* Third Edition.
|
||||
*/
|
||||
|
||||
#include <crypto/twofish.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
|
||||
asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
|
||||
asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
|
||||
|
||||
static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
twofish_enc_blk(tfm, dst, src);
|
||||
}
|
||||
|
||||
static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
twofish_dec_blk(tfm, dst, src);
|
||||
}
|
||||
|
||||
static struct crypto_alg alg = {
|
||||
.cra_name = "twofish",
|
||||
.cra_driver_name = "twofish-i586",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = TF_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct twofish_ctx),
|
||||
.cra_alignmask = 3,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_list = LIST_HEAD_INIT(alg.cra_list),
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = TF_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = TF_MAX_KEY_SIZE,
|
||||
.cia_setkey = twofish_setkey,
|
||||
.cia_encrypt = twofish_encrypt,
|
||||
.cia_decrypt = twofish_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static int __init init(void)
|
||||
{
|
||||
return crypto_register_alg(&alg);
|
||||
}
|
||||
|
||||
static void __exit fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&alg);
|
||||
}
|
||||
|
||||
module_init(init);
|
||||
module_exit(fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
|
||||
MODULE_ALIAS("twofish");
|
Reference in New Issue
Block a user