i386: move crypto

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Thomas Gleixner
2007-10-11 11:16:21 +02:00
父節點 af49d41e8c
當前提交 9c20194215
共有 8 個文件被更改,包括 4 次插入4 次删除

5
arch/x86/crypto/Makefile Normal file
查看文件

@@ -0,0 +1,5 @@
# Pull in the crypto sub-Makefile that matches the configured word size:
# any build that is not 32-bit uses the 64-bit rules, which still live
# under arch/x86_64.
ifneq ($(CONFIG_X86_32),y)
include $(srctree)/arch/x86_64/crypto/Makefile_64
else
include $(srctree)/arch/x86/crypto/Makefile_32
endif

查看文件

@@ -0,0 +1,12 @@
#
# x86/crypto/Makefile_32
#
# Arch-specific CryptoAPI modules (32-bit x86).
#
# Each module is built from two objects: an assembler implementation
# (*-asm_32.o) and its C counterpart.
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
aes-i586-y := aes-i586-asm_32.o aes_32.o
twofish-i586-y := twofish-i586-asm_32.o twofish_32.o

查看文件

@@ -0,0 +1,373 @@
// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
// 1. distributions of this source code include the above copyright
// notice, this list of conditions and the following disclaimer//
//
// 2. distributions in binary form include the above copyright
// notice, this list of conditions and the following disclaimer
// in the documentation and/or other associated materials//
//
// 3. the copyright holder's name is not used to endorse products
// built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002
.file "aes-i586-asm.S"
.text
// NOTE(review): crypto_tfm_ctx_offset, used below, presumably comes from this header
#include <asm/asm-offsets.h>
#define tlen 1024 // length in bytes of each of 4 'xor' arrays (256 32-bit words)
/*
 * Offsets from %esp to the three C arguments once one register has been
 * pushed onto the stack (saved register at 0, return address at 4).
 */
#define tfm 8
#define out_blk 12
#define in_blk 16
/*
 * Offsets in the crypto_tfm structure. The cipher context starts at
 * crypto_tfm_ctx_offset; within it the encryption key schedule is at
 * byte 0, the round count at byte 256 and the decryption key schedule
 * at byte 260.
 */
#define ekey (crypto_tfm_ctx_offset + 0)
#define nrnd (crypto_tfm_ctx_offset + 256)
#define dkey (crypto_tfm_ctx_offset + 260)
// register mapping for encrypt and decrypt subroutines
#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi
// Names of the low and high byte sub-registers of eax..edx, spelled so that
// they can be produced by token pasting in _l()/_h() below. There are no
// entries for esi and edi (r4, r5).
#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh
// h(reg) / l(reg): map a register alias such as r0 to the name of its high /
// low byte sub-register. The extra level of indirection lets the argument be
// macro-expanded (r0 -> eax) before ## pastes the suffix on.
#define _h(reg) reg##h
#define h(reg) _h(reg)
#define _l(reg) reg##l
#define l(reg) _l(reg)
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the four output
// registers passed as a1..a4.
// Parameters:
// table table base address; the four tables lie tlen bytes apart
// a1 xored with the table 0 entry selected by byte 0 (lowest) of idx
// a2 xored with the table 1 entry selected by byte 1 of idx
// a3 xored with the table 2 entry selected by byte 2 of idx
// a4 xored with the table 3 entry selected by byte 3 (highest) of idx
// idx input register for the round (destroyed)
// tmp scratch register for the round
// idx is accessed through l() and h(), so it must be one of the registers
// that has byte sub-registers.
#define do_col(table, a1,a2,a3,a4, idx, tmp) \
movzx %l(idx),%tmp; \
xor table(,%tmp,4),%a1; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+2*tlen(,%tmp,4),%a3; \
xor table+3*tlen(,%idx,4),%a4;
// Like do_col, but first initialises the output registers from the key
// schedule instead of accumulating into their previous contents:
// a1 = word at sched+0 ^ table 0 entry for byte 0 of idx
// a2 = word at sched+12 ^ table 1 entry for byte 1 of idx
// a3 = word at sched+8 ^ table 2 entry for byte 2 of idx
// a4 = word at sched+4 ^ table 3 entry for byte 3 of idx
// sched is a memory operand (e.g. 16(%ebp)); the numeric prefix written in
// front of it is added to its displacement by the assembler.
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 12 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 4 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// Counterpart of do_fcol used by the inverse rounds. It differs only in
// which key schedule words seed a2 and a4:
// a1 = word at sched+0 ^ table 0 entry for byte 0 of idx
// a2 = word at sched+4 ^ table 1 entry for byte 1 of idx
// a3 = word at sched+8 ^ table 2 entry for byte 2 of idx
// a4 = word at sched+12 ^ table 3 entry for byte 3 of idx
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
mov 0 sched,%a1; \
movzx %l(idx),%tmp; \
mov 4 sched,%a2; \
xor table(,%tmp,4),%a1; \
mov 12 sched,%a4; \
movzx %h(idx),%tmp; \
shr $16,%idx; \
xor table+tlen(,%tmp,4),%a2; \
movzx %l(idx),%tmp; \
movzx %h(idx),%idx; \
xor table+3*tlen(,%idx,4),%a4; \
mov %a3,%idx; \
mov 8 sched,%a3; \
xor table+2*tlen(,%tmp,4),%a3;
// original Gladman had conditional saves to MMX regs.
// Here the temporaries live on the stack instead.
// save(slot, reg): store register reg into 32-bit stack slot number slot.
#define save(a1, a2) \
mov %a2,4*a1(%esp)
// restore(reg, slot): load register reg from 32-bit stack slot number slot.
// Note that the argument order is the reverse of save().
#define restore(a1, a2) \
mov 4*a2(%esp),%a1
// These macros perform a forward encryption cycle. They are entered with
// the previous round column values in four registers and exit with the
// new column values in four registers, using the stack for temporary
// storage. fwd_rnd1 and fwd_rnd2 differ only in which of r0/r2 carries the
// first column, so that they can be chained alternately.
// Parameters:
// arg memory operand addressing the round key (passed to do_fcol as sched)
// table lookup table base address
// r1 and r5 are saved to stack slots 0 and 1 because do_fcol overwrites
// them before their old values have been used as an index; they are
// reloaded into r0 for the last two do_col steps. r3 is the scratch register.
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
#define fwd_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
// Forward round, second flavour: same sequence as fwd_rnd1 with the roles
// of r0 and r2 exchanged, so it consumes what fwd_rnd1 produces and leaves
// the columns where fwd_rnd1 expects them.
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
#define fwd_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
// These macros perform an inverse (decryption) cycle. They are entered with
// the previous round column values in four registers and exit with the
// new column values in four registers, using the stack for temporary
// storage. They mirror fwd_rnd1/fwd_rnd2 but use do_icol and a different
// assignment of output registers to the table lookups.
// Parameters:
// arg memory operand addressing the round key (passed to do_icol as sched)
// table lookup table base address
// round column values
// on entry: r0,r1,r4,r5
// on exit: r2,r1,r4,r5
#define inv_rnd1(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
restore(r0,0); \
do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
restore(r0,1); \
do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
// Inverse round, second flavour: same sequence as inv_rnd1 with the roles
// of r0 and r2 exchanged, so the two can be chained alternately.
// round column values
// on entry: r2,r1,r4,r5
// on exit: r0,r1,r4,r5
#define inv_rnd2(arg, table) \
save (0,r1); \
save (1,r5); \
\
/* compute new column values */ \
do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
restore(r2,0); \
do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
restore(r2,1); \
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
// Encrypts the 16-byte block at in_blk into out_blk, using the encryption
// key schedule and the round count stored in the context of tfm. All three
// arguments are read from the stack. %ebp, %ebx, %esi and %edi are saved and
// restored; %ecx and %edx are overwritten, and %eax is set to 1 before
// returning even though the C prototype above is void.
.global aes_enc_blk
.extern ft_tab
.extern fl_tab
.align 4
aes_enc_blk:
push %ebp
mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
// (the argument offsets are defined for one pushed register, so every
// further push has to be compensated by adding 4 to the offset)
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov nrnd(%ebp),%r3 // number of rounds
push %edi
#if ekey != 0
lea ekey(%ebp),%ebp // key pointer
#endif
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
add $16,%ebp // increment to next round key
// Select the entry point by round count. %ebp is advanced by 32 for each
// extra pair of rounds so that the round-key offsets from label 4 onwards
// are the same for every key size. lea does not modify the flags, so the
// je below still tests the result of this cmp.
cmp $12,%r3
jb 4f // 10 rounds for 128-bit key
lea 32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea 32(%ebp),%ebp
2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
fwd_rnd2( -48(%ebp) ,ft_tab)
3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
fwd_rnd2( -16(%ebp) ,ft_tab)
4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
fwd_rnd2( +16(%ebp) ,ft_tab)
fwd_rnd1( +32(%ebp) ,ft_tab)
fwd_rnd2( +48(%ebp) ,ft_tab)
fwd_rnd1( +64(%ebp) ,ft_tab)
fwd_rnd2( +80(%ebp) ,ft_tab)
fwd_rnd1( +96(%ebp) ,ft_tab)
fwd_rnd2(+112(%ebp) ,ft_tab)
fwd_rnd1(+128(%ebp) ,ft_tab)
fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
// (the stores are interleaved with the pops: each register is written out
// before the pop that would overwrite it)
add $8,%esp
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
mov $1,%eax
ret
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
// Decrypts the 16-byte block at in_blk into out_blk, using the decryption
// key schedule and the round count stored in the context of tfm. Register
// usage and stack handling are the same as in aes_enc_blk; the difference
// is that the key schedule is consumed from its end towards its start.
.global aes_dec_blk
.extern it_tab
.extern il_tab
.align 4
aes_dec_blk:
push %ebp
mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
1: push %ebx
mov in_blk+4(%esp),%r2
push %esi
mov nrnd(%ebp),%r3 // number of rounds
push %edi
#if dkey != 0
lea dkey(%ebp),%ebp // key pointer
#endif
// advance the key pointer by 16 bytes per round, i.e. to the round key
// at index 'rounds' of the decryption schedule
mov %r3,%r0
shl $4,%r0
add %r0,%ebp
// input four columns and xor in first round key
mov (%r2),%r0
mov 4(%r2),%r1
mov 8(%r2),%r4
mov 12(%r2),%r5
xor (%ebp),%r0
xor 4(%ebp),%r1
xor 8(%ebp),%r4
xor 12(%ebp),%r5
sub $8,%esp // space for register saves on stack
sub $16,%ebp // step back to the next round key (schedule is walked downwards)
// Select the entry point by round count, as in aes_enc_blk but moving
// %ebp down by 32 for each extra pair of rounds. lea does not modify the
// flags, so the je below still tests the result of this cmp.
cmp $12,%r3
jb 4f // 10 rounds for 128-bit key
lea -32(%ebp),%ebp
je 3f // 12 rounds for 192-bit key
lea -32(%ebp),%ebp
2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
inv_rnd2( +48(%ebp), it_tab)
3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
inv_rnd2( +16(%ebp), it_tab)
4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
inv_rnd2( -16(%ebp), it_tab)
inv_rnd1( -32(%ebp), it_tab)
inv_rnd2( -48(%ebp), it_tab)
inv_rnd1( -64(%ebp), it_tab)
inv_rnd2( -80(%ebp), it_tab)
inv_rnd1( -96(%ebp), it_tab)
inv_rnd2(-112(%ebp), it_tab)
inv_rnd1(-128(%ebp), it_tab)
inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
// move final values to the output array. CAUTION: the
// order of these assigns rely on the register mappings
// (each register is written out before the pop that would overwrite it)
add $8,%esp
mov out_blk+12(%esp),%ebp
mov %r5,12(%ebp)
pop %edi
mov %r4,8(%ebp)
pop %esi
mov %r1,4(%ebp)
pop %ebx
mov %r0,(%ebp)
pop %ebp
mov $1,%eax
ret

515
arch/x86/crypto/aes_32.c Normal file
查看文件

@@ -0,0 +1,515 @@
/*
*
* Glue Code for optimized 586 assembler version of AES
*
* Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
* All rights reserved.
*
* LICENSE TERMS
*
* The free distribution and use of this software in both source and binary
* form is allowed (with or without changes) provided that:
*
* 1. distributions of this source code include the above copyright
* notice, this list of conditions and the following disclaimer;
*
* 2. distributions in binary form include the above copyright
* notice, this list of conditions and the following disclaimer
* in the documentation and/or other associated materials;
*
* 3. the copyright holder's name is not used to endorse products
* built using this software without specific written permission.
*
* ALTERNATIVELY, provided that this notice is retained in full, this product
* may be distributed under the terms of the GNU General Public License (GPL),
* in which case the provisions of the GPL apply INSTEAD OF those given above.
*
* DISCLAIMER
*
* This software is provided 'as is' with no explicit or implied warranties
* in respect of its properties, including, but not limited to, correctness
* and/or fitness for purpose.
*
* Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
* 2.5 API).
* Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
* Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
*
*/
#include <asm/byteorder.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/linkage.h>
/*
 * Single-block primitives implemented in assembler. Each reads one
 * 16-byte block from src and writes the result to dst, taking the key
 * schedule and round count from the context of tfm.
 */
asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
/* Supported key sizes and the cipher block size, all in bytes. */
#define AES_MIN_KEY_SIZE 16
#define AES_MAX_KEY_SIZE 32
#define AES_BLOCK_SIZE 16
/*
 * Length of one key schedule array in 32-bit words. The expansion is
 * parenthesised so that the macro behaves as a single value inside larger
 * expressions (for example X / AES_KS_LENGTH).
 */
#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE)
/* Number of entries in the round constant table. */
#define RC_LENGTH 29
/*
 * Per-transform AES context.
 *
 * The assembler routines address these fields by fixed byte offsets from
 * the start of the context (ekey at 0, rounds at 256, dkey at 260), so the
 * order and size of the members must not change.
 */
struct aes_ctx {
/* expanded encryption key schedule */
u32 ekey[AES_KS_LENGTH];
/* number of rounds; aes_set_key() stores 10, 12 or 14 */
u32 rounds;
/* expanded decryption key schedule */
u32 dkey[AES_KS_LENGTH];
};
/* Modular polynomial of the GF(2^8) field used by the table generator. */
#define WPOLY 0x011b
/* Pack four bytes into a 32-bit word, b0 being the least significant byte. */
#define bytes2word(b0, b1, b2, b3) \
(((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
/*
 * define the finite field multiplies required for Rijndael
 *
 * These macros only work where arrays named pow[] and log[] are in scope
 * (see gen_tabs()). A product is formed as pow[log[x] + c]; going by the
 * macro names, c is the logarithm of the multiplier 2, 3, 9, 0xb, 0xd or
 * 0xe. Zero has no logarithm and is mapped to zero explicitly. fi() is the
 * multiplicative inverse, pow[255 - log[x]].
 */
#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
#define fi(x) ((x) ? pow[255 - log[x]]: 0)
/*
 * upr - rotate a 32-bit word left by n bytes.
 *
 * @x: word to rotate
 * @n: rotation distance in bytes; taken modulo 4
 *
 * Returns x rotated left by 8 * n bits. A distance that is a multiple of
 * four returns x unchanged; this case is handled separately because
 * shifting a 32-bit value by 32 bits is undefined behaviour in C.
 */
static inline u32 upr(u32 x, int n)
{
	unsigned int bits = (8 * (unsigned int)n) & 31;

	if (!bits)
		return x;
	return (x << bits) | (x >> (32 - bits));
}
/*
 * bval - extract byte number n of a 32-bit word.
 *
 * Byte 0 is the least significant byte. The result is x shifted right by
 * 8 * n bits and truncated to 8 bits.
 */
static inline u8 bval(u32 x, int n)
{
	const int shift = n * 8;

	return (u8)(x >> shift);
}
/*
 * The forward and inverse affine transformations used in the S-box.
 *
 * Both macros use a u32 variable named w from the enclosing scope as
 * scratch space and overwrite it; the value of the expression is the
 * transformed byte.
 */
#define fwd_affine(x) \
(w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
#define inv_affine(x) \
(w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
/* Round constants for the key schedule; filled in by gen_tabs(). */
static u32 rcon_tab[RC_LENGTH];
/*
 * Lookup tables filled in by gen_tabs(), each made of four sub-tables of
 * 256 words. ft_tab, fl_tab, it_tab and il_tab are not static because the
 * assembler routines reference them by name; im_tab is only used by the
 * key schedule code in this file.
 */
u32 ft_tab[4][256];
u32 fl_tab[4][256];
static u32 im_tab[4][256];
u32 il_tab[4][256];
u32 it_tab[4][256];
/*
 * gen_tabs - fill rcon_tab and the five lookup tables.
 *
 * Takes no arguments and returns nothing; it only writes the file-level
 * tables. The locals pow, log and w must keep their names because the
 * f2()..fi() and fwd_affine()/inv_affine() macros refer to them directly.
 */
static void gen_tabs(void)
{
u32 i, w;
u8 pow[512], log[256];
/*
 * log and power tables for GF(2^8) finite field with
 * WPOLY as modular polynomial - the simplest primitive
 * root is 0x03, used here to generate the tables.
 *
 * Each step multiplies w by 3 (w ^ 2w, reduced by WPOLY). Every power
 * is stored twice, 255 entries apart, so that pow[log[a] + c] can be
 * looked up without reducing the index modulo 255. log[0] is never
 * written; the field macros test for zero before using log[].
 */
i = 0; w = 1;
do {
pow[i] = (u8)w;
pow[i + 255] = (u8)w;
log[w] = (u8)i++;
w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
} while (w != 1);
/* round constants: successive doublings of 1, kept in the low byte */
for(i = 0, w = 1; i < RC_LENGTH; ++i) {
rcon_tab[i] = bytes2word(w, 0, 0, 0);
w = f2(w);
}
for(i = 0; i < 256; ++i) {
u8 b;
/* forward S-box value of i: field inverse, then affine transform */
b = fwd_affine(fi((u8)i));
w = bytes2word(f2(b), b, b, f3(b));
/*
 * tables for a normal encryption round; sub-table n holds the
 * word rotated left by n bytes
 */
ft_tab[0][i] = w;
ft_tab[1][i] = upr(w, 1);
ft_tab[2][i] = upr(w, 2);
ft_tab[3][i] = upr(w, 3);
w = bytes2word(b, 0, 0, 0);
/*
 * tables for last encryption round
 * (may also be used in the key schedule)
 */
fl_tab[0][i] = w;
fl_tab[1][i] = upr(w, 1);
fl_tab[2][i] = upr(w, 2);
fl_tab[3][i] = upr(w, 3);
/* inverse S-box value of i: inverse affine transform, then field inverse */
b = fi(inv_affine((u8)i));
w = bytes2word(fe(b), f9(b), fd(b), fb(b));
/*
 * tables for the inverse mix column operation; note that these
 * are indexed by the byte value b, not by i
 */
im_tab[0][b] = w;
im_tab[1][b] = upr(w, 1);
im_tab[2][b] = upr(w, 2);
im_tab[3][b] = upr(w, 3);
/* tables for a normal decryption round */
it_tab[0][i] = w;
it_tab[1][i] = upr(w,1);
it_tab[2][i] = upr(w,2);
it_tab[3][i] = upr(w,3);
w = bytes2word(b, 0, 0, 0);
/* tables for last decryption round */
il_tab[0][i] = w;
il_tab[1][i] = upr(w,1);
il_tab[2][i] = upr(w,2);
il_tab[3][i] = upr(w,3);
}
}
/*
 * four_tables(x, tab, vf, rf, c): xor together one entry from each of the
 * four sub-tables of tab. Sub-table n is indexed with byte rf(n, c) of
 * vf(x, n, c).
 */
#define four_tables(x,tab,vf,rf,c) \
( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
tab[3][bval(vf(x,3,c),rf(3,c))] \
)
/* vf1: use the word itself for every sub-table */
#define vf1(x,r,c) (x)
/* rf1: sub-table r takes byte r; rf2: sub-table r takes byte (r - c) mod 4 */
#define rf1(r,c) (r)
#define rf2(r,c) ((r-c)&3)
/* inv_mcol: inverse mix column of a word, via im_tab */
#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
/* ls_box: fl_tab lookup on every byte of x with the byte selection shifted by c */
#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
/* ff: shorthand for inv_mcol, used by the decryption key schedule macros */
#define ff(x) inv_mcol(x)
/*
 * Encryption key schedule steps. All of them operate on an array named
 * ss[] from the enclosing scope, which holds the most recent key words,
 * and append new words to the schedule k. i is the step number and also
 * selects the round constant.
 *
 * ke4: 128-bit key step; writes k[4i+4] .. k[4i+7].
 */
#define ke4(k,i) \
{ \
k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
k[4*(i)+5] = ss[1] ^= ss[0]; \
k[4*(i)+6] = ss[2] ^= ss[1]; \
k[4*(i)+7] = ss[3] ^= ss[2]; \
}
/* kel4: final 128-bit key step; performs the same assignments as ke4. */
#define kel4(k,i) \
{ \
k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
k[4*(i)+5] = ss[1] ^= ss[0]; \
k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
}
/* ke6: 192-bit key step; writes k[6i+6] .. k[6i+11]. */
#define ke6(k,i) \
{ \
k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
k[6*(i)+ 7] = ss[1] ^= ss[0]; \
k[6*(i)+ 8] = ss[2] ^= ss[1]; \
k[6*(i)+ 9] = ss[3] ^= ss[2]; \
k[6*(i)+10] = ss[4] ^= ss[3]; \
k[6*(i)+11] = ss[5] ^= ss[4]; \
}
/* kel6: final 192-bit key step; writes only the first four words. */
#define kel6(k,i) \
{ \
k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
k[6*(i)+ 7] = ss[1] ^= ss[0]; \
k[6*(i)+ 8] = ss[2] ^= ss[1]; \
k[6*(i)+ 9] = ss[3] ^= ss[2]; \
}
/*
 * ke8: 256-bit key step; writes k[8i+8] .. k[8i+15]. The fifth word uses
 * a second ls_box() with shift 0 and no round constant.
 */
#define ke8(k,i) \
{ \
k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
k[8*(i)+ 9] = ss[1] ^= ss[0]; \
k[8*(i)+10] = ss[2] ^= ss[1]; \
k[8*(i)+11] = ss[3] ^= ss[2]; \
k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
k[8*(i)+13] = ss[5] ^= ss[4]; \
k[8*(i)+14] = ss[6] ^= ss[5]; \
k[8*(i)+15] = ss[7] ^= ss[6]; \
}
/* kel8: final 256-bit key step; writes only the first four words. */
#define kel8(k,i) \
{ \
k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
k[8*(i)+ 9] = ss[1] ^= ss[0]; \
k[8*(i)+10] = ss[2] ^= ss[1]; \
k[8*(i)+11] = ss[3] ^= ss[2]; \
}
/*
 * Decryption key schedule steps for 128-bit keys. They use the ss[] array
 * of the enclosing scope, with ss[4] as a temporary, and write
 * k[4i+4] .. k[4i+7]. ff() is the inverse mix column transform.
 *
 * kdf4: first step. Recombines ss[0..3], then stores ff() of a running
 * xor over the previous four schedule words.
 */
#define kdf4(k,i) \
{ \
ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
ss[1] = ss[1] ^ ss[3]; \
ss[2] = ss[2] ^ ss[3]; \
ss[3] = ss[3]; \
ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
ss[i % 4] ^= ss[4]; \
ss[4] ^= k[4*(i)]; \
k[4*(i)+4] = ff(ss[4]); \
ss[4] ^= k[4*(i)+1]; \
k[4*(i)+5] = ff(ss[4]); \
ss[4] ^= k[4*(i)+2]; \
k[4*(i)+6] = ff(ss[4]); \
ss[4] ^= k[4*(i)+3]; \
k[4*(i)+7] = ff(ss[4]); \
}
/*
 * kd4: middle steps. ff() is applied once to the new term, which is then
 * chained by xor with the previous four schedule words.
 */
#define kd4(k,i) \
{ \
ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
ss[i % 4] ^= ss[4]; \
ss[4] = ff(ss[4]); \
k[4*(i)+4] = ss[4] ^= k[4*(i)]; \
k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \
k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
}
/* kdl4: last step. The final four words are written without ff(). */
#define kdl4(k,i) \
{ \
ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
ss[i % 4] ^= ss[4]; \
k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
k[4*(i)+5] = ss[1] ^ ss[3]; \
k[4*(i)+6] = ss[0]; \
k[4*(i)+7] = ss[1]; \
}
/*
 * Decryption key schedule steps for 192-bit keys. They use the ss[] array
 * of the enclosing scope and write schedule words starting at k[6i+6].
 *
 * kdf6: first step. Advances ss[0..5] as in the encryption schedule and
 * stores ff() of each new word.
 */
#define kdf6(k,i) \
{ \
ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
k[6*(i)+ 6] = ff(ss[0]); \
ss[1] ^= ss[0]; \
k[6*(i)+ 7] = ff(ss[1]); \
ss[2] ^= ss[1]; \
k[6*(i)+ 8] = ff(ss[2]); \
ss[3] ^= ss[2]; \
k[6*(i)+ 9] = ff(ss[3]); \
ss[4] ^= ss[3]; \
k[6*(i)+10] = ff(ss[4]); \
ss[5] ^= ss[4]; \
k[6*(i)+11] = ff(ss[5]); \
}
/*
 * kd6: middle steps. ss[6] is a temporary: ff() is applied once to the new
 * term, which is then chained by xor with the previous six schedule words,
 * while ss[0..5] are advanced alongside.
 */
#define kd6(k,i) \
{ \
ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
ss[1] ^= ss[0]; \
k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
ss[2] ^= ss[1]; \
k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
ss[3] ^= ss[2]; \
k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
ss[4] ^= ss[3]; \
k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
ss[5] ^= ss[4]; \
k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
}
/* kdl6: last step. Writes four words, without ff(). */
#define kdl6(k,i) \
{ \
ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
k[6*(i)+ 6] = ss[0]; \
ss[1] ^= ss[0]; \
k[6*(i)+ 7] = ss[1]; \
ss[2] ^= ss[1]; \
k[6*(i)+ 8] = ss[2]; \
ss[3] ^= ss[2]; \
k[6*(i)+ 9] = ss[3]; \
}
/*
 * Decryption key schedule steps for 256-bit keys. They use the ss[] array
 * of the enclosing scope and write schedule words starting at k[8i+8].
 *
 * kdf8: first step. Advances ss[0..7] as in the encryption schedule and
 * stores ff() of each new word.
 */
#define kdf8(k,i) \
{ \
ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
k[8*(i)+ 8] = ff(ss[0]); \
ss[1] ^= ss[0]; \
k[8*(i)+ 9] = ff(ss[1]); \
ss[2] ^= ss[1]; \
k[8*(i)+10] = ff(ss[2]); \
ss[3] ^= ss[2]; \
k[8*(i)+11] = ff(ss[3]); \
ss[4] ^= ls_box(ss[3],0); \
k[8*(i)+12] = ff(ss[4]); \
ss[5] ^= ss[4]; \
k[8*(i)+13] = ff(ss[5]); \
ss[6] ^= ss[5]; \
k[8*(i)+14] = ff(ss[6]); \
ss[7] ^= ss[6]; \
k[8*(i)+15] = ff(ss[7]); \
}
/*
 * kd8: middle steps. The block-local __g is the temporary: for each half
 * of the step ff() is applied once to the new term, which is then chained
 * by xor with the previous eight schedule words, while ss[0..7] are
 * advanced alongside.
 */
#define kd8(k,i) \
{ \
u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \
ss[0] ^= __g; \
__g = ff(__g); \
k[8*(i)+ 8] = __g ^= k[8*(i)]; \
ss[1] ^= ss[0]; \
k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \
ss[2] ^= ss[1]; \
k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \
ss[3] ^= ss[2]; \
k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \
__g = ls_box(ss[3],0); \
ss[4] ^= __g; \
__g = ff(__g); \
k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \
ss[5] ^= ss[4]; \
k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \
ss[6] ^= ss[5]; \
k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \
ss[7] ^= ss[6]; \
k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \
}
/* kdl8: last step. Writes four words, without ff(). */
#define kdl8(k,i) \
{ \
ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
k[8*(i)+ 8] = ss[0]; \
ss[1] ^= ss[0]; \
k[8*(i)+ 9] = ss[1]; \
ss[2] ^= ss[1]; \
k[8*(i)+10] = ss[2]; \
ss[3] ^= ss[2]; \
k[8*(i)+11] = ss[3]; \
}
/*
 * aes_set_key - expand a raw AES key into the encryption and decryption
 * key schedules held in the transform context.
 *
 * @tfm:     transform whose context (struct aes_ctx) receives the schedules
 * @in_key:  raw key bytes, read as little-endian 32-bit words
 * @key_len: key length in bytes; must be 16, 24 or 32
 *
 * Returns 0 on success. For any other key length CRYPTO_TFM_RES_BAD_KEY_LEN
 * is set in tfm->crt_flags and -EINVAL is returned. The length is checked
 * before anything is written, so a rejected key leaves a previously
 * installed key schedule untouched.
 */
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	int i;
	/* Working key words; the ke and kd schedule macros use ss[] by name. */
	u32 ss[8];
	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
	const __le32 *key = (const __le32 *)in_key;
	u32 *flags = &tfm->crt_flags;

	/* Validate first: nothing below may run for an unsupported length. */
	if (key_len != 16 && key_len != 24 && key_len != 32) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	/* encryption schedule */
	ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
	ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
	ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
	ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);

	switch (key_len) {
	case 16:
		for (i = 0; i < 9; i++)
			ke4(ctx->ekey, i);
		kel4(ctx->ekey, 9);
		ctx->rounds = 10;
		break;

	case 24:
		ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
		ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
		for (i = 0; i < 7; i++)
			ke6(ctx->ekey, i);
		kel6(ctx->ekey, 7);
		ctx->rounds = 12;
		break;

	default:	/* 32 bytes: the only length left after validation */
		ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
		ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
		ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
		ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
		for (i = 0; i < 6; i++)
			ke8(ctx->ekey, i);
		kel8(ctx->ekey, 6);
		ctx->rounds = 14;
		break;
	}

	/*
	 * decryption schedule: restart from the raw key; all but the first
	 * four and the last four words are stored after ff(), the inverse
	 * mix column transform
	 */
	ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
	ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
	ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
	ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);

	switch (key_len) {
	case 16:
		kdf4(ctx->dkey, 0);
		for (i = 1; i < 9; i++)
			kd4(ctx->dkey, i);
		kdl4(ctx->dkey, 9);
		break;

	case 24:
		ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
		ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
		kdf6(ctx->dkey, 0);
		for (i = 1; i < 7; i++)
			kd6(ctx->dkey, i);
		kdl6(ctx->dkey, 7);
		break;

	default:	/* 32 bytes: the only length left after validation */
		ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
		ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
		ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
		ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
		kdf8(ctx->dkey, 0);
		for (i = 1; i < 6; i++)
			kd8(ctx->dkey, i);
		kdl8(ctx->dkey, 6);
		break;
	}
	return 0;
}
/*
 * aes_encrypt - cipher encrypt callback; passes the block straight to the
 * assembler routine aes_enc_blk().
 */
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_enc_blk(tfm, dst, src);
}
/*
 * aes_decrypt - cipher decrypt callback; passes the block straight to the
 * assembler routine aes_dec_blk().
 */
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
aes_dec_blk(tfm, dst, src);
}
/*
 * Algorithm descriptor handed to the crypto API: a single-block cipher
 * named "aes", provided by driver "aes-i586" at priority 200, whose
 * per-transform context is a struct aes_ctx.
 */
static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-i586",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct aes_ctx),
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u.cipher		= {
		.cia_min_keysize	= AES_MIN_KEY_SIZE,
		.cia_max_keysize	= AES_MAX_KEY_SIZE,
		.cia_setkey		= aes_set_key,
		.cia_encrypt		= aes_encrypt,
		.cia_decrypt		= aes_decrypt,
	},
};
/*
 * aes_init - module entry point.
 *
 * Builds the lookup tables, then registers the algorithm. Returns whatever
 * crypto_register_alg() returns.
 */
static int __init aes_init(void)
{
	int ret;

	/* the tables must be in place before the cipher can be used */
	gen_tabs();
	ret = crypto_register_alg(&aes_alg);
	return ret;
}
/* aes_fini - module exit point; unregisters the algorithm. */
static void __exit aes_fini(void)
{
crypto_unregister_alg(&aes_alg);
}
/* Module entry and exit hooks, followed by the module metadata. */
module_init(aes_init);
module_exit(aes_fini);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
/* NOTE(review): presumably lets a request for the generic name "aes" find this module */
MODULE_ALIAS("aes");

查看文件

@@ -0,0 +1,335 @@
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
.file "twofish-i586-asm.S"
.text
/* NOTE(review): crypto_tfm_ctx_offset, used below, presumably comes from this header */
#include <asm/asm-offsets.h>
/*
 * Stack offsets of the arguments as seen on function entry; the code adds
 * 16 to them after pushing four registers.
 */
/* return address at 0 */
#define in_blk 12 /* input byte array address parameter*/
#define out_blk 8 /* output byte array address parameter*/
#define tfm 4 /* Twofish context structure */
/* byte offsets of the four 32-bit words a, b, c, d within a block */
#define a_offset 0
#define b_offset 4
#define c_offset 8
#define d_offset 12
/* Structure of the crypto context struct (byte offsets) */
#define s0 0 /* S0 Array 256 Words each */
#define s1 1024 /* S1 Array */
#define s2 2048 /* S2 Array */
#define s3 3072 /* S3 Array */
#define w 4096 /* 8 whitening keys (word) */
#define k 4128 /* key 1-32 ( word ) */
/*
 * define a few register aliases to allow macro substitution:
 * RnD is the full 32-bit register, RnB its low byte, RnH its second byte
 */
#define R0D %eax
#define R0B %al
#define R0H %ah
#define R1D %ebx
#define R1B %bl
#define R1H %bh
#define R2D %ecx
#define R2B %cl
#define R2H %ch
#define R3D %edx
#define R3B %dl
#define R3H %dh
/*
 * performs input whitening: xors src with the whitening key word at byte
 * offset 'offset' in the first group of four whitening keys
 */
#define input_whitening(src,context,offset)\
xor w+offset(context), src;
/*
 * performs output whitening: as above, but uses the second group of four
 * whitening keys, 16 bytes further on
 */
#define output_whitening(src,context,offset)\
xor w+16+offset(context), src;
/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two subkeys within the k array
 * operations on a and b are interleaved to increase performance
 *
 * a..d are register aliases R0..R3; the macro appends D, B or H to select
 * the 32-bit, low-byte or second-byte form. %ebp must point to the
 * context. %esi and %edi are scratch. The incoming d is kept on the stack
 * while its register accumulates the table lookups indexed by the bytes
 * of b; %esi accumulates the lookups indexed by the bytes of a.
 * On exit a and b have been rotated by a further 16 and 31 bits
 * respectively and c has been rotated left by 15 after the xor, which
 * sets the registers up for the next round with the pairs (a,b) and (c,d)
 * exchanged.
 */
#define encrypt_round(a,b,c,d,round)\
push d ## D;\
movzx b ## B, %edi;\
mov s1(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
mov s2(%ebp,%edi,4),%esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%ebp,%edi,4),d ## D;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),%esi;\
movzx b ## B, %edi;\
xor s3(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
xor (%ebp,%edi,4), %esi;\
movzx b ## H, %edi;\
ror $15, b ## D;\
xor (%ebp,%edi,4), d ## D;\
movzx a ## H, %edi;\
xor s1(%ebp,%edi,4),%esi;\
pop %edi;\
add d ## D, %esi;\
add %esi, d ## D;\
add k+round(%ebp), %esi;\
xor %esi, c ## D;\
rol $15, c ## D;\
add k+4+round(%ebp),d ## D;\
xor %edi, d ## D;
/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's two subkeys within the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * Identical to encrypt_round except for two rotations: b is rotated by
 * 16 + 16 bits, which restores its original orientation, and c is rotated
 * right by 1 after the xor instead of left by 15.
 */
#define encrypt_last_round(a,b,c,d,round)\
push d ## D;\
movzx b ## B, %edi;\
mov s1(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
mov s2(%ebp,%edi,4),%esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor s2(%ebp,%edi,4),d ## D;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),%esi;\
movzx b ## B, %edi;\
xor s3(%ebp,%edi,4),d ## D;\
movzx a ## B, %edi;\
xor (%ebp,%edi,4), %esi;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), d ## D;\
movzx a ## H, %edi;\
xor s1(%ebp,%edi,4),%esi;\
pop %edi;\
add d ## D, %esi;\
add %esi, d ## D;\
add k+round(%ebp), %esi;\
xor %esi, c ## D;\
ror $1, c ## D;\
add k+4+round(%ebp),d ## D;\
xor %edi, d ## D;
/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two subkeys within the k array
 * operations on a and b are interleaved to increase performance
 *
 * a..d are register aliases R0..R3; the macro appends D, B or H to select
 * the 32-bit, low-byte or second-byte form. %ebp must point to the
 * context. %esi and %edi are scratch. The incoming c is kept on the stack
 * while its register accumulates the table lookups indexed by the bytes
 * of a; %esi accumulates the lookups indexed by the bytes of b.
 * On exit a and b have been rotated by a further 31 and 16 bits
 * respectively and d has been rotated left by 15 after the xor, which
 * sets the registers up for the next round with the pairs (a,b) and (c,d)
 * exchanged.
 */
#define decrypt_round(a,b,c,d,round)\
push c ## D;\
movzx a ## B, %edi;\
mov (%ebp,%edi,4), c ## D;\
movzx b ## B, %edi;\
mov s3(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s1(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), %esi;\
movzx a ## B, %edi;\
xor s2(%ebp,%edi,4),c ## D;\
movzx b ## B, %edi;\
xor s1(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $15, a ## D;\
xor s3(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
xor s2(%ebp,%edi,4),%esi;\
pop %edi;\
add %esi, c ## D;\
add c ## D, %esi;\
add k+round(%ebp), c ## D;\
xor %edi, c ## D;\
add k+4+round(%ebp),%esi;\
xor %esi, d ## D;\
rol $15, d ## D;
/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's two subkeys within the k array
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 *
 * Identical to decrypt_round except for two rotations: a is rotated by
 * 16 + 16 bits, which restores its original orientation, and d is rotated
 * right by 1 after the xor instead of left by 15.
 */
#define decrypt_last_round(a,b,c,d,round)\
push c ## D;\
movzx a ## B, %edi;\
mov (%ebp,%edi,4), c ## D;\
movzx b ## B, %edi;\
mov s3(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s1(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
ror $16, b ## D;\
xor (%ebp,%edi,4), %esi;\
movzx a ## B, %edi;\
xor s2(%ebp,%edi,4),c ## D;\
movzx b ## B, %edi;\
xor s1(%ebp,%edi,4),%esi;\
movzx a ## H, %edi;\
ror $16, a ## D;\
xor s3(%ebp,%edi,4),c ## D;\
movzx b ## H, %edi;\
xor s2(%ebp,%edi,4),%esi;\
pop %edi;\
add %esi, c ## D;\
add c ## D, %esi;\
add k+round(%ebp), c ## D;\
xor %edi, c ## D;\
add k+4+round(%ebp),%esi;\
xor %esi, d ## D;\
ror $1, d ## D;
.align 4
.global twofish_enc_blk
.global twofish_dec_blk
/*
 * twofish_enc_blk(tfm, out_blk, in_blk), arguments on the stack.
 * Encrypts the 16-byte block at in_blk into out_blk using the tables and
 * keys in the context of tfm. %ebp, %ebx, %esi and %edi are saved and
 * restored; %ecx and %edx are overwritten and %eax is set to 1 on return.
 */
twofish_enc_blk:
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
push %edi
mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm */
add $crypto_tfm_ctx_offset, %ebp /* ctx address */
mov in_blk+16(%esp),%edi /* input address in edi */
mov (%edi), %eax
mov b_offset(%edi), %ebx
mov c_offset(%edi), %ecx
mov d_offset(%edi), %edx
/*
 * whiten the four input words; a and d are also put into the rotated
 * form that encrypt_round expects on entry
 */
input_whitening(%eax,%ebp,a_offset)
ror $16, %eax
input_whitening(%ebx,%ebp,b_offset)
input_whitening(%ecx,%ebp,c_offset)
input_whitening(%edx,%ebp,d_offset)
rol $1, %edx
/* 16 rounds; the register pairs swap roles from one round to the next */
encrypt_round(R0,R1,R2,R3,0);
encrypt_round(R2,R3,R0,R1,8);
encrypt_round(R0,R1,R2,R3,2*8);
encrypt_round(R2,R3,R0,R1,3*8);
encrypt_round(R0,R1,R2,R3,4*8);
encrypt_round(R2,R3,R0,R1,5*8);
encrypt_round(R0,R1,R2,R3,6*8);
encrypt_round(R2,R3,R0,R1,7*8);
encrypt_round(R0,R1,R2,R3,8*8);
encrypt_round(R2,R3,R0,R1,9*8);
encrypt_round(R0,R1,R2,R3,10*8);
encrypt_round(R2,R3,R0,R1,11*8);
encrypt_round(R0,R1,R2,R3,12*8);
encrypt_round(R2,R3,R0,R1,13*8);
encrypt_round(R0,R1,R2,R3,14*8);
encrypt_last_round(R2,R3,R0,R1,15*8);
/*
 * output whitening and store with the two halves exchanged: eax/ebx go
 * to the c/d positions, ecx/edx to the a/b positions
 */
output_whitening(%eax,%ebp,c_offset)
output_whitening(%ebx,%ebp,d_offset)
output_whitening(%ecx,%ebp,a_offset)
output_whitening(%edx,%ebp,b_offset)
mov out_blk+16(%esp),%edi;
mov %eax, c_offset(%edi)
mov %ebx, d_offset(%edi)
mov %ecx, (%edi)
mov %edx, b_offset(%edi)
pop %edi
pop %esi
pop %ebx
pop %ebp
mov $1, %eax
ret
/*
 * twofish_dec_blk(tfm, out_blk, in_blk), arguments on the stack.
 * Decrypts the 16-byte block at in_blk into out_blk using the tables and
 * keys in the context of tfm. It mirrors twofish_enc_blk: the output
 * whitening keys are applied first, the rounds run from subkey offset
 * 15*8 down to 0, and the input whitening keys are applied last.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * overwritten and %eax is set to 1 on return.
 */
twofish_dec_blk:
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
push %edi
mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm */
add $crypto_tfm_ctx_offset, %ebp /* ctx address */
mov in_blk+16(%esp),%edi /* input address in edi */
mov (%edi), %eax
mov b_offset(%edi), %ebx
mov c_offset(%edi), %ecx
mov d_offset(%edi), %edx
/*
 * undo the output whitening; b and c are also put into the rotated form
 * that decrypt_round expects on entry
 */
output_whitening(%eax,%ebp,a_offset)
output_whitening(%ebx,%ebp,b_offset)
ror $16, %ebx
output_whitening(%ecx,%ebp,c_offset)
output_whitening(%edx,%ebp,d_offset)
rol $1, %ecx
/* 16 rounds in reverse subkey order; the register pairs swap roles each round */
decrypt_round(R0,R1,R2,R3,15*8);
decrypt_round(R2,R3,R0,R1,14*8);
decrypt_round(R0,R1,R2,R3,13*8);
decrypt_round(R2,R3,R0,R1,12*8);
decrypt_round(R0,R1,R2,R3,11*8);
decrypt_round(R2,R3,R0,R1,10*8);
decrypt_round(R0,R1,R2,R3,9*8);
decrypt_round(R2,R3,R0,R1,8*8);
decrypt_round(R0,R1,R2,R3,7*8);
decrypt_round(R2,R3,R0,R1,6*8);
decrypt_round(R0,R1,R2,R3,5*8);
decrypt_round(R2,R3,R0,R1,4*8);
decrypt_round(R0,R1,R2,R3,3*8);
decrypt_round(R2,R3,R0,R1,2*8);
decrypt_round(R0,R1,R2,R3,1*8);
decrypt_last_round(R2,R3,R0,R1,0);
/*
 * undo the input whitening and store with the two halves exchanged:
 * eax/ebx go to the c/d positions, ecx/edx to the a/b positions
 */
input_whitening(%eax,%ebp,c_offset)
input_whitening(%ebx,%ebp,d_offset)
input_whitening(%ecx,%ebp,a_offset)
input_whitening(%edx,%ebp,b_offset)
mov out_blk+16(%esp),%edi;
mov %eax, c_offset(%edi)
mov %ebx, d_offset(%edi)
mov %ecx, (%edi)
mov %edx, b_offset(%edi)
pop %edi
pop %esi
pop %ebx
pop %ebp
mov $1, %eax
ret

查看文件

@@ -0,0 +1,97 @@
/*
* Glue Code for optimized 586 assembler version of TWOFISH
*
* Originally Twofish for GPG
* By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
* 256-bit key length added March 20, 1999
* Some modifications to reduce the text size by Werner Koch, April, 1998
* Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
* Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
*
* The original author has disclaimed all copyright interest in this
* code and thus put it in the public domain. The subsequent authors
* have put this under the GNU General Public License.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
* This code is a "clean room" implementation, written from the paper
* _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
* Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
* through http://www.counterpane.com/twofish.html
*
* For background information on multiplication in finite fields, used for
* the matrix operations in the key schedule, see the book _Contemporary
* Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
* Third Edition.
*/
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
/*
 * Single-block primitives implemented in assembler. Each reads one
 * 16-byte block from src and writes the result to dst, using the tables
 * and keys stored in the context of tfm.
 */
asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
/*
 * twofish_encrypt - cipher encrypt callback; passes the block straight to
 * the assembler routine twofish_enc_blk().
 */
static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
twofish_enc_blk(tfm, dst, src);
}
/*
 * twofish_decrypt - cipher decrypt callback; passes the block straight to
 * the assembler routine twofish_dec_blk().
 */
static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
twofish_dec_blk(tfm, dst, src);
}
/*
 * Algorithm descriptor handed to the crypto API: a single-block cipher
 * named "twofish", provided by driver "twofish-i586" at priority 200,
 * with an alignment mask of 3. The setkey handler, the context type and
 * the TF_* size constants are not defined in this file.
 */
static struct crypto_alg alg = {
	.cra_name		= "twofish",
	.cra_driver_name	= "twofish-i586",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct twofish_ctx),
	.cra_alignmask		= 3,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
	.cra_u.cipher		= {
		.cia_min_keysize	= TF_MIN_KEY_SIZE,
		.cia_max_keysize	= TF_MAX_KEY_SIZE,
		.cia_setkey		= twofish_setkey,
		.cia_encrypt		= twofish_encrypt,
		.cia_decrypt		= twofish_decrypt,
	},
};
/*
 * init - module entry point; registers the algorithm and returns whatever
 * crypto_register_alg() returns.
 */
static int __init init(void)
{
	int ret = crypto_register_alg(&alg);

	return ret;
}
/* fini - module exit point; unregisters the algorithm. */
static void __exit fini(void)
{
crypto_unregister_alg(&alg);
}
/* Module entry and exit hooks, followed by the module metadata. */
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
/* NOTE(review): presumably lets a request for the generic name "twofish" find this module */
MODULE_ALIAS("twofish");