Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux

There's a Niagara 2 memcpy fix in this tree and I have
a Kconfig fix from Dave Jones which requires the sparc-next
changes which went upstream yesterday.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2012-10-02 23:02:10 -04:00
5608 changed files with 276183 additions and 152852 deletions

View File

@@ -6,3 +6,4 @@ obj-y += kernel/
obj-y += mm/
obj-y += math-emu/
obj-y += net/
obj-y += crypto/

View File

@@ -0,0 +1,25 @@
#
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
des-sparc64-y := des_asm.o des_glue.o crop_devid.o
camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o

1535
arch/sparc/crypto/aes_asm.S Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,477 @@
/* Glue code for AES encryption optimized for sparc64 crypto opcodes.
*
* This is based largely upon arch/x86/crypto/aesni-intel_glue.c
*
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
* interface for 64-bit kernels.
* Authors: Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Copyright (c) 2010, Intel Corporation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
struct aes_ops {
void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
void (*load_encrypt_keys)(const u64 *key);
void (*load_decrypt_keys)(const u64 *key);
void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
unsigned int len);
void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
unsigned int len);
void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
unsigned int len, u64 *iv);
void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
unsigned int len, u64 *iv);
void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
unsigned int len, u64 *iv);
};
struct crypto_sparc64_aes_ctx {
struct aes_ops *ops;
u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
u32 key_length;
u32 expanded_key_length;
};
extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input,
u32 *output);
extern void aes_sparc64_load_encrypt_keys_128(const u64 *key);
extern void aes_sparc64_load_encrypt_keys_192(const u64 *key);
extern void aes_sparc64_load_encrypt_keys_256(const u64 *key);
extern void aes_sparc64_load_decrypt_keys_128(const u64 *key);
extern void aes_sparc64_load_decrypt_keys_192(const u64 *key);
extern void aes_sparc64_load_decrypt_keys_256(const u64 *key);
extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input,
u64 *output, unsigned int len);
extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
struct aes_ops aes128_ops = {
.encrypt = aes_sparc64_encrypt_128,
.decrypt = aes_sparc64_decrypt_128,
.load_encrypt_keys = aes_sparc64_load_encrypt_keys_128,
.load_decrypt_keys = aes_sparc64_load_decrypt_keys_128,
.ecb_encrypt = aes_sparc64_ecb_encrypt_128,
.ecb_decrypt = aes_sparc64_ecb_decrypt_128,
.cbc_encrypt = aes_sparc64_cbc_encrypt_128,
.cbc_decrypt = aes_sparc64_cbc_decrypt_128,
.ctr_crypt = aes_sparc64_ctr_crypt_128,
};
struct aes_ops aes192_ops = {
.encrypt = aes_sparc64_encrypt_192,
.decrypt = aes_sparc64_decrypt_192,
.load_encrypt_keys = aes_sparc64_load_encrypt_keys_192,
.load_decrypt_keys = aes_sparc64_load_decrypt_keys_192,
.ecb_encrypt = aes_sparc64_ecb_encrypt_192,
.ecb_decrypt = aes_sparc64_ecb_decrypt_192,
.cbc_encrypt = aes_sparc64_cbc_encrypt_192,
.cbc_decrypt = aes_sparc64_cbc_decrypt_192,
.ctr_crypt = aes_sparc64_ctr_crypt_192,
};
struct aes_ops aes256_ops = {
.encrypt = aes_sparc64_encrypt_256,
.decrypt = aes_sparc64_decrypt_256,
.load_encrypt_keys = aes_sparc64_load_encrypt_keys_256,
.load_decrypt_keys = aes_sparc64_load_decrypt_keys_256,
.ecb_encrypt = aes_sparc64_ecb_encrypt_256,
.ecb_decrypt = aes_sparc64_ecb_decrypt_256,
.cbc_encrypt = aes_sparc64_cbc_encrypt_256,
.cbc_decrypt = aes_sparc64_cbc_decrypt_256,
.ctr_crypt = aes_sparc64_ctr_crypt_256,
};
extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key,
unsigned int key_len);
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
switch (key_len) {
case AES_KEYSIZE_128:
ctx->expanded_key_length = 0xb0;
ctx->ops = &aes128_ops;
break;
case AES_KEYSIZE_192:
ctx->expanded_key_length = 0xd0;
ctx->ops = &aes192_ops;
break;
case AES_KEYSIZE_256:
ctx->expanded_key_length = 0xf0;
ctx->ops = &aes256_ops;
break;
default:
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len);
ctx->key_length = key_len;
return 0;
}
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
static int ecb_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
ctx->ops->load_encrypt_keys(&ctx->key[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & AES_BLOCK_MASK;
if (likely(block_len)) {
ctx->ops->ecb_encrypt(&ctx->key[0],
(const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len);
}
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int ecb_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
u64 *key_end;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & AES_BLOCK_MASK;
if (likely(block_len)) {
ctx->ops->ecb_decrypt(key_end,
(const u64 *) walk.src.virt.addr,
(u64 *) walk.dst.virt.addr, block_len);
}
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int cbc_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
ctx->ops->load_encrypt_keys(&ctx->key[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & AES_BLOCK_MASK;
if (likely(block_len)) {
ctx->ops->cbc_encrypt(&ctx->key[0],
(const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len, (u64 *) walk.iv);
}
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int cbc_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
u64 *key_end;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & AES_BLOCK_MASK;
if (likely(block_len)) {
ctx->ops->cbc_decrypt(key_end,
(const u64 *) walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len, (u64 *) walk.iv);
}
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int ctr_crypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
ctx->ops->load_encrypt_keys(&ctx->key[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & AES_BLOCK_MASK;
if (likely(block_len)) {
ctx->ops->ctr_crypt(&ctx->key[0],
(const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len, (u64 *) walk.iv);
}
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static struct crypto_alg algs[] = { {
.cra_name = "aes",
.cra_driver_name = "aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
}
}, {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = aes_set_key,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = aes_set_key,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = aes_set_key,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
} };
static bool __init sparc64_has_aes_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_AES))
return false;
return true;
}
static int __init aes_sparc64_mod_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(algs); i++)
INIT_LIST_HEAD(&algs[i].cra_list);
if (sparc64_has_aes_opcode()) {
pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
}
pr_info("sparc64 aes opcodes not available.\n");
return -ENODEV;
}
static void __exit aes_sparc64_mod_fini(void)
{
crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
module_init(aes_sparc64_mod_init);
module_exit(aes_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated");
MODULE_ALIAS("aes");

View File

@@ -0,0 +1,563 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
CAMELLIA_F(KEY_BASE + 0, I1, I0, I1) \
CAMELLIA_F(KEY_BASE + 2, I0, I1, I0) \
CAMELLIA_F(KEY_BASE + 4, I1, I0, I1) \
CAMELLIA_F(KEY_BASE + 6, I0, I1, I0) \
CAMELLIA_F(KEY_BASE + 8, I1, I0, I1) \
CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
.data
.align 8
SIGMA: .xword 0xA09E667F3BCC908B
.xword 0xB67AE8584CAA73B2
.xword 0xC6EF372FE94F82BE
.xword 0x54FF53A5F1D36F1C
.xword 0x10E527FADE682D1D
.xword 0xB05688C2B3E6C1FD
.text
.align 32
ENTRY(camellia_sparc64_key_expand)
/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
VISEntry
ld [%o0 + 0x00], %f0 ! i0, k[0]
ld [%o0 + 0x04], %f1 ! i1, k[1]
ld [%o0 + 0x08], %f2 ! i2, k[2]
ld [%o0 + 0x0c], %f3 ! i3, k[3]
std %f0, [%o1 + 0x00] ! k[0, 1]
fsrc2 %f0, %f28
std %f2, [%o1 + 0x08] ! k[2, 3]
cmp %o2, 16
be 10f
fsrc2 %f2, %f30
ld [%o0 + 0x10], %f0
ld [%o0 + 0x14], %f1
std %f0, [%o1 + 0x20] ! k[8, 9]
cmp %o2, 24
fone %f10
be,a 1f
fxor %f10, %f0, %f2
ld [%o0 + 0x18], %f2
ld [%o0 + 0x1c], %f3
1:
std %f2, [%o1 + 0x28] ! k[10, 11]
fxor %f28, %f0, %f0
fxor %f30, %f2, %f2
10:
sethi %hi(SIGMA), %g3
or %g3, %lo(SIGMA), %g3
ldd [%g3 + 0x00], %f16
ldd [%g3 + 0x08], %f18
ldd [%g3 + 0x10], %f20
ldd [%g3 + 0x18], %f22
ldd [%g3 + 0x20], %f24
ldd [%g3 + 0x28], %f26
CAMELLIA_F(16, 2, 0, 2)
CAMELLIA_F(18, 0, 2, 0)
fxor %f28, %f0, %f0
fxor %f30, %f2, %f2
CAMELLIA_F(20, 2, 0, 2)
CAMELLIA_F(22, 0, 2, 0)
#define ROTL128(S01, S23, TMP1, TMP2, N) \
srlx S01, (64 - N), TMP1; \
sllx S01, N, S01; \
srlx S23, (64 - N), TMP2; \
sllx S23, N, S23; \
or S01, TMP2, S01; \
or S23, TMP1, S23
cmp %o2, 16
bne 1f
nop
/* 128-bit key */
std %f0, [%o1 + 0x10] ! k[ 4, 5]
std %f2, [%o1 + 0x18] ! k[ 6, 7]
MOVDTOX_F0_O4
MOVDTOX_F2_O5
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x30] ! k[12, 13]
stx %o5, [%o1 + 0x38] ! k[14, 15]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x40] ! k[16, 17]
stx %o5, [%o1 + 0x48] ! k[18, 19]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x60] ! k[24, 25]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x70] ! k[28, 29]
stx %o5, [%o1 + 0x78] ! k[30, 31]
ROTL128(%o4, %o5, %g2, %g3, 34)
stx %o4, [%o1 + 0xa0] ! k[40, 41]
stx %o5, [%o1 + 0xa8] ! k[42, 43]
ROTL128(%o4, %o5, %g2, %g3, 17)
stx %o4, [%o1 + 0xc0] ! k[48, 49]
stx %o5, [%o1 + 0xc8] ! k[50, 51]
ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x20] ! k[ 8, 9]
stx %o5, [%o1 + 0x28] ! k[10, 11]
ROTL128(%o4, %o5, %g2, %g3, 30)
stx %o4, [%o1 + 0x50] ! k[20, 21]
stx %o5, [%o1 + 0x58] ! k[22, 23]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o5, [%o1 + 0x68] ! k[26, 27]
ROTL128(%o4, %o5, %g2, %g3, 17)
stx %o4, [%o1 + 0x80] ! k[32, 33]
stx %o5, [%o1 + 0x88] ! k[34, 35]
ROTL128(%o4, %o5, %g2, %g3, 17)
stx %o4, [%o1 + 0x90] ! k[36, 37]
stx %o5, [%o1 + 0x98] ! k[38, 39]
ROTL128(%o4, %o5, %g2, %g3, 17)
stx %o4, [%o1 + 0xb0] ! k[44, 45]
stx %o5, [%o1 + 0xb8] ! k[46, 47]
ba,pt %xcc, 2f
mov (3 * 16 * 4), %o0
1:
/* 192-bit or 256-bit key */
std %f0, [%o1 + 0x30] ! k[12, 13]
std %f2, [%o1 + 0x38] ! k[14, 15]
ldd [%o1 + 0x20], %f4 ! k[ 8, 9]
ldd [%o1 + 0x28], %f6 ! k[10, 11]
fxor %f0, %f4, %f0
fxor %f2, %f6, %f2
CAMELLIA_F(24, 2, 0, 2)
CAMELLIA_F(26, 0, 2, 0)
std %f0, [%o1 + 0x10] ! k[ 4, 5]
std %f2, [%o1 + 0x18] ! k[ 6, 7]
MOVDTOX_F0_O4
MOVDTOX_F2_O5
ROTL128(%o4, %o5, %g2, %g3, 30)
stx %o4, [%o1 + 0x50] ! k[20, 21]
stx %o5, [%o1 + 0x58] ! k[22, 23]
ROTL128(%o4, %o5, %g2, %g3, 30)
stx %o4, [%o1 + 0xa0] ! k[40, 41]
stx %o5, [%o1 + 0xa8] ! k[42, 43]
ROTL128(%o4, %o5, %g2, %g3, 51)
stx %o4, [%o1 + 0x100] ! k[64, 65]
stx %o5, [%o1 + 0x108] ! k[66, 67]
ldx [%o1 + 0x20], %o4 ! k[ 8, 9]
ldx [%o1 + 0x28], %o5 ! k[10, 11]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x20] ! k[ 8, 9]
stx %o5, [%o1 + 0x28] ! k[10, 11]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x40] ! k[16, 17]
stx %o5, [%o1 + 0x48] ! k[18, 19]
ROTL128(%o4, %o5, %g2, %g3, 30)
stx %o4, [%o1 + 0x90] ! k[36, 37]
stx %o5, [%o1 + 0x98] ! k[38, 39]
ROTL128(%o4, %o5, %g2, %g3, 34)
stx %o4, [%o1 + 0xd0] ! k[52, 53]
stx %o5, [%o1 + 0xd8] ! k[54, 55]
ldx [%o1 + 0x30], %o4 ! k[12, 13]
ldx [%o1 + 0x38], %o5 ! k[14, 15]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x30] ! k[12, 13]
stx %o5, [%o1 + 0x38] ! k[14, 15]
ROTL128(%o4, %o5, %g2, %g3, 30)
stx %o4, [%o1 + 0x70] ! k[28, 29]
stx %o5, [%o1 + 0x78] ! k[30, 31]
srlx %o4, 32, %g2
srlx %o5, 32, %g3
stw %o4, [%o1 + 0xc0] ! k[48]
stw %g3, [%o1 + 0xc4] ! k[49]
stw %o5, [%o1 + 0xc8] ! k[50]
stw %g2, [%o1 + 0xcc] ! k[51]
ROTL128(%o4, %o5, %g2, %g3, 49)
stx %o4, [%o1 + 0xe0] ! k[56, 57]
stx %o5, [%o1 + 0xe8] ! k[58, 59]
ldx [%o1 + 0x00], %o4 ! k[ 0, 1]
ldx [%o1 + 0x08], %o5 ! k[ 2, 3]
ROTL128(%o4, %o5, %g2, %g3, 45)
stx %o4, [%o1 + 0x60] ! k[24, 25]
stx %o5, [%o1 + 0x68] ! k[26, 27]
ROTL128(%o4, %o5, %g2, %g3, 15)
stx %o4, [%o1 + 0x80] ! k[32, 33]
stx %o5, [%o1 + 0x88] ! k[34, 35]
ROTL128(%o4, %o5, %g2, %g3, 17)
stx %o4, [%o1 + 0xb0] ! k[44, 45]
stx %o5, [%o1 + 0xb8] ! k[46, 47]
ROTL128(%o4, %o5, %g2, %g3, 34)
stx %o4, [%o1 + 0xf0] ! k[60, 61]
stx %o5, [%o1 + 0xf8] ! k[62, 63]
mov (4 * 16 * 4), %o0
2:
add %o1, %o0, %o1
ldd [%o1 + 0x00], %f0
ldd [%o1 + 0x08], %f2
std %f0, [%o3 + 0x00]
std %f2, [%o3 + 0x08]
add %o3, 0x10, %o3
1:
sub %o1, (16 * 4), %o1
ldd [%o1 + 0x38], %f0
ldd [%o1 + 0x30], %f2
ldd [%o1 + 0x28], %f4
ldd [%o1 + 0x20], %f6
ldd [%o1 + 0x18], %f8
ldd [%o1 + 0x10], %f10
std %f0, [%o3 + 0x00]
std %f2, [%o3 + 0x08]
std %f4, [%o3 + 0x10]
std %f6, [%o3 + 0x18]
std %f8, [%o3 + 0x20]
std %f10, [%o3 + 0x28]
ldd [%o1 + 0x08], %f0
ldd [%o1 + 0x00], %f2
std %f0, [%o3 + 0x30]
std %f2, [%o3 + 0x38]
subcc %o0, (16 * 4), %o0
bne,pt %icc, 1b
add %o3, (16 * 4), %o3
std %f2, [%o3 - 0x10]
std %f0, [%o3 - 0x08]
retl
VISExit
ENDPROC(camellia_sparc64_key_expand)
.align 32
ENTRY(camellia_sparc64_crypt)
/* %o0=key, %o1=input, %o2=output, %o3=key_len */
VISEntry
ld [%o1 + 0x00], %f0
ld [%o1 + 0x04], %f1
ld [%o1 + 0x08], %f2
ld [%o1 + 0x0c], %f3
ldd [%o0 + 0x00], %f4
ldd [%o0 + 0x08], %f6
cmp %o3, 16
fxor %f4, %f0, %f0
be 1f
fxor %f6, %f2, %f2
ldd [%o0 + 0x10], %f8
ldd [%o0 + 0x18], %f10
ldd [%o0 + 0x20], %f12
ldd [%o0 + 0x28], %f14
ldd [%o0 + 0x30], %f16
ldd [%o0 + 0x38], %f18
ldd [%o0 + 0x40], %f20
ldd [%o0 + 0x48], %f22
add %o0, 0x40, %o0
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
1:
ldd [%o0 + 0x10], %f8
ldd [%o0 + 0x18], %f10
ldd [%o0 + 0x20], %f12
ldd [%o0 + 0x28], %f14
ldd [%o0 + 0x30], %f16
ldd [%o0 + 0x38], %f18
ldd [%o0 + 0x40], %f20
ldd [%o0 + 0x48], %f22
ldd [%o0 + 0x50], %f24
ldd [%o0 + 0x58], %f26
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f32
ldd [%o0 + 0x78], %f34
ldd [%o0 + 0x80], %f36
ldd [%o0 + 0x88], %f38
ldd [%o0 + 0x90], %f40
ldd [%o0 + 0x98], %f42
ldd [%o0 + 0xa0], %f44
ldd [%o0 + 0xa8], %f46
ldd [%o0 + 0xb0], %f48
ldd [%o0 + 0xb8], %f50
ldd [%o0 + 0xc0], %f52
ldd [%o0 + 0xc8], %f54
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS(40, 0, 2)
fxor %f52, %f2, %f2
fxor %f54, %f0, %f0
st %f2, [%o2 + 0x00]
st %f3, [%o2 + 0x04]
st %f0, [%o2 + 0x08]
st %f1, [%o2 + 0x0c]
retl
VISExit
ENDPROC(camellia_sparc64_crypt)
.align 32
ENTRY(camellia_sparc64_load_keys)
/* %o0=key, %o1=key_len */
VISEntry
ldd [%o0 + 0x00], %f4
ldd [%o0 + 0x08], %f6
ldd [%o0 + 0x10], %f8
ldd [%o0 + 0x18], %f10
ldd [%o0 + 0x20], %f12
ldd [%o0 + 0x28], %f14
ldd [%o0 + 0x30], %f16
ldd [%o0 + 0x38], %f18
ldd [%o0 + 0x40], %f20
ldd [%o0 + 0x48], %f22
ldd [%o0 + 0x50], %f24
ldd [%o0 + 0x58], %f26
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f32
ldd [%o0 + 0x78], %f34
ldd [%o0 + 0x80], %f36
ldd [%o0 + 0x88], %f38
ldd [%o0 + 0x90], %f40
ldd [%o0 + 0x98], %f42
ldd [%o0 + 0xa0], %f44
ldd [%o0 + 0xa8], %f46
ldd [%o0 + 0xb0], %f48
ldd [%o0 + 0xb8], %f50
ldd [%o0 + 0xc0], %f52
retl
ldd [%o0 + 0xc8], %f54
ENDPROC(camellia_sparc64_load_keys)
.align 32
ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key */
1: ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
add %o0, 0x10, %o0
fxor %f4, %f0, %f0
fxor %f6, %f2, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS(40, 0, 2)
fxor %f52, %f2, %f2
fxor %f54, %f0, %f0
std %f2, [%o1 + 0x00]
std %f0, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
retl
nop
ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
.align 32
ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key */
1: ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
add %o0, 0x10, %o0
fxor %f4, %f0, %f0
fxor %f6, %f2, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
ldd [%o3 + 0xd0], %f8
ldd [%o3 + 0xd8], %f10
ldd [%o3 + 0xe0], %f12
ldd [%o3 + 0xe8], %f14
ldd [%o3 + 0xf0], %f16
ldd [%o3 + 0xf8], %f18
ldd [%o3 + 0x100], %f20
ldd [%o3 + 0x108], %f22
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
CAMELLIA_F(8, 2, 0, 2)
CAMELLIA_F(10, 0, 2, 0)
ldd [%o3 + 0x10], %f8
ldd [%o3 + 0x18], %f10
CAMELLIA_F(12, 2, 0, 2)
CAMELLIA_F(14, 0, 2, 0)
ldd [%o3 + 0x20], %f12
ldd [%o3 + 0x28], %f14
CAMELLIA_F(16, 2, 0, 2)
CAMELLIA_F(18, 0, 2, 0)
ldd [%o3 + 0x30], %f16
ldd [%o3 + 0x38], %f18
fxor %f20, %f2, %f2
fxor %f22, %f0, %f0
ldd [%o3 + 0x40], %f20
ldd [%o3 + 0x48], %f22
std %f2, [%o1 + 0x00]
std %f0, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
retl
nop
ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
.align 32
ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
ldd [%o4 + 0x00], %f60
ldd [%o4 + 0x08], %f62
1: ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
add %o0, 0x10, %o0
fxor %f60, %f0, %f0
fxor %f62, %f2, %f2
fxor %f4, %f0, %f0
fxor %f6, %f2, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS(40, 0, 2)
fxor %f52, %f2, %f60
fxor %f54, %f0, %f62
std %f60, [%o1 + 0x00]
std %f62, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
std %f60, [%o4 + 0x00]
retl
std %f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
.align 32
ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
ldd [%o4 + 0x00], %f60
ldd [%o4 + 0x08], %f62
1: ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
add %o0, 0x10, %o0
fxor %f60, %f0, %f0
fxor %f62, %f2, %f2
fxor %f4, %f0, %f0
fxor %f6, %f2, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
ldd [%o3 + 0xd0], %f8
ldd [%o3 + 0xd8], %f10
ldd [%o3 + 0xe0], %f12
ldd [%o3 + 0xe8], %f14
ldd [%o3 + 0xf0], %f16
ldd [%o3 + 0xf8], %f18
ldd [%o3 + 0x100], %f20
ldd [%o3 + 0x108], %f22
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
CAMELLIA_F(8, 2, 0, 2)
CAMELLIA_F(10, 0, 2, 0)
ldd [%o3 + 0x10], %f8
ldd [%o3 + 0x18], %f10
CAMELLIA_F(12, 2, 0, 2)
CAMELLIA_F(14, 0, 2, 0)
ldd [%o3 + 0x20], %f12
ldd [%o3 + 0x28], %f14
CAMELLIA_F(16, 2, 0, 2)
CAMELLIA_F(18, 0, 2, 0)
ldd [%o3 + 0x30], %f16
ldd [%o3 + 0x38], %f18
fxor %f20, %f2, %f60
fxor %f22, %f0, %f62
ldd [%o3 + 0x40], %f20
ldd [%o3 + 0x48], %f22
std %f60, [%o1 + 0x00]
std %f62, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
std %f60, [%o4 + 0x00]
retl
std %f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
.align 32
ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
ldd [%o4 + 0x00], %f60
ldd [%o4 + 0x08], %f62
1: ldd [%o0 + 0x00], %f56
ldd [%o0 + 0x08], %f58
add %o0, 0x10, %o0
fxor %f4, %f56, %f0
fxor %f6, %f58, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS(40, 0, 2)
fxor %f52, %f2, %f2
fxor %f54, %f0, %f0
fxor %f60, %f2, %f2
fxor %f62, %f0, %f0
fsrc2 %f56, %f60
fsrc2 %f58, %f62
std %f2, [%o1 + 0x00]
std %f0, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
std %f60, [%o4 + 0x00]
retl
std %f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
.align 32
ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
ldd [%o4 + 0x00], %f60
ldd [%o4 + 0x08], %f62
1: ldd [%o0 + 0x00], %f56
ldd [%o0 + 0x08], %f58
add %o0, 0x10, %o0
fxor %f4, %f56, %f0
fxor %f6, %f58, %f2
CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
ldd [%o3 + 0xd0], %f8
ldd [%o3 + 0xd8], %f10
ldd [%o3 + 0xe0], %f12
ldd [%o3 + 0xe8], %f14
ldd [%o3 + 0xf0], %f16
ldd [%o3 + 0xf8], %f18
ldd [%o3 + 0x100], %f20
ldd [%o3 + 0x108], %f22
CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
CAMELLIA_F(8, 2, 0, 2)
CAMELLIA_F(10, 0, 2, 0)
ldd [%o3 + 0x10], %f8
ldd [%o3 + 0x18], %f10
CAMELLIA_F(12, 2, 0, 2)
CAMELLIA_F(14, 0, 2, 0)
ldd [%o3 + 0x20], %f12
ldd [%o3 + 0x28], %f14
CAMELLIA_F(16, 2, 0, 2)
CAMELLIA_F(18, 0, 2, 0)
ldd [%o3 + 0x30], %f16
ldd [%o3 + 0x38], %f18
fxor %f20, %f2, %f2
fxor %f22, %f0, %f0
ldd [%o3 + 0x40], %f20
ldd [%o3 + 0x48], %f22
fxor %f60, %f2, %f2
fxor %f62, %f0, %f0
fsrc2 %f56, %f60
fsrc2 %f58, %f62
std %f2, [%o1 + 0x00]
std %f0, [%o1 + 0x08]
subcc %o2, 0x10, %o2
bne,pt %icc, 1b
add %o1, 0x10, %o1
std %f60, [%o4 + 0x00]
retl
std %f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)

View File

@@ -0,0 +1,322 @@
/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes.
*
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
#define CAMELLIA_BLOCK_SIZE 16
#define CAMELLIA_TABLE_BYTE_LEN 272
struct camellia_sparc64_ctx {
u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
int key_len;
};
extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key,
unsigned int key_len, u64 *decrypt_key);
static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
unsigned int key_len)
{
struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
const u32 *in_key = (const u32 *) _in_key;
u32 *flags = &tfm->crt_flags;
if (key_len != 16 && key_len != 24 && key_len != 32) {
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}
ctx->key_len = key_len;
camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0],
key_len, &ctx->decrypt_key[0]);
return 0;
}
extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
u32 *output, unsigned int key_len);
static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
camellia_sparc64_crypt(&ctx->encrypt_key[0],
(const u32 *) src,
(u32 *) dst, ctx->key_len);
}
static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
camellia_sparc64_crypt(&ctx->decrypt_key[0],
(const u32 *) src,
(u32 *) dst, ctx->key_len);
}
extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len);
typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
const u64 *key);
extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1))
static int __ecb_crypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes, bool encrypt)
{
struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
ecb_crypt_op *op;
const u64 *key;
int err;
op = camellia_sparc64_ecb_crypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_ecb_crypt_4_grand_rounds;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
if (encrypt)
key = &ctx->encrypt_key[0];
else
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64;
u64 *dst64;
src64 = (const u64 *)walk.src.virt.addr;
dst64 = (u64 *) walk.dst.virt.addr;
op(src64, dst64, block_len, key);
}
nbytes &= CAMELLIA_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb_crypt(desc, dst, src, nbytes, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb_crypt(desc, dst, src, nbytes, false);
}
typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
const u64 *key, u64 *iv);
extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
static int cbc_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
int err;
op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
key = &ctx->encrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64;
u64 *dst64;
src64 = (const u64 *)walk.src.virt.addr;
dst64 = (u64 *) walk.dst.virt.addr;
op(src64, dst64, block_len, key,
(u64 *) walk.iv);
}
nbytes &= CAMELLIA_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int cbc_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
int err;
op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64;
u64 *dst64;
src64 = (const u64 *)walk.src.virt.addr;
dst64 = (u64 *) walk.dst.virt.addr;
op(src64, dst64, block_len, key,
(u64 *) walk.iv);
}
nbytes &= CAMELLIA_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static struct crypto_alg algs[] = { {
.cra_name = "camellia",
.cra_driver_name = "camellia-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
.cra_alignmask = 3,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
.cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
.cia_setkey = camellia_set_key,
.cia_encrypt = camellia_encrypt,
.cia_decrypt = camellia_decrypt
}
}
}, {
.cra_name = "ecb(camellia)",
.cra_driver_name = "ecb-camellia-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_set_key,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "cbc(camellia)",
.cra_driver_name = "cbc-camellia-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_set_key,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}
};
static bool __init sparc64_has_camellia_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_CAMELLIA))
return false;
return true;
}
static int __init camellia_sparc64_mod_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(algs); i++)
INIT_LIST_HEAD(&algs[i].cra_list);
if (sparc64_has_camellia_opcode()) {
pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
}
pr_info("sparc64 camellia opcodes not available.\n");
return -ENODEV;
}
static void __exit camellia_sparc64_mod_fini(void)
{
crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
module_init(camellia_sparc64_mod_init);
module_exit(camellia_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
MODULE_ALIAS("aes");

View File

@@ -0,0 +1,20 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include "opcodes.h"
ENTRY(crc32c_sparc64)
/* %o0=crc32p, %o1=data_ptr, %o2=len */
VISEntryHalf
lda [%o0] ASI_PL, %f1
1: ldd [%o1], %f2
CRC32C(0,2,0)
subcc %o2, 8, %o2
bne,pt %icc, 1b
add %o1, 0x8, %o1
sta %f1, [%o0] ASI_PL
VISExitHalf
2: retl
nop
ENDPROC(crc32c_sparc64)

View File

@@ -0,0 +1,179 @@
/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
*
* This is based largely upon arch/x86/crypto/crc32c-intel.c
*
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
* Kent Liu <kent.liu@intel.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set
* the seed.
*/
static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) {
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
return 0;
}
static int crc32c_sparc64_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm);
u32 *crcp = shash_desc_ctx(desc);
*crcp = *mctx;
return 0;
}
extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);
static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len)
{
unsigned int asm_len;
asm_len = len & ~7U;
if (asm_len) {
crc32c_sparc64(crcp, data, asm_len);
data += asm_len / 8;
len -= asm_len;
}
if (len)
*crcp = __crc32c_le(*crcp, (const unsigned char *) data, len);
}
static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
crc32c_compute(crcp, (const u64 *) data, len);
return 0;
}
static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
u32 tmp = *crcp;
crc32c_compute(&tmp, (const u64 *) data, len);
*(__le32 *) out = ~cpu_to_le32(tmp);
return 0;
}
static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *) out = ~cpu_to_le32p(crcp);
return 0;
}
static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = ~0;
return 0;
}
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
static struct shash_alg alg = {
.setkey = crc32c_sparc64_setkey,
.init = crc32c_sparc64_init,
.update = crc32c_sparc64_update,
.final = crc32c_sparc64_final,
.finup = crc32c_sparc64_finup,
.digest = crc32c_sparc64_digest,
.descsize = sizeof(u32),
.digestsize = CHKSUM_DIGEST_SIZE,
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
.cra_init = crc32c_sparc64_cra_init,
}
};
static bool __init sparc64_has_crc32c_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_CRC32C))
return false;
return true;
}
static int __init crc32c_sparc64_mod_init(void)
{
if (sparc64_has_crc32c_opcode()) {
pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
return crypto_register_shash(&alg);
}
pr_info("sparc64 crc32c opcode not available.\n");
return -ENODEV;
}
static void __exit crc32c_sparc64_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc32c_sparc64_mod_init);
module_exit(crc32c_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
MODULE_ALIAS("crc32c");

View File

@@ -0,0 +1,14 @@
#include <linux/module.h>
#include <linux/of_device.h>
/* This is a dummy device table linked into all of the crypto
* opcode drivers. It serves to trigger the module autoloading
* mechanisms in userspace which scan the OF device tree and
* load any modules which have device table entries that
* match OF device nodes.
*/
static const struct of_device_id crypto_opcode_match[] = {
{ .name = "cpu", .compatible = "sun4v", },
{},
};
MODULE_DEVICE_TABLE(of, crypto_opcode_match);

418
arch/sparc/crypto/des_asm.S Normal file
View File

@@ -0,0 +1,418 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
.align 32
ENTRY(des_sparc64_key_expand)
/* %o0=input_key, %o1=output_key */
VISEntryHalf
ld [%o0 + 0x00], %f0
ld [%o0 + 0x04], %f1
DES_KEXPAND(0, 0, 0)
DES_KEXPAND(0, 1, 2)
DES_KEXPAND(2, 3, 6)
DES_KEXPAND(2, 2, 4)
DES_KEXPAND(6, 3, 10)
DES_KEXPAND(6, 2, 8)
DES_KEXPAND(10, 3, 14)
DES_KEXPAND(10, 2, 12)
DES_KEXPAND(14, 1, 16)
DES_KEXPAND(16, 3, 20)
DES_KEXPAND(16, 2, 18)
DES_KEXPAND(20, 3, 24)
DES_KEXPAND(20, 2, 22)
DES_KEXPAND(24, 3, 28)
DES_KEXPAND(24, 2, 26)
DES_KEXPAND(28, 1, 30)
std %f0, [%o1 + 0x00]
std %f2, [%o1 + 0x08]
std %f4, [%o1 + 0x10]
std %f6, [%o1 + 0x18]
std %f8, [%o1 + 0x20]
std %f10, [%o1 + 0x28]
std %f12, [%o1 + 0x30]
std %f14, [%o1 + 0x38]
std %f16, [%o1 + 0x40]
std %f18, [%o1 + 0x48]
std %f20, [%o1 + 0x50]
std %f22, [%o1 + 0x58]
std %f24, [%o1 + 0x60]
std %f26, [%o1 + 0x68]
std %f28, [%o1 + 0x70]
std %f30, [%o1 + 0x78]
retl
VISExitHalf
ENDPROC(des_sparc64_key_expand)
.align 32
ENTRY(des_sparc64_crypt)
/* %o0=key, %o1=input, %o2=output */
VISEntry
ldd [%o1 + 0x00], %f32
ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
ldd [%o0 + 0x10], %f4
ldd [%o0 + 0x18], %f6
ldd [%o0 + 0x20], %f8
ldd [%o0 + 0x28], %f10
ldd [%o0 + 0x30], %f12
ldd [%o0 + 0x38], %f14
ldd [%o0 + 0x40], %f16
ldd [%o0 + 0x48], %f18
ldd [%o0 + 0x50], %f20
ldd [%o0 + 0x58], %f22
ldd [%o0 + 0x60], %f24
ldd [%o0 + 0x68], %f26
ldd [%o0 + 0x70], %f28
ldd [%o0 + 0x78], %f30
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
DES_ROUND(4, 6, 32, 32)
DES_ROUND(8, 10, 32, 32)
DES_ROUND(12, 14, 32, 32)
DES_ROUND(16, 18, 32, 32)
DES_ROUND(20, 22, 32, 32)
DES_ROUND(24, 26, 32, 32)
DES_ROUND(28, 30, 32, 32)
DES_IIP(32, 32)
std %f32, [%o2 + 0x00]
retl
VISExit
ENDPROC(des_sparc64_crypt)
.align 32
ENTRY(des_sparc64_load_keys)
/* %o0=key */
VISEntry
ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
ldd [%o0 + 0x10], %f4
ldd [%o0 + 0x18], %f6
ldd [%o0 + 0x20], %f8
ldd [%o0 + 0x28], %f10
ldd [%o0 + 0x30], %f12
ldd [%o0 + 0x38], %f14
ldd [%o0 + 0x40], %f16
ldd [%o0 + 0x48], %f18
ldd [%o0 + 0x50], %f20
ldd [%o0 + 0x58], %f22
ldd [%o0 + 0x60], %f24
ldd [%o0 + 0x68], %f26
ldd [%o0 + 0x70], %f28
retl
ldd [%o0 + 0x78], %f30
ENDPROC(des_sparc64_load_keys)
.align 32
ENTRY(des_sparc64_ecb_crypt)
/* %o0=input, %o1=output, %o2=len */
1: ldd [%o0 + 0x00], %f32
add %o0, 0x08, %o0
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
DES_ROUND(4, 6, 32, 32)
DES_ROUND(8, 10, 32, 32)
DES_ROUND(12, 14, 32, 32)
DES_ROUND(16, 18, 32, 32)
DES_ROUND(20, 22, 32, 32)
DES_ROUND(24, 26, 32, 32)
DES_ROUND(28, 30, 32, 32)
DES_IIP(32, 32)
std %f32, [%o1 + 0x00]
subcc %o2, 0x08, %o2
bne,pt %icc, 1b
add %o1, 0x08, %o1
retl
nop
ENDPROC(des_sparc64_ecb_crypt)
.align 32
ENTRY(des_sparc64_cbc_encrypt)
/* %o0=input, %o1=output, %o2=len, %o3=IV */
ldd [%o3 + 0x00], %f32
1: ldd [%o0 + 0x00], %f34
fxor %f32, %f34, %f32
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
DES_ROUND(4, 6, 32, 32)
DES_ROUND(8, 10, 32, 32)
DES_ROUND(12, 14, 32, 32)
DES_ROUND(16, 18, 32, 32)
DES_ROUND(20, 22, 32, 32)
DES_ROUND(24, 26, 32, 32)
DES_ROUND(28, 30, 32, 32)
DES_IIP(32, 32)
std %f32, [%o1 + 0x00]
add %o0, 0x08, %o0
subcc %o2, 0x08, %o2
bne,pt %icc, 1b
add %o1, 0x08, %o1
retl
std %f32, [%o3 + 0x00]
ENDPROC(des_sparc64_cbc_encrypt)
.align 32
ENTRY(des_sparc64_cbc_decrypt)
/* %o0=input, %o1=output, %o2=len, %o3=IV */
ldd [%o3 + 0x00], %f34
1: ldd [%o0 + 0x00], %f36
DES_IP(36, 32)
DES_ROUND(0, 2, 32, 32)
DES_ROUND(4, 6, 32, 32)
DES_ROUND(8, 10, 32, 32)
DES_ROUND(12, 14, 32, 32)
DES_ROUND(16, 18, 32, 32)
DES_ROUND(20, 22, 32, 32)
DES_ROUND(24, 26, 32, 32)
DES_ROUND(28, 30, 32, 32)
DES_IIP(32, 32)
fxor %f32, %f34, %f32
fsrc2 %f36, %f34
std %f32, [%o1 + 0x00]
add %o0, 0x08, %o0
subcc %o2, 0x08, %o2
bne,pt %icc, 1b
add %o1, 0x08, %o1
retl
std %f36, [%o3 + 0x00]
ENDPROC(des_sparc64_cbc_decrypt)
.align 32
ENTRY(des3_ede_sparc64_crypt)
/* %o0=key, %o1=input, %o2=output */
VISEntry
ldd [%o1 + 0x00], %f32
ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
ldd [%o0 + 0x10], %f4
ldd [%o0 + 0x18], %f6
ldd [%o0 + 0x20], %f8
ldd [%o0 + 0x28], %f10
ldd [%o0 + 0x30], %f12
ldd [%o0 + 0x38], %f14
ldd [%o0 + 0x40], %f16
ldd [%o0 + 0x48], %f18
ldd [%o0 + 0x50], %f20
ldd [%o0 + 0x58], %f22
ldd [%o0 + 0x60], %f24
ldd [%o0 + 0x68], %f26
ldd [%o0 + 0x70], %f28
ldd [%o0 + 0x78], %f30
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
ldd [%o0 + 0x80], %f0
ldd [%o0 + 0x88], %f2
DES_ROUND(4, 6, 32, 32)
ldd [%o0 + 0x90], %f4
ldd [%o0 + 0x98], %f6
DES_ROUND(8, 10, 32, 32)
ldd [%o0 + 0xa0], %f8
ldd [%o0 + 0xa8], %f10
DES_ROUND(12, 14, 32, 32)
ldd [%o0 + 0xb0], %f12
ldd [%o0 + 0xb8], %f14
DES_ROUND(16, 18, 32, 32)
ldd [%o0 + 0xc0], %f16
ldd [%o0 + 0xc8], %f18
DES_ROUND(20, 22, 32, 32)
ldd [%o0 + 0xd0], %f20
ldd [%o0 + 0xd8], %f22
DES_ROUND(24, 26, 32, 32)
ldd [%o0 + 0xe0], %f24
ldd [%o0 + 0xe8], %f26
DES_ROUND(28, 30, 32, 32)
ldd [%o0 + 0xf0], %f28
ldd [%o0 + 0xf8], %f30
DES_IIP(32, 32)
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
ldd [%o0 + 0x100], %f0
ldd [%o0 + 0x108], %f2
DES_ROUND(4, 6, 32, 32)
ldd [%o0 + 0x110], %f4
ldd [%o0 + 0x118], %f6
DES_ROUND(8, 10, 32, 32)
ldd [%o0 + 0x120], %f8
ldd [%o0 + 0x128], %f10
DES_ROUND(12, 14, 32, 32)
ldd [%o0 + 0x130], %f12
ldd [%o0 + 0x138], %f14
DES_ROUND(16, 18, 32, 32)
ldd [%o0 + 0x140], %f16
ldd [%o0 + 0x148], %f18
DES_ROUND(20, 22, 32, 32)
ldd [%o0 + 0x150], %f20
ldd [%o0 + 0x158], %f22
DES_ROUND(24, 26, 32, 32)
ldd [%o0 + 0x160], %f24
ldd [%o0 + 0x168], %f26
DES_ROUND(28, 30, 32, 32)
ldd [%o0 + 0x170], %f28
ldd [%o0 + 0x178], %f30
DES_IIP(32, 32)
DES_IP(32, 32)
DES_ROUND(0, 2, 32, 32)
DES_ROUND(4, 6, 32, 32)
DES_ROUND(8, 10, 32, 32)
DES_ROUND(12, 14, 32, 32)
DES_ROUND(16, 18, 32, 32)
DES_ROUND(20, 22, 32, 32)
DES_ROUND(24, 26, 32, 32)
DES_ROUND(28, 30, 32, 32)
DES_IIP(32, 32)
std %f32, [%o2 + 0x00]
retl
VISExit
ENDPROC(des3_ede_sparc64_crypt)
.align 32
ENTRY(des3_ede_sparc64_load_keys)
/* %o0=key */
VISEntry
ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
ldd [%o0 + 0x10], %f4
ldd [%o0 + 0x18], %f6
ldd [%o0 + 0x20], %f8
ldd [%o0 + 0x28], %f10
ldd [%o0 + 0x30], %f12
ldd [%o0 + 0x38], %f14
ldd [%o0 + 0x40], %f16
ldd [%o0 + 0x48], %f18
ldd [%o0 + 0x50], %f20
ldd [%o0 + 0x58], %f22
ldd [%o0 + 0x60], %f24
ldd [%o0 + 0x68], %f26
ldd [%o0 + 0x70], %f28
ldd [%o0 + 0x78], %f30
ldd [%o0 + 0x80], %f32
ldd [%o0 + 0x88], %f34
ldd [%o0 + 0x90], %f36
ldd [%o0 + 0x98], %f38
ldd [%o0 + 0xa0], %f40
ldd [%o0 + 0xa8], %f42
ldd [%o0 + 0xb0], %f44
ldd [%o0 + 0xb8], %f46
ldd [%o0 + 0xc0], %f48
ldd [%o0 + 0xc8], %f50
ldd [%o0 + 0xd0], %f52
ldd [%o0 + 0xd8], %f54
ldd [%o0 + 0xe0], %f56
retl
ldd [%o0 + 0xe8], %f58
ENDPROC(des3_ede_sparc64_load_keys)
#define DES3_LOOP_BODY(X) \
DES_IP(X, X) \
DES_ROUND(0, 2, X, X) \
DES_ROUND(4, 6, X, X) \
DES_ROUND(8, 10, X, X) \
DES_ROUND(12, 14, X, X) \
DES_ROUND(16, 18, X, X) \
ldd [%o0 + 0xf0], %f16; \
ldd [%o0 + 0xf8], %f18; \
DES_ROUND(20, 22, X, X) \
ldd [%o0 + 0x100], %f20; \
ldd [%o0 + 0x108], %f22; \
DES_ROUND(24, 26, X, X) \
ldd [%o0 + 0x110], %f24; \
ldd [%o0 + 0x118], %f26; \
DES_ROUND(28, 30, X, X) \
ldd [%o0 + 0x120], %f28; \
ldd [%o0 + 0x128], %f30; \
DES_IIP(X, X) \
DES_IP(X, X) \
DES_ROUND(32, 34, X, X) \
ldd [%o0 + 0x130], %f0; \
ldd [%o0 + 0x138], %f2; \
DES_ROUND(36, 38, X, X) \
ldd [%o0 + 0x140], %f4; \
ldd [%o0 + 0x148], %f6; \
DES_ROUND(40, 42, X, X) \
ldd [%o0 + 0x150], %f8; \
ldd [%o0 + 0x158], %f10; \
DES_ROUND(44, 46, X, X) \
ldd [%o0 + 0x160], %f12; \
ldd [%o0 + 0x168], %f14; \
DES_ROUND(48, 50, X, X) \
DES_ROUND(52, 54, X, X) \
DES_ROUND(56, 58, X, X) \
DES_ROUND(16, 18, X, X) \
ldd [%o0 + 0x170], %f16; \
ldd [%o0 + 0x178], %f18; \
DES_IIP(X, X) \
DES_IP(X, X) \
DES_ROUND(20, 22, X, X) \
ldd [%o0 + 0x50], %f20; \
ldd [%o0 + 0x58], %f22; \
DES_ROUND(24, 26, X, X) \
ldd [%o0 + 0x60], %f24; \
ldd [%o0 + 0x68], %f26; \
DES_ROUND(28, 30, X, X) \
ldd [%o0 + 0x70], %f28; \
ldd [%o0 + 0x78], %f30; \
DES_ROUND(0, 2, X, X) \
ldd [%o0 + 0x00], %f0; \
ldd [%o0 + 0x08], %f2; \
DES_ROUND(4, 6, X, X) \
ldd [%o0 + 0x10], %f4; \
ldd [%o0 + 0x18], %f6; \
DES_ROUND(8, 10, X, X) \
ldd [%o0 + 0x20], %f8; \
ldd [%o0 + 0x28], %f10; \
DES_ROUND(12, 14, X, X) \
ldd [%o0 + 0x30], %f12; \
ldd [%o0 + 0x38], %f14; \
DES_ROUND(16, 18, X, X) \
ldd [%o0 + 0x40], %f16; \
ldd [%o0 + 0x48], %f18; \
DES_IIP(X, X)
.align 32
ENTRY(des3_ede_sparc64_ecb_crypt)
/* %o0=key, %o1=input, %o2=output, %o3=len */
1: ldd [%o1 + 0x00], %f60
DES3_LOOP_BODY(60)
std %f60, [%o2 + 0x00]
subcc %o3, 0x08, %o3
bne,pt %icc, 1b
add %o2, 0x08, %o2
retl
nop
ENDPROC(des3_ede_sparc64_ecb_crypt)
.align 32
ENTRY(des3_ede_sparc64_cbc_encrypt)
/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
ldd [%o4 + 0x00], %f60
1: ldd [%o1 + 0x00], %f62
fxor %f60, %f62, %f60
DES3_LOOP_BODY(60)
std %f60, [%o2 + 0x00]
add %o1, 0x08, %o1
subcc %o3, 0x08, %o3
bne,pt %icc, 1b
add %o2, 0x08, %o2
retl
std %f60, [%o4 + 0x00]
ENDPROC(des3_ede_sparc64_cbc_encrypt)
.align 32
ENTRY(des3_ede_sparc64_cbc_decrypt)
/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
ldd [%o4 + 0x00], %f62
1: ldx [%o1 + 0x00], %g1
MOVXTOD_G1_F60
DES3_LOOP_BODY(60)
fxor %f62, %f60, %f60
MOVXTOD_G1_F62
std %f60, [%o2 + 0x00]
add %o1, 0x08, %o1
subcc %o3, 0x08, %o3
bne,pt %icc, 1b
add %o2, 0x08, %o2
retl
stx %g1, [%o4 + 0x00]
ENDPROC(des3_ede_sparc64_cbc_decrypt)

View File

@@ -0,0 +1,529 @@
/* Glue code for DES encryption optimized for sparc64 crypto opcodes.
*
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/des.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
struct des_sparc64_ctx {
u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
};
struct des3_ede_sparc64_ctx {
u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
};
static void encrypt_to_decrypt(u64 *d, const u64 *e)
{
const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1;
int i;
for (i = 0; i < DES_EXPKEY_WORDS / 2; i++)
*d++ = *s--;
}
extern void des_sparc64_key_expand(const u32 *input_key, u64 *key);
static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
u32 tmp[DES_EXPKEY_WORDS];
int ret;
/* Even though we have special instructions for key expansion,
* we call des_ekey() so that we don't have to write our own
* weak key detection code.
*/
ret = des_ekey(tmp, key);
if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
*flags |= CRYPTO_TFM_RES_WEAK_KEY;
return -EINVAL;
}
des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);
return 0;
}
extern void des_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
const u64 *K = ctx->encrypt_expkey;
des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}
static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
const u64 *K = ctx->decrypt_expkey;
des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}
extern void des_sparc64_load_keys(const u64 *key);
extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
unsigned int len);
#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1))
static int __ecb_crypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes, bool encrypt)
{
struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
if (encrypt)
des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
else
des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb_crypt(desc, dst, src, nbytes, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb_crypt(desc, dst, src, nbytes, false);
}
extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
unsigned int len, u64 *iv);
static int cbc_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len, (u64 *) walk.iv);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
unsigned int len, u64 *iv);
static int cbc_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
(u64 *) walk.dst.virt.addr,
block_len, (u64 *) walk.iv);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
const u32 *K = (const u32 *)key;
u32 *flags = &tfm->crt_flags;
u64 k1[DES_EXPKEY_WORDS / 2];
u64 k2[DES_EXPKEY_WORDS / 2];
u64 k3[DES_EXPKEY_WORDS / 2];
if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
!((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
(*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
*flags |= CRYPTO_TFM_RES_WEAK_KEY;
return -EINVAL;
}
des_sparc64_key_expand((const u32 *)key, k1);
key += DES_KEY_SIZE;
des_sparc64_key_expand((const u32 *)key, k2);
key += DES_KEY_SIZE;
des_sparc64_key_expand((const u32 *)key, k3);
memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
&k3[0], sizeof(k3));
encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
&k2[0], sizeof(k2));
encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
&k1[0]);
return 0;
}
extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
const u64 *K = ctx->encrypt_expkey;
des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}
static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
const u64 *K = ctx->decrypt_expkey;
des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}
extern void des3_ede_sparc64_load_keys(const u64 *key);
extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len);
static int __ecb3_crypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes, bool encrypt)
{
struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
const u64 *K;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
if (encrypt)
K = &ctx->encrypt_expkey[0];
else
K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64 = (const u64 *)walk.src.virt.addr;
des3_ede_sparc64_ecb_crypt(K, src64,
(u64 *) walk.dst.virt.addr,
block_len);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static int ecb3_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb3_crypt(desc, dst, src, nbytes, true);
}
static int ecb3_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
return __ecb3_crypt(desc, dst, src, nbytes, false);
}
extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
static int cbc3_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
const u64 *K;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
K = &ctx->encrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64 = (const u64 *)walk.src.virt.addr;
des3_ede_sparc64_cbc_encrypt(K, src64,
(u64 *) walk.dst.virt.addr,
block_len,
(u64 *) walk.iv);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
static int cbc3_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
const u64 *K;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
while ((nbytes = walk.nbytes)) {
unsigned int block_len = nbytes & DES_BLOCK_MASK;
if (likely(block_len)) {
const u64 *src64 = (const u64 *)walk.src.virt.addr;
des3_ede_sparc64_cbc_decrypt(K, src64,
(u64 *) walk.dst.virt.addr,
block_len,
(u64 *) walk.iv);
}
nbytes &= DES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
fprs_write(0);
return err;
}
static struct crypto_alg algs[] = { {
.cra_name = "des",
.cra_driver_name = "des-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des_sparc64_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = DES_KEY_SIZE,
.cia_max_keysize = DES_KEY_SIZE,
.cia_setkey = des_set_key,
.cia_encrypt = des_encrypt,
.cia_decrypt = des_decrypt
}
}
}, {
.cra_name = "ecb(des)",
.cra_driver_name = "ecb-des-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.setkey = des_set_key,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "cbc(des)",
.cra_driver_name = "cbc-des-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.setkey = des_set_key,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
.cra_alignmask = 7,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = DES3_EDE_KEY_SIZE,
.cia_max_keysize = DES3_EDE_KEY_SIZE,
.cia_setkey = des3_ede_set_key,
.cia_encrypt = des3_ede_encrypt,
.cia_decrypt = des3_ede_decrypt
}
}
}, {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "ecb-des3_ede-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.setkey = des3_ede_set_key,
.encrypt = ecb3_encrypt,
.decrypt = ecb3_decrypt,
},
},
}, {
.cra_name = "cbc(des3_ede)",
.cra_driver_name = "cbc-des3_ede-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
.cra_alignmask = 7,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.setkey = des3_ede_set_key,
.encrypt = cbc3_encrypt,
.decrypt = cbc3_decrypt,
},
},
} };
static bool __init sparc64_has_des_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_DES))
return false;
return true;
}
static int __init des_sparc64_mod_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(algs); i++)
INIT_LIST_HEAD(&algs[i].cra_list);
if (sparc64_has_des_opcode()) {
pr_info("Using sparc64 des opcodes optimized DES implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
}
pr_info("sparc64 des opcodes not available.\n");
return -ENODEV;
}
static void __exit des_sparc64_mod_fini(void)
{
crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
module_init(des_sparc64_mod_init);
module_exit(des_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
MODULE_ALIAS("des");

View File

@@ -0,0 +1,70 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
ENTRY(md5_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntryHalf
ld [%o0 + 0x00], %f0
ld [%o0 + 0x04], %f1
andcc %o1, 0x7, %g0
ld [%o0 + 0x08], %f2
bne,pn %xcc, 10f
ld [%o0 + 0x0c], %f3
1:
ldd [%o1 + 0x00], %f8
ldd [%o1 + 0x08], %f10
ldd [%o1 + 0x10], %f12
ldd [%o1 + 0x18], %f14
ldd [%o1 + 0x20], %f16
ldd [%o1 + 0x28], %f18
ldd [%o1 + 0x30], %f20
ldd [%o1 + 0x38], %f22
MD5
subcc %o2, 1, %o2
bne,pt %xcc, 1b
add %o1, 0x40, %o1
5:
st %f0, [%o0 + 0x00]
st %f1, [%o0 + 0x04]
st %f2, [%o0 + 0x08]
st %f3, [%o0 + 0x0c]
retl
VISExitHalf
10:
alignaddr %o1, %g0, %o1
ldd [%o1 + 0x00], %f10
1:
ldd [%o1 + 0x08], %f12
ldd [%o1 + 0x10], %f14
ldd [%o1 + 0x18], %f16
ldd [%o1 + 0x20], %f18
ldd [%o1 + 0x28], %f20
ldd [%o1 + 0x30], %f22
ldd [%o1 + 0x38], %f24
ldd [%o1 + 0x40], %f26
faligndata %f10, %f12, %f8
faligndata %f12, %f14, %f10
faligndata %f14, %f16, %f12
faligndata %f16, %f18, %f14
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
MD5
subcc %o2, 1, %o2
fsrc2 %f26, %f10
bne,pt %xcc, 1b
add %o1, 0x40, %o1
ba,a,pt %xcc, 5b
ENDPROC(md5_sparc64_transform)

View File

@@ -0,0 +1,188 @@
/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
*
* This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
* and crypto/md5.c which are:
*
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Cryptoapi developers.
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/md5.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
unsigned int rounds);
static int md5_sparc64_init(struct shash_desc *desc)
{
struct md5_state *mctx = shash_desc_ctx(desc);
mctx->hash[0] = cpu_to_le32(0x67452301);
mctx->hash[1] = cpu_to_le32(0xefcdab89);
mctx->hash[2] = cpu_to_le32(0x98badcfe);
mctx->hash[3] = cpu_to_le32(0x10325476);
mctx->byte_count = 0;
return 0;
}
static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
sctx->byte_count += len;
if (partial) {
done = MD5_HMAC_BLOCK_SIZE - partial;
memcpy((u8 *)sctx->block + partial, data, done);
md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
}
if (len - done >= MD5_HMAC_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;
md5_sparc64_transform(sctx->hash, data + done, rounds);
done += rounds * MD5_HMAC_BLOCK_SIZE;
}
memcpy(sctx->block, data + done, len - done);
}
static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct md5_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < MD5_HMAC_BLOCK_SIZE) {
sctx->byte_count += len;
memcpy((u8 *)sctx->block + partial, data, len);
} else
__md5_sparc64_update(sctx, data, len, partial);
return 0;
}
/* Add padding and return the message digest. */
static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
{
struct md5_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
u32 *dst = (u32 *)out;
__le64 bits;
static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_le64(sctx->byte_count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);
/* We need to fill a whole block for __md5_sparc64_update() */
if (padlen <= 56) {
sctx->byte_count += padlen;
memcpy((u8 *)sctx->block + index, padding, padlen);
} else {
__md5_sparc64_update(sctx, padding, padlen, index);
}
__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < MD5_HASH_WORDS; i++)
dst[i] = sctx->hash[i];
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int md5_sparc64_export(struct shash_desc *desc, void *out)
{
struct md5_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int md5_sparc64_import(struct shash_desc *desc, const void *in)
{
struct md5_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = MD5_DIGEST_SIZE,
.init = md5_sparc64_init,
.update = md5_sparc64_update,
.final = md5_sparc64_final,
.export = md5_sparc64_export,
.import = md5_sparc64_import,
.descsize = sizeof(struct md5_state),
.statesize = sizeof(struct md5_state),
.base = {
.cra_name = "md5",
.cra_driver_name= "md5-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static bool __init sparc64_has_md5_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_MD5))
return false;
return true;
}
static int __init md5_sparc64_mod_init(void)
{
if (sparc64_has_md5_opcode()) {
pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
return crypto_register_shash(&alg);
}
pr_info("sparc64 md5 opcode not available.\n");
return -ENODEV;
}
static void __exit md5_sparc64_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(md5_sparc64_mod_init);
module_exit(md5_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");
MODULE_ALIAS("md5");

View File

@@ -0,0 +1,99 @@
#ifndef _OPCODES_H
#define _OPCODES_H
#define SPARC_CR_OPCODE_PRIORITY 300
#define F3F(x,y,z) (((x)<<30)|((y)<<19)|((z)<<5))
#define FPD_ENCODE(x) (((x) >> 5) | ((x) & ~(0x20)))
#define RS1(x) (FPD_ENCODE(x) << 14)
#define RS2(x) (FPD_ENCODE(x) << 0)
#define RS3(x) (FPD_ENCODE(x) << 9)
#define RD(x) (FPD_ENCODE(x) << 25)
#define IMM5_0(x) ((x) << 0)
#define IMM5_9(x) ((x) << 9)
#define CRC32C(a,b,c) \
.word (F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));
#define MD5 \
.word 0x81b02800;
#define SHA1 \
.word 0x81b02820;
#define SHA256 \
.word 0x81b02840;
#define SHA512 \
.word 0x81b02860;
#define AES_EROUND01(a,b,c,d) \
.word (F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23(a,b,c,d) \
.word (F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01(a,b,c,d) \
.word (F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23(a,b,c,d) \
.word (F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND01_L(a,b,c,d) \
.word (F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23_L(a,b,c,d) \
.word (F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01_L(a,b,c,d) \
.word (F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23_L(a,b,c,d) \
.word (F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_KEXPAND1(a,b,c,d) \
.word (F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
#define AES_KEXPAND0(a,b,c) \
.word (F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
#define AES_KEXPAND2(a,b,c) \
.word (F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
#define DES_IP(a,b) \
.word (F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
#define DES_IIP(a,b) \
.word (F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
#define DES_KEXPAND(a,b,c) \
.word (F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
#define DES_ROUND(a,b,c,d) \
.word (F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define CAMELLIA_F(a,b,c,d) \
.word (F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define CAMELLIA_FL(a,b,c) \
.word (F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
#define CAMELLIA_FLI(a,b,c) \
.word (F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
#define MOVDTOX_F0_O4 \
.word 0x99b02200
#define MOVDTOX_F2_O5 \
.word 0x9bb02202
#define MOVXTOD_G1_F60 \
.word 0xbbb02301
#define MOVXTOD_G1_F62 \
.word 0xbfb02301
#define MOVXTOD_G3_F4 \
.word 0x89b02303;
#define MOVXTOD_G7_F6 \
.word 0x8db02307;
#define MOVXTOD_G3_F0 \
.word 0x81b02303;
#define MOVXTOD_G7_F2 \
.word 0x85b02307;
#define MOVXTOD_O0_F0 \
.word 0x81b02308;
#define MOVXTOD_O5_F0 \
.word 0x81b0230d;
#define MOVXTOD_O5_F2 \
.word 0x85b0230d;
#define MOVXTOD_O5_F4 \
.word 0x89b0230d;
#define MOVXTOD_O5_F6 \
.word 0x8db0230d;
#define MOVXTOD_G3_F60 \
.word 0xbbb02303;
#define MOVXTOD_G7_F62 \
.word 0xbfb02307;
#endif /* _OPCODES_H */

View File

@@ -0,0 +1,72 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
ENTRY(sha1_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntryHalf
ld [%o0 + 0x00], %f0
ld [%o0 + 0x04], %f1
ld [%o0 + 0x08], %f2
andcc %o1, 0x7, %g0
ld [%o0 + 0x0c], %f3
bne,pn %xcc, 10f
ld [%o0 + 0x10], %f4
1:
ldd [%o1 + 0x00], %f8
ldd [%o1 + 0x08], %f10
ldd [%o1 + 0x10], %f12
ldd [%o1 + 0x18], %f14
ldd [%o1 + 0x20], %f16
ldd [%o1 + 0x28], %f18
ldd [%o1 + 0x30], %f20
ldd [%o1 + 0x38], %f22
SHA1
subcc %o2, 1, %o2
bne,pt %xcc, 1b
add %o1, 0x40, %o1
5:
st %f0, [%o0 + 0x00]
st %f1, [%o0 + 0x04]
st %f2, [%o0 + 0x08]
st %f3, [%o0 + 0x0c]
st %f4, [%o0 + 0x10]
retl
VISExitHalf
10:
alignaddr %o1, %g0, %o1
ldd [%o1 + 0x00], %f10
1:
ldd [%o1 + 0x08], %f12
ldd [%o1 + 0x10], %f14
ldd [%o1 + 0x18], %f16
ldd [%o1 + 0x20], %f18
ldd [%o1 + 0x28], %f20
ldd [%o1 + 0x30], %f22
ldd [%o1 + 0x38], %f24
ldd [%o1 + 0x40], %f26
faligndata %f10, %f12, %f8
faligndata %f12, %f14, %f10
faligndata %f14, %f16, %f12
faligndata %f16, %f18, %f14
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
SHA1
subcc %o2, 1, %o2
fsrc2 %f26, %f10
bne,pt %xcc, 1b
add %o1, 0x40, %o1
ba,a,pt %xcc, 5b
ENDPROC(sha1_sparc64_transform)

View File

@@ -0,0 +1,183 @@
/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
*
* This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
*
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
unsigned int rounds);
static int sha1_sparc64_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_sparc64_transform(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
}
static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
} else
__sha1_sparc64_update(sctx, data, len, partial);
return 0;
}
/* Add padding and return the message digest. */
static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
/* We need to fill a whole block for __sha1_sparc64_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_sparc64_update(sctx, padding, padlen, index);
}
__sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha1_sparc64_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_sparc64_init,
.update = sha1_sparc64_update,
.final = sha1_sparc64_final,
.export = sha1_sparc64_export,
.import = sha1_sparc64_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static bool __init sparc64_has_sha1_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_SHA1))
return false;
return true;
}
static int __init sha1_sparc64_mod_init(void)
{
if (sparc64_has_sha1_opcode()) {
pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n");
return crypto_register_shash(&alg);
}
pr_info("sparc64 sha1 opcode not available.\n");
return -ENODEV;
}
static void __exit sha1_sparc64_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha1_sparc64_mod_init);
module_exit(sha1_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
MODULE_ALIAS("sha1");

View File

@@ -0,0 +1,78 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
ENTRY(sha256_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntryHalf
ld [%o0 + 0x00], %f0
ld [%o0 + 0x04], %f1
ld [%o0 + 0x08], %f2
ld [%o0 + 0x0c], %f3
ld [%o0 + 0x10], %f4
ld [%o0 + 0x14], %f5
andcc %o1, 0x7, %g0
ld [%o0 + 0x18], %f6
bne,pn %xcc, 10f
ld [%o0 + 0x1c], %f7
1:
ldd [%o1 + 0x00], %f8
ldd [%o1 + 0x08], %f10
ldd [%o1 + 0x10], %f12
ldd [%o1 + 0x18], %f14
ldd [%o1 + 0x20], %f16
ldd [%o1 + 0x28], %f18
ldd [%o1 + 0x30], %f20
ldd [%o1 + 0x38], %f22
SHA256
subcc %o2, 1, %o2
bne,pt %xcc, 1b
add %o1, 0x40, %o1
5:
st %f0, [%o0 + 0x00]
st %f1, [%o0 + 0x04]
st %f2, [%o0 + 0x08]
st %f3, [%o0 + 0x0c]
st %f4, [%o0 + 0x10]
st %f5, [%o0 + 0x14]
st %f6, [%o0 + 0x18]
st %f7, [%o0 + 0x1c]
retl
VISExitHalf
10:
alignaddr %o1, %g0, %o1
ldd [%o1 + 0x00], %f10
1:
ldd [%o1 + 0x08], %f12
ldd [%o1 + 0x10], %f14
ldd [%o1 + 0x18], %f16
ldd [%o1 + 0x20], %f18
ldd [%o1 + 0x28], %f20
ldd [%o1 + 0x30], %f22
ldd [%o1 + 0x38], %f24
ldd [%o1 + 0x40], %f26
faligndata %f10, %f12, %f8
faligndata %f12, %f14, %f10
faligndata %f14, %f16, %f12
faligndata %f16, %f18, %f14
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
SHA256
subcc %o2, 1, %o2
fsrc2 %f26, %f10
bne,pt %xcc, 1b
add %o1, 0x40, %o1
ba,a,pt %xcc, 5b
ENDPROC(sha256_sparc64_transform)

View File

@@ -0,0 +1,241 @@
/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
*
* This is based largely upon crypto/sha256_generic.c
*
* Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
* SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
unsigned int rounds);
static int sha224_sparc64_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int sha256_sparc64_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
sctx->state[2] = SHA256_H2;
sctx->state[3] = SHA256_H3;
sctx->state[4] = SHA256_H4;
sctx->state[5] = SHA256_H5;
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
return 0;
}
static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha256_sparc64_transform(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA256_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
sha256_sparc64_transform(sctx->state, data + done, rounds);
done += rounds * SHA256_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
}
static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < SHA256_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buf + partial, data, len);
} else
__sha256_sparc64_update(sctx, data, len, partial);
return 0;
}
static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA256_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);
/* We need to fill a whole block for __sha256_sparc64_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha256_sparc64_update(sctx, padding, padlen, index);
}
__sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_sparc64_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memset(D, 0, SHA256_DIGEST_SIZE);
return 0;
}
static int sha256_sparc64_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg sha256 = {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_sparc64_init,
.update = sha256_sparc64_update,
.final = sha256_sparc64_final,
.export = sha256_sparc64_export,
.import = sha256_sparc64_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name= "sha256-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static struct shash_alg sha224 = {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_sparc64_init,
.update = sha256_sparc64_update,
.final = sha224_sparc64_final,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name= "sha224-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static bool __init sparc64_has_sha256_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_SHA256))
return false;
return true;
}
static int __init sha256_sparc64_mod_init(void)
{
if (sparc64_has_sha256_opcode()) {
int ret = crypto_register_shash(&sha224);
if (ret < 0)
return ret;
ret = crypto_register_shash(&sha256);
if (ret < 0) {
crypto_unregister_shash(&sha224);
return ret;
}
pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
return 0;
}
pr_info("sparc64 sha256 opcode not available.\n");
return -ENODEV;
}
static void __exit sha256_sparc64_mod_fini(void)
{
crypto_unregister_shash(&sha224);
crypto_unregister_shash(&sha256);
}
module_init(sha256_sparc64_mod_init);
module_exit(sha256_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");
MODULE_ALIAS("sha224");
MODULE_ALIAS("sha256");

View File

@@ -0,0 +1,102 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include "opcodes.h"
ENTRY(sha512_sparc64_transform)
/* %o0 = digest, %o1 = data, %o2 = rounds */
VISEntry
ldd [%o0 + 0x00], %f0
ldd [%o0 + 0x08], %f2
ldd [%o0 + 0x10], %f4
ldd [%o0 + 0x18], %f6
ldd [%o0 + 0x20], %f8
ldd [%o0 + 0x28], %f10
andcc %o1, 0x7, %g0
ldd [%o0 + 0x30], %f12
bne,pn %xcc, 10f
ldd [%o0 + 0x38], %f14
1:
ldd [%o1 + 0x00], %f16
ldd [%o1 + 0x08], %f18
ldd [%o1 + 0x10], %f20
ldd [%o1 + 0x18], %f22
ldd [%o1 + 0x20], %f24
ldd [%o1 + 0x28], %f26
ldd [%o1 + 0x30], %f28
ldd [%o1 + 0x38], %f30
ldd [%o1 + 0x40], %f32
ldd [%o1 + 0x48], %f34
ldd [%o1 + 0x50], %f36
ldd [%o1 + 0x58], %f38
ldd [%o1 + 0x60], %f40
ldd [%o1 + 0x68], %f42
ldd [%o1 + 0x70], %f44
ldd [%o1 + 0x78], %f46
SHA512
subcc %o2, 1, %o2
bne,pt %xcc, 1b
add %o1, 0x80, %o1
5:
std %f0, [%o0 + 0x00]
std %f2, [%o0 + 0x08]
std %f4, [%o0 + 0x10]
std %f6, [%o0 + 0x18]
std %f8, [%o0 + 0x20]
std %f10, [%o0 + 0x28]
std %f12, [%o0 + 0x30]
std %f14, [%o0 + 0x38]
retl
VISExit
10:
alignaddr %o1, %g0, %o1
ldd [%o1 + 0x00], %f18
1:
ldd [%o1 + 0x08], %f20
ldd [%o1 + 0x10], %f22
ldd [%o1 + 0x18], %f24
ldd [%o1 + 0x20], %f26
ldd [%o1 + 0x28], %f28
ldd [%o1 + 0x30], %f30
ldd [%o1 + 0x38], %f32
ldd [%o1 + 0x40], %f34
ldd [%o1 + 0x48], %f36
ldd [%o1 + 0x50], %f38
ldd [%o1 + 0x58], %f40
ldd [%o1 + 0x60], %f42
ldd [%o1 + 0x68], %f44
ldd [%o1 + 0x70], %f46
ldd [%o1 + 0x78], %f48
ldd [%o1 + 0x80], %f50
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
faligndata %f26, %f28, %f24
faligndata %f28, %f30, %f26
faligndata %f30, %f32, %f28
faligndata %f32, %f34, %f30
faligndata %f34, %f36, %f32
faligndata %f36, %f38, %f34
faligndata %f38, %f40, %f36
faligndata %f40, %f42, %f38
faligndata %f42, %f44, %f40
faligndata %f44, %f46, %f42
faligndata %f46, %f48, %f44
faligndata %f48, %f50, %f46
SHA512
subcc %o2, 1, %o2
fsrc2 %f50, %f18
bne,pt %xcc, 1b
add %o1, 0x80, %o1
ba,a,pt %xcc, 5b
ENDPROC(sha512_sparc64_transform)

View File

@@ -0,0 +1,226 @@
/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes.
*
* This is based largely upon crypto/sha512_generic.c
*
* Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/pstate.h>
#include <asm/elf.h>
#include "opcodes.h"
asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
unsigned int rounds);
static int sha512_sparc64_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_sparc64_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
if ((sctx->count[0] += len) < len)
sctx->count[1]++;
if (partial) {
done = SHA512_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha512_sparc64_transform(sctx->state, sctx->buf, 1);
}
if (len - done >= SHA512_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
sha512_sparc64_transform(sctx->state, data + done, rounds);
done += rounds * SHA512_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
}
static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
/* Handle the fast case right here */
if (partial + len < SHA512_BLOCK_SIZE) {
if ((sctx->count[0] += len) < len)
sctx->count[1]++;
memcpy(sctx->buf + partial, data, len);
} else
__sha512_sparc64_update(sctx, data, len, partial);
return 0;
}
static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be64 *dst = (__be64 *)out;
__be64 bits[2];
static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
/* Save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128 and append length */
index = sctx->count[0] % SHA512_BLOCK_SIZE;
padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);
/* We need to fill a whole block for __sha512_sparc64_update() */
if (padlen <= 112) {
if ((sctx->count[0] += padlen) < padlen)
sctx->count[1]++;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha512_sparc64_update(sctx, padding, padlen, index);
}
__sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
{
u8 D[64];
sha512_sparc64_final(desc, D);
memcpy(hash, D, 48);
memset(D, 0, 64);
return 0;
}
static struct shash_alg sha512 = {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_sparc64_init,
.update = sha512_sparc64_update,
.final = sha512_sparc64_final,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name= "sha512-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static struct shash_alg sha384 = {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_sparc64_init,
.update = sha512_sparc64_update,
.final = sha384_sparc64_final,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name= "sha384-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static bool __init sparc64_has_sha512_opcode(void)
{
unsigned long cfr;
if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
if (!(cfr & CFR_SHA512))
return false;
return true;
}
static int __init sha512_sparc64_mod_init(void)
{
if (sparc64_has_sha512_opcode()) {
int ret = crypto_register_shash(&sha384);
if (ret < 0)
return ret;
ret = crypto_register_shash(&sha512);
if (ret < 0) {
crypto_unregister_shash(&sha384);
return ret;
}
pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n");
return 0;
}
pr_info("sparc64 sha512 opcode not available.\n");
return -ENODEV;
}
static void __exit sha512_sparc64_mod_fini(void)
{
crypto_unregister_shash(&sha384);
crypto_unregister_shash(&sha512);
}
module_init(sha512_sparc64_mod_init);
module_exit(sha512_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated");
MODULE_ALIAS("sha384");
MODULE_ALIAS("sha512");

View File

@@ -141,7 +141,8 @@
/* SpitFire and later extended ASIs. The "(III)" marker designates
* UltraSparc-III and later specific ASIs. The "(CMT)" marker designates
* Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific
* ASIs, "(4V)" designates SUN4V specific ASIs.
* ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4
* and later ASIs.
*/
#define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */
#define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */
@@ -243,6 +244,7 @@
#define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/
#define ASI_INTR_R 0x7f /* IRQ vector dispatch read */
#define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */
#define ASI_PIC 0xb0 /* (NG4) PIC registers */
#define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */
#define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */
#define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */

View File

@@ -86,6 +86,15 @@
#define AV_SPARC_IMA 0x00400000 /* integer multiply-add */
#define AV_SPARC_ASI_CACHE_SPARING \
0x00800000 /* cache sparing ASIs available */
#define AV_SPARC_PAUSE 0x01000000 /* PAUSE available */
#define AV_SPARC_CBCOND 0x02000000 /* CBCOND insns available */
/* Solaris decided to enumerate every single crypto instruction type
* in the AT_HWCAP bits. This is wasteful, since if crypto is present,
* you still need to look in the CFR register to see if the opcode is
* really available. So we simply advertise only "crypto" support.
*/
#define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */
#define CORE_DUMP_USE_REGSET

View File

@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
unsigned long len);
#endif
#define HV_FAST_VT_GET_PERFREG 0x184
#define HV_FAST_VT_SET_PERFREG 0x185
#ifndef __ASSEMBLY__
extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num,
unsigned long *reg_val);
extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
#define HV_GRP_NIU 0x0204
#define HV_GRP_VF_CPU 0x0205
#define HV_GRP_KT_CPU 0x0209
#define HV_GRP_VT_CPU 0x020c
#define HV_GRP_DIAG 0x0300
#ifndef __ASSEMBLY__

View File

@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client);
extern void mdesc_fill_in_cpu_data(cpumask_t *mask);
extern void mdesc_populate_present_mask(cpumask_t *mask);
extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask);
extern void sun4v_mdesc_init(void);

View File

@@ -2,8 +2,13 @@
#define __PCR_H
struct pcr_ops {
u64 (*read)(void);
void (*write)(u64);
u64 (*read_pcr)(unsigned long);
void (*write_pcr)(unsigned long, u64);
u64 (*read_pic)(unsigned long);
void (*write_pic)(unsigned long, u64);
u64 (*nmi_picl_value)(unsigned int nmi_hz);
u64 pcr_nmi_enable;
u64 pcr_nmi_disable;
};
extern const struct pcr_ops *pcr_ops;
@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void);
#define PCR_N2_SL1_SHIFT 27
#define PCR_N2_OV1 0x80000000
extern unsigned int picl_shift;
/* In order to commonize as much of the implementation as
* possible, we use PICH as our counter. Mostly this is
* to accommodate Niagara-1 which can only count insn cycles
* in PICH.
*/
static inline u64 picl_value(unsigned int nmi_hz)
{
u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift);
return ((u64)((0 - delta) & 0xffffffff)) << 32;
}
extern u64 pcr_enable;
#define PCR_N4_OV 0x00000001 /* PIC overflow */
#define PCR_N4_TOE 0x00000002 /* Trap On Event */
#define PCR_N4_UTRACE 0x00000004 /* Trace user events */
#define PCR_N4_STRACE 0x00000008 /* Trace supervisor events */
#define PCR_N4_HTRACE 0x00000010 /* Trace hypervisor events */
#define PCR_N4_MASK 0x000007e0 /* Event mask */
#define PCR_N4_MASK_SHIFT 5
#define PCR_N4_SL 0x0000f800 /* Event Select */
#define PCR_N4_SL_SHIFT 11
#define PCR_N4_PICNPT 0x00010000 /* PIC non-privileged trap */
#define PCR_N4_PICNHT 0x00020000 /* PIC non-hypervisor trap */
#define PCR_N4_NTC 0x00040000 /* Next-To-Commit wrap */
extern int pcr_arch_init(void);

View File

@@ -54,11 +54,6 @@ enum perfctr_opcode {
PERFCTR_GETPCR
};
/* I don't want the kernel's namespace to be polluted with this
* stuff when this file is included. --DaveM
*/
#ifndef __KERNEL__
#define PRIV 0x00000001
#define SYS 0x00000002
#define USR 0x00000004
@@ -168,29 +163,4 @@ struct vcounter_struct {
unsigned long long vcnt1;
};
#else /* !(__KERNEL__) */
#ifndef CONFIG_SPARC32
/* Performance counter register access. */
#define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p))
#define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p))
#define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p))
/* Blackbird errata workaround. See commentary in
* arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
* for more information.
*/
#define write_pic(__p) \
__asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \
" nop\n\t" \
".align 64\n" \
"99:wr %0, 0x0, %%pic\n\t" \
"rd %%pic, %%g0" : : "r" (__p))
#define reset_pic() write_pic(0)
#endif /* !CONFIG_SPARC32 */
#endif /* !(__KERNEL__) */
#endif /* !(PERF_COUNTER_API) */

View File

@@ -88,4 +88,18 @@
#define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */
#define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/
/* Compatability Feature Register (%asr26), SPARC-T4 and later */
#define CFR_AES _AC(0x0000000000000001,UL) /* Supports AES opcodes */
#define CFR_DES _AC(0x0000000000000002,UL) /* Supports DES opcodes */
#define CFR_KASUMI _AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */
#define CFR_CAMELLIA _AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/
#define CFR_MD5 _AC(0x0000000000000010,UL) /* Supports MD5 opcodes */
#define CFR_SHA1 _AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */
#define CFR_SHA256 _AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */
#define CFR_SHA512 _AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */
#define CFR_MPMUL _AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */
#define CFR_MONTMUL _AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */
#define CFR_MONTSQR _AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */
#define CFR_CRC32C _AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */
#endif /* !(_SPARC64_PSTATE_H) */

View File

@@ -559,10 +559,10 @@ niagara_tlb_fixup:
be,pt %xcc, niagara2_patch
nop
cmp %g1, SUN4V_CHIP_NIAGARA4
be,pt %xcc, niagara2_patch
be,pt %xcc, niagara4_patch
nop
cmp %g1, SUN4V_CHIP_NIAGARA5
be,pt %xcc, niagara2_patch
be,pt %xcc, niagara4_patch
nop
call generic_patch_copyops
@@ -573,6 +573,16 @@ niagara_tlb_fixup:
nop
ba,a,pt %xcc, 80f
niagara4_patch:
call niagara4_patch_copyops
nop
call niagara_patch_bzero
nop
call niagara4_patch_pageops
nop
ba,a,pt %xcc, 80f
niagara2_patch:
call niagara2_patch_copyops
nop

View File

@@ -45,6 +45,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_NIU, },
{ .group = HV_GRP_VF_CPU, },
{ .group = HV_GRP_KT_CPU, },
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
};

View File

@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)
retl
nop
ENDPROC(sun4v_reboot_data_set)
ENTRY(sun4v_vt_get_perfreg)
mov %o1, %o4
mov HV_FAST_VT_GET_PERFREG, %o5
ta HV_FAST_TRAP
stx %o1, [%o4]
retl
nop
ENDPROC(sun4v_vt_get_perfreg)
ENTRY(sun4v_vt_set_perfreg)
mov HV_FAST_VT_SET_PERFREG, %o5
ta HV_FAST_TRAP
retl
nop
ENDPROC(sun4v_vt_set_perfreg)

View File

@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
be,pn %xcc, kvmap_dtlb_longpath
2: sethi %hi(kpte_linear_bitmap), %g2
or %g2, %lo(kpte_linear_bitmap), %g2
/* Get the 256MB physical address index. */
sllx %g4, 21, %g5
mov 1, %g7
or %g2, %lo(kpte_linear_bitmap), %g2
srlx %g5, 21 + 28, %g5
and %g5, (32 - 1), %g7
/* Don't try this at home kids... this depends upon srlx
* only taking the low 6 bits of the shift count in %g5.
*/
sllx %g7, %g5, %g7
/* Divide by 64 to get the offset into the bitmask. */
srlx %g5, 6, %g5
/* Divide by 32 to get the offset into the bitmask. */
srlx %g5, 5, %g5
add %g7, %g7, %g7
sllx %g5, 3, %g5
/* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
/* kern_linear_pte_xor[(mask >> shift) & 3)] */
ldx [%g2 + %g5], %g2
andcc %g2, %g7, %g0
srlx %g2, %g7, %g7
sethi %hi(kern_linear_pte_xor), %g5
and %g7, 3, %g7
or %g5, %lo(kern_linear_pte_xor), %g5
bne,a,pt %xcc, 1f
add %g5, 8, %g5
1: ldx [%g5], %g2
sllx %g7, 3, %g7
ldx [%g5 + %g7], %g2
.globl kvmap_linear_patch
kvmap_linear_patch:

View File

@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}
void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
{
#ifdef CONFIG_PCI_DEBUG
printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
pci_name(dev));
#endif
pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
}
/* in/out routines taken from pcic.c
*
* This probably belongs here rather than ioport.c because

View File

@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
}
static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
{
const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL);
unsigned long *pgsz_mask = arg;
u64 val;
val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
if (pgsz_prop)
val = *pgsz_prop;
if (!*pgsz_mask)
*pgsz_mask = val;
else
*pgsz_mask &= val;
return NULL;
}
void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
{
*pgsz_mask = 0;
mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
}
static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
{
const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);

View File

@@ -22,7 +22,6 @@
#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>
#include "kstack.h"
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
touched = 1;
else
pcr_ops->write(PCR_PIC_PRIV);
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
sum = local_cpu_data().irq0_irqs;
if (__get_cpu_var(nmi_touch)) {
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
__this_cpu_write(alert_counter, 0);
}
if (__get_cpu_var(wd_enabled)) {
write_pic(picl_value(nmi_hz));
pcr_ops->write(pcr_enable);
pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
restore_hardirq_stack(orig_sp);
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
void stop_nmi_watchdog(void *unused)
{
pcr_ops->write(PCR_PIC_PRIV);
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
__get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)
__get_cpu_var(wd_enabled) = 1;
atomic_inc(&nmi_active);
pcr_ops->write(PCR_PIC_PRIV);
write_pic(picl_value(nmi_hz));
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
pcr_ops->write(pcr_enable);
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
static void nmi_adjust_hz_one(void *unused)
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)
if (!__get_cpu_var(wd_enabled))
return;
pcr_ops->write(PCR_PIC_PRIV);
write_pic(picl_value(nmi_hz));
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
pcr_ops->write(pcr_enable);
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
void nmi_adjust_hz(unsigned int new_hz)

View File

@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
{
}
void pcibios_update_irq(struct pci_dev *pdev, int irq)
{
}
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
resource_size_t size, resource_size_t align)
{

View File

@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
vdma[0], vdma[1]);
return -EINVAL;
};
}
dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
num_tsb_entries = vdma[1] / IO_PAGE_SIZE;

View File

@@ -13,23 +13,14 @@
#include <asm/pil.h>
#include <asm/pcr.h>
#include <asm/nmi.h>
#include <asm/asi.h>
#include <asm/spitfire.h>
#include <asm/perfctr.h>
/* This code is shared between various users of the performance
* counters. Users will be oprofile, pseudo-NMI watchdog, and the
* perf_event support layer.
*/
#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \
PCR_N2_TOE_OV1 | \
(2 << PCR_N2_SL1_SHIFT) | \
(0xff << PCR_N2_MASK1_SHIFT))
u64 pcr_enable;
unsigned int picl_shift;
/* Performance counter interrupts run unmasked at PIL level 15.
* Therefore we can't do things like wakeups and other work
* that expects IRQ disabling to be adhered to in locking etc.
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void)
const struct pcr_ops *pcr_ops;
EXPORT_SYMBOL_GPL(pcr_ops);
static u64 direct_pcr_read(void)
static u64 direct_pcr_read(unsigned long reg_num)
{
u64 val;
read_pcr(val);
WARN_ON_ONCE(reg_num != 0);
__asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
return val;
}
static void direct_pcr_write(u64 val)
static void direct_pcr_write(unsigned long reg_num, u64 val)
{
write_pcr(val);
WARN_ON_ONCE(reg_num != 0);
__asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
}
static u64 direct_pic_read(unsigned long reg_num)
{
u64 val;
WARN_ON_ONCE(reg_num != 0);
__asm__ __volatile__("rd %%pic, %0" : "=r" (val));
return val;
}
static void direct_pic_write(unsigned long reg_num, u64 val)
{
WARN_ON_ONCE(reg_num != 0);
/* Blackbird errata workaround. See commentary in
* arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
* for more information.
*/
__asm__ __volatile__("ba,pt %%xcc, 99f\n\t"
" nop\n\t"
".align 64\n"
"99:wr %0, 0x0, %%pic\n\t"
"rd %%pic, %%g0" : : "r" (val));
}
static u64 direct_picl_value(unsigned int nmi_hz)
{
u32 delta = local_cpu_data().clock_tick / nmi_hz;
return ((u64)((0 - delta) & 0xffffffff)) << 32;
}
static const struct pcr_ops direct_pcr_ops = {
.read = direct_pcr_read,
.write = direct_pcr_write,
.read_pcr = direct_pcr_read,
.write_pcr = direct_pcr_write,
.read_pic = direct_pic_read,
.write_pic = direct_pic_write,
.nmi_picl_value = direct_picl_value,
.pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
.pcr_nmi_disable = PCR_PIC_PRIV,
};
static void n2_pcr_write(u64 val)
static void n2_pcr_write(unsigned long reg_num, u64 val)
{
unsigned long ret;
WARN_ON_ONCE(reg_num != 0);
if (val & PCR_N2_HTRACE) {
ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
if (ret != HV_EOK)
write_pcr(val);
direct_pcr_write(reg_num, val);
} else
write_pcr(val);
direct_pcr_write(reg_num, val);
}
static u64 n2_picl_value(unsigned int nmi_hz)
{
u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
return ((u64)((0 - delta) & 0xffffffff)) << 32;
}
static const struct pcr_ops n2_pcr_ops = {
.read = direct_pcr_read,
.write = n2_pcr_write,
.read_pcr = direct_pcr_read,
.write_pcr = n2_pcr_write,
.read_pic = direct_pic_read,
.write_pic = direct_pic_write,
.nmi_picl_value = n2_picl_value,
.pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
PCR_N2_TOE_OV1 |
(2 << PCR_N2_SL1_SHIFT) |
(0xff << PCR_N2_MASK1_SHIFT)),
.pcr_nmi_disable = PCR_PIC_PRIV,
};
static u64 n4_pcr_read(unsigned long reg_num)
{
unsigned long val;
(void) sun4v_vt_get_perfreg(reg_num, &val);
return val;
}
static void n4_pcr_write(unsigned long reg_num, u64 val)
{
(void) sun4v_vt_set_perfreg(reg_num, val);
}
static u64 n4_pic_read(unsigned long reg_num)
{
unsigned long val;
__asm__ __volatile__("ldxa [%1] %2, %0"
: "=r" (val)
: "r" (reg_num * 0x8UL), "i" (ASI_PIC));
return val;
}
static void n4_pic_write(unsigned long reg_num, u64 val)
{
__asm__ __volatile__("stxa %0, [%1] %2"
: /* no outputs */
: "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
}
static u64 n4_picl_value(unsigned int nmi_hz)
{
u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
return ((u64)((0 - delta) & 0xffffffff));
}
static const struct pcr_ops n4_pcr_ops = {
.read_pcr = n4_pcr_read,
.write_pcr = n4_pcr_write,
.read_pic = n4_pic_read,
.write_pic = n4_pic_write,
.nmi_picl_value = n4_picl_value,
.pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
PCR_N4_UTRACE | PCR_N4_TOE |
(26 << PCR_N4_SL_SHIFT)),
.pcr_nmi_disable = PCR_N4_PICNPT,
};
static unsigned long perf_hsvc_group;
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_KT_CPU;
break;
case SUN4V_CHIP_NIAGARA4:
perf_hsvc_group = HV_GRP_VT_CPU;
break;
default:
return -ENODEV;
}
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)
sun4v_hvapi_unregister(perf_hsvc_group);
}
static int __init setup_sun4v_pcr_ops(void)
{
int ret = 0;
switch (sun4v_chip_type) {
case SUN4V_CHIP_NIAGARA1:
case SUN4V_CHIP_NIAGARA2:
case SUN4V_CHIP_NIAGARA3:
pcr_ops = &n2_pcr_ops;
break;
case SUN4V_CHIP_NIAGARA4:
pcr_ops = &n4_pcr_ops;
break;
default:
ret = -ENODEV;
break;
}
return ret;
}
int __init pcr_arch_init(void)
{
int err = register_perf_hsvc();
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void)
switch (tlb_type) {
case hypervisor:
pcr_ops = &n2_pcr_ops;
pcr_enable = PCR_N2_ENABLE;
picl_shift = 2;
err = setup_sun4v_pcr_ops();
if (err)
goto out_unregister;
break;
case cheetah:
case cheetah_plus:
pcr_ops = &direct_pcr_ops;
pcr_enable = PCR_SUN4U_ENABLE;
break;
case spitfire:

View File

@@ -25,36 +25,48 @@
#include <linux/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>
#include <asm/cacheflush.h>
#include "kernel.h"
#include "kstack.h"
/* Sparc64 chips have two performance counters, 32-bits each, with
* overflow interrupts generated on transition from 0xffffffff to 0.
* The counters are accessed in one go using a 64-bit register.
/* Two classes of sparc64 chips currently exist. All of which have
* 32-bit counters which can generate overflow interrupts on the
* transition from 0xffffffff to 0.
*
* Both counters are controlled using a single control register. The
* only way to stop all sampling is to clear all of the context (user,
* supervisor, hypervisor) sampling enable bits. But these bits apply
* to both counters, thus the two counters can't be enabled/disabled
* individually.
* All chips upto and including SPARC-T3 have two performance
* counters. The two 32-bit counters are accessed in one go using a
* single 64-bit register.
*
* The control register has two event fields, one for each of the two
* counters. It's thus nearly impossible to have one counter going
* while keeping the other one stopped. Therefore it is possible to
* get overflow interrupts for counters not currently "in use" and
* that condition must be checked in the overflow interrupt handler.
* On these older chips both counters are controlled using a single
* control register. The only way to stop all sampling is to clear
* all of the context (user, supervisor, hypervisor) sampling enable
* bits. But these bits apply to both counters, thus the two counters
* can't be enabled/disabled individually.
*
* Furthermore, the control register on these older chips have two
* event fields, one for each of the two counters. It's thus nearly
* impossible to have one counter going while keeping the other one
* stopped. Therefore it is possible to get overflow interrupts for
* counters not currently "in use" and that condition must be checked
* in the overflow interrupt handler.
*
* So we use a hack, in that we program inactive counters with the
* "sw_count0" and "sw_count1" events. These count how many times
* the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
* unusual way to encode a NOP and therefore will not trigger in
* normal code.
*
* Starting with SPARC-T4 we have one control register per counter.
* And the counters are stored in individual registers. The registers
* for the counters are 64-bit but only a 32-bit counter is
* implemented. The event selections on SPARC-T4 lack any
* restrictions, therefore we can elide all of the complicated
* conflict resolution code we have for SPARC-T3 and earlier chips.
*/
#define MAX_HWEVENTS 2
#define MAX_HWEVENTS 4
#define MAX_PCRS 4
#define MAX_PERIOD ((1UL << 32) - 1)
#define PIC_UPPER_INDEX 0
@@ -90,8 +102,8 @@ struct cpu_hw_events {
*/
int current_idx[MAX_HWEVENTS];
/* Software copy of %pcr register on this cpu. */
u64 pcr;
/* Software copy of %pcr register(s) on this cpu. */
u64 pcr[MAX_HWEVENTS];
/* Enabled/disable state. */
int enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
/* An event map describes the characteristics of a performance
* counter event. In particular it gives the encoding as well as
* a mask telling which counters the event can be measured on.
*
* The mask is unused on SPARC-T4 and later.
*/
struct perf_event_map {
u16 encoding;
@@ -142,15 +156,53 @@ struct sparc_pmu {
const struct perf_event_map *(*event_map)(int);
const cache_map_t *cache_map;
int max_events;
u32 (*read_pmc)(int);
void (*write_pmc)(int, u64);
int upper_shift;
int lower_shift;
int event_mask;
int user_bit;
int priv_bit;
int hv_bit;
int irq_bit;
int upper_nop;
int lower_nop;
unsigned int flags;
#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
#define SPARC_PMU_HAS_CONFLICTS 0x00000002
int max_hw_events;
int num_pcrs;
int num_pic_regs;
};
static u32 sparc_default_read_pmc(int idx)
{
u64 val;
val = pcr_ops->read_pic(0);
if (idx == PIC_UPPER_INDEX)
val >>= 32;
return val & 0xffffffff;
}
static void sparc_default_write_pmc(int idx, u64 val)
{
u64 shift, mask, pic;
shift = 0;
if (idx == PIC_UPPER_INDEX)
shift = 32;
mask = ((u64) 0xffffffff) << shift;
val <<= shift;
pic = pcr_ops->read_pic(0);
pic &= ~mask;
pic |= val;
pcr_ops->write_pic(0, pic);
}
static const struct perf_event_map ultra3_perfmon_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
.event_map = ultra3_event_map,
.cache_map = &ultra3_cache_map,
.max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
.read_pmc = sparc_default_read_pmc,
.write_pmc = sparc_default_write_pmc,
.upper_shift = 11,
.lower_shift = 4,
.event_mask = 0x3f,
.user_bit = PCR_UTRACE,
.priv_bit = PCR_STRACE,
.upper_nop = 0x1c,
.lower_nop = 0x14,
.flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
SPARC_PMU_HAS_CONFLICTS),
.max_hw_events = 2,
.num_pcrs = 1,
.num_pic_regs = 1,
};
/* Niagara1 is very limited. The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
.event_map = niagara1_event_map,
.cache_map = &niagara1_cache_map,
.max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
.read_pmc = sparc_default_read_pmc,
.write_pmc = sparc_default_write_pmc,
.upper_shift = 0,
.lower_shift = 4,
.event_mask = 0x7,
.user_bit = PCR_UTRACE,
.priv_bit = PCR_STRACE,
.upper_nop = 0x0,
.lower_nop = 0x0,
.flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
SPARC_PMU_HAS_CONFLICTS),
.max_hw_events = 2,
.num_pcrs = 1,
.num_pic_regs = 1,
};
static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
.event_map = niagara2_event_map,
.cache_map = &niagara2_cache_map,
.max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
.read_pmc = sparc_default_read_pmc,
.write_pmc = sparc_default_write_pmc,
.upper_shift = 19,
.lower_shift = 6,
.event_mask = 0xfff,
.hv_bit = 0x8,
.user_bit = PCR_UTRACE,
.priv_bit = PCR_STRACE,
.hv_bit = PCR_N2_HTRACE,
.irq_bit = 0x30,
.upper_nop = 0x220,
.lower_nop = 0x220,
.flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
SPARC_PMU_HAS_CONFLICTS),
.max_hw_events = 2,
.num_pcrs = 1,
.num_pic_regs = 1,
};
static const struct perf_event_map niagara4_perfmon_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
[PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
[PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
[PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
[PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
};
static const struct perf_event_map *niagara4_event_map(int event_id)
{
return &niagara4_perfmon_event_map[event_id];
}
static const cache_map_t niagara4_cache_map = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
[C(RESULT_MISS)] = { (11 << 6) | 0x03 },
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { (17 << 6) | 0x3f },
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { (6 << 6) | 0x3f },
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
},
},
};
static u32 sparc_vt_read_pmc(int idx)
{
u64 val = pcr_ops->read_pic(idx);
return val & 0xffffffff;
}
static void sparc_vt_write_pmc(int idx, u64 val)
{
u64 pcr;
/* There seems to be an internal latch on the overflow event
* on SPARC-T4 that prevents it from triggering unless you
* update the PIC exactly as we do here. The requirement
* seems to be that you have to turn off event counting in the
* PCR around the PIC update.
*
* For example, after the following sequence:
*
* 1) set PIC to -1
* 2) enable event counting and overflow reporting in PCR
* 3) overflow triggers, softint 15 handler invoked
* 4) clear OV bit in PCR
* 5) write PIC to -1
*
* a subsequent overflow event will not trigger. This
* sequence works on SPARC-T3 and previous chips.
*/
pcr = pcr_ops->read_pcr(idx);
pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
pcr_ops->write_pic(idx, val & 0xffffffff);
pcr_ops->write_pcr(idx, pcr);
}
static const struct sparc_pmu niagara4_pmu = {
.event_map = niagara4_event_map,
.cache_map = &niagara4_cache_map,
.max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
.read_pmc = sparc_vt_read_pmc,
.write_pmc = sparc_vt_write_pmc,
.upper_shift = 5,
.lower_shift = 5,
.event_mask = 0x7ff,
.user_bit = PCR_N4_UTRACE,
.priv_bit = PCR_N4_STRACE,
/* We explicitly don't support hypervisor tracing. The T4
* generates the overflow event for precise events via a trap
* which will not be generated (ie. it's completely lost) if
* we happen to be in the hypervisor when the event triggers.
* Essentially, the overflow event reporting is completely
* unusable when you have hypervisor mode tracing enabled.
*/
.hv_bit = 0,
.irq_bit = PCR_N4_TOE,
.upper_nop = 0,
.lower_nop = 0,
.flags = 0,
.max_hw_events = 4,
.num_pcrs = 4,
.num_pic_regs = 4,
};
static const struct sparc_pmu *sparc_pmu __read_mostly;
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 val, mask = mask_for_index(idx);
int pcr_index = 0;
val = cpuc->pcr;
if (sparc_pmu->num_pcrs > 1)
pcr_index = idx;
val = cpuc->pcr[pcr_index];
val &= ~mask;
val |= hwc->config;
cpuc->pcr = val;
cpuc->pcr[pcr_index] = val;
pcr_ops->write(cpuc->pcr);
pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}
static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 mask = mask_for_index(idx);
u64 nop = nop_for_index(idx);
int pcr_index = 0;
u64 val;
val = cpuc->pcr;
if (sparc_pmu->num_pcrs > 1)
pcr_index = idx;
val = cpuc->pcr[pcr_index];
val &= ~mask;
val |= nop;
cpuc->pcr = val;
cpuc->pcr[pcr_index] = val;
pcr_ops->write(cpuc->pcr);
}
static u32 read_pmc(int idx)
{
u64 val;
read_pic(val);
if (idx == PIC_UPPER_INDEX)
val >>= 32;
return val & 0xffffffff;
}
static void write_pmc(int idx, u64 val)
{
u64 shift, mask, pic;
shift = 0;
if (idx == PIC_UPPER_INDEX)
shift = 32;
mask = ((u64) 0xffffffff) << shift;
val <<= shift;
read_pic(pic);
pic &= ~mask;
pic |= val;
write_pic(pic);
pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}
static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,
again:
prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = read_pmc(idx);
new_raw_count = sparc_pmu->read_pmc(idx);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,
local64_set(&hwc->prev_count, (u64)-left);
write_pmc(idx, (u64)(-left) & 0xffffffff);
sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
perf_event_update_userpage(event);
return ret;
}
/* If performance event entries have been added, move existing
* events around (if necessary) and then assign new entries to
* counters.
*/
static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
static void read_in_all_counters(struct cpu_hw_events *cpuc)
{
int i;
if (!cpuc->n_added)
goto out;
/* Read in the counters which are moving. */
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *cp = cpuc->event[i];
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
cpuc->current_idx[i] = PIC_NO_INDEX;
}
}
}
/* On this PMU all PICs are programmed using a single PCR. Calculate
* the combined control register value.
*
* For such chips we require that all of the events have the same
* configuration, so just fetch the settings from the first entry.
*/
static void calculate_single_pcr(struct cpu_hw_events *cpuc)
{
int i;
if (!cpuc->n_added)
goto out;
/* Assign to counters all unassigned events. */
for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
pcr &= ~mask_for_index(idx);
cpuc->pcr[0] &= ~mask_for_index(idx);
if (hwc->state & PERF_HES_STOPPED)
pcr |= nop_for_index(idx);
cpuc->pcr[0] |= nop_for_index(idx);
else
pcr |= event_encoding(enc, idx);
cpuc->pcr[0] |= event_encoding(enc, idx);
}
out:
return pcr;
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
/* On this PMU each PIC has it's own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
int i;
if (!cpuc->n_added)
goto out;
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[idx] &= ~mask_for_index(idx);
if (hwc->state & PERF_HES_STOPPED)
cpuc->pcr[idx] |= nop_for_index(idx);
else
cpuc->pcr[idx] |= event_encoding(enc, idx);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *cp = cpuc->event[i];
int idx = cp->hw.idx;
cpuc->pcr[idx] |= cp->hw.config_base;
}
}
/* If performance event entries have been added, move existing events
* around (if necessary) and then assign new entries to counters.
*/
static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
{
if (cpuc->n_added)
read_in_all_counters(cpuc);
if (sparc_pmu->num_pcrs == 1) {
calculate_single_pcr(cpuc);
} else {
calculate_multiple_pcrs(cpuc);
}
}
static void sparc_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 pcr;
int i;
if (cpuc->enabled)
return;
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)
cpuc->enabled = 1;
barrier();
pcr = cpuc->pcr;
if (!cpuc->n_events) {
pcr = 0;
} else {
pcr = maybe_change_configuration(cpuc, pcr);
if (cpuc->n_events)
update_pcrs_for_enable(cpuc);
/* We require that all of the events have the same
* configuration, so just fetch the settings from the
* first entry.
*/
cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
}
pcr_ops->write(cpuc->pcr);
for (i = 0; i < sparc_pmu->num_pcrs; i++)
pcr_ops->write_pcr(i, cpuc->pcr[i]);
}
static void sparc_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
int i;
if (!cpuc->enabled)
return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
cpuc->enabled = 0;
cpuc->n_added = 0;
val = cpuc->pcr;
val &= ~(PCR_UTRACE | PCR_STRACE |
sparc_pmu->hv_bit | sparc_pmu->irq_bit);
cpuc->pcr = val;
for (i = 0; i < sparc_pmu->num_pcrs; i++) {
u64 val = cpuc->pcr[i];
pcr_ops->write(cpuc->pcr);
val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
sparc_pmu->hv_bit | sparc_pmu->irq_bit);
cpuc->pcr[i] = val;
pcr_ops->write_pcr(i, cpuc->pcr[i]);
}
}
static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
static void perf_stop_nmi_watchdog(void *unused)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int i;
stop_nmi_watchdog(NULL);
cpuc->pcr = pcr_ops->read();
for (i = 0; i < sparc_pmu->num_pcrs; i++)
cpuc->pcr[i] = pcr_ops->read_pcr(i);
}
void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
if (!n_ev)
return 0;
if (n_ev > MAX_HWEVENTS)
if (n_ev > sparc_pmu->max_hw_events)
return -1;
if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
int i;
for (i = 0; i < n_ev; i++)
evts[i]->hw.idx = i;
return 0;
}
msk0 = perf_event_get_msk(events[0]);
if (n_ev == 1) {
if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
struct perf_event *event;
int i, n, first;
if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
return 0;
n = n_prev + n_new;
if (n <= 1)
return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= MAX_HWEVENTS)
if (n0 >= sparc_pmu->max_hw_events)
goto out;
cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
/* We save the enable bits in the config_base. */
hwc->config_base = sparc_pmu->irq_bit;
if (!attr->exclude_user)
hwc->config_base |= PCR_UTRACE;
hwc->config_base |= sparc_pmu->user_bit;
if (!attr->exclude_kernel)
hwc->config_base |= PCR_STRACE;
hwc->config_base |= sparc_pmu->priv_bit;
if (!attr->exclude_hv)
hwc->config_base |= sparc_pmu->hv_bit;
n = 0;
if (event->group_leader != event) {
n = collect_events(event->group_leader,
MAX_HWEVENTS - 1,
sparc_pmu->max_hw_events - 1,
evts, events, current_idx_dmy);
if (n < 0)
return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
void perf_event_print_debug(void)
{
unsigned long flags;
u64 pcr, pic;
int cpu;
int cpu, i;
if (!sparc_pmu)
return;
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)
cpu = smp_processor_id();
pcr = pcr_ops->read();
read_pic(pic);
pr_info("\n");
pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
cpu, pcr, pic);
for (i = 0; i < sparc_pmu->num_pcrs; i++)
pr_info("CPU#%d: PCR%d[%016llx]\n",
cpu, i, pcr_ops->read_pcr(i));
for (i = 0; i < sparc_pmu->num_pic_regs; i++)
pr_info("CPU#%d: PIC%d[%016llx]\n",
cpu, i, pcr_ops->read_pic(i));
local_irq_restore(flags);
}
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
* Do this before we peek at the counters to determine
* overflow so we don't lose any events.
*/
if (sparc_pmu->irq_bit)
pcr_ops->write(cpuc->pcr);
if (sparc_pmu->irq_bit &&
sparc_pmu->num_pcrs == 1)
pcr_ops->write_pcr(0, cpuc->pcr[0]);
for (i = 0; i < cpuc->n_events; i++) {
struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
struct hw_perf_event *hwc;
u64 val;
if (sparc_pmu->irq_bit &&
sparc_pmu->num_pcrs > 1)
pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
hwc = &event->hw;
val = sparc_perf_event_update(event, hwc, idx);
if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara2_pmu;
return true;
}
if (!strcmp(sparc_pmu_type, "niagara4")) {
sparc_pmu = &niagara4_pmu;
return true;
}
return false;
}

View File

@@ -340,7 +340,12 @@ static const char *hwcaps[] = {
*/
"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
"ima", "cspare",
"ima", "cspare", "pause", "cbcond",
};
static const char *crypto_hwcaps[] = {
"aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
"sha512", "mpmul", "montmul", "montsqr", "crc32c",
};
void cpucap_info(struct seq_file *m)
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)
printed++;
}
}
if (caps & HWCAP_SPARC_CRYPTO) {
unsigned long cfr;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
unsigned long bit = 1UL << i;
if (cfr & bit) {
seq_printf(m, "%s%s",
printed ? "," : "", crypto_hwcaps[i]);
printed++;
}
}
}
seq_putc(m, '\n');
}
static void __init report_one_hwcap(int *printed, const char *name)
{
if ((*printed) == 0)
printk(KERN_INFO "CPU CAPS: [");
printk(KERN_CONT "%s%s",
(*printed) ? "," : "", name);
if (++(*printed) == 8) {
printk(KERN_CONT "]\n");
*printed = 0;
}
}
static void __init report_crypto_hwcaps(int *printed)
{
unsigned long cfr;
int i;
__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
unsigned long bit = 1UL << i;
if (cfr & bit)
report_one_hwcap(printed, crypto_hwcaps[i]);
}
}
static void __init report_hwcaps(unsigned long caps)
{
int i, printed = 0;
printk(KERN_INFO "CPU CAPS: [");
for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
unsigned long bit = 1UL << i;
if (caps & bit) {
printk(KERN_CONT "%s%s",
printed ? "," : "", hwcaps[i]);
if (++printed == 8) {
printk(KERN_CONT "]\n");
printk(KERN_INFO "CPU CAPS: [");
printed = 0;
}
}
if (caps & bit)
report_one_hwcap(&printed, hwcaps[i]);
}
printk(KERN_CONT "]\n");
if (caps & HWCAP_SPARC_CRYPTO)
report_crypto_hwcaps(&printed);
if (printed != 0)
printk(KERN_CONT "]\n");
}
static unsigned long __init mdesc_cpu_hwcap_list(void)
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
break;
}
}
for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
if (!strcmp(prop, crypto_hwcaps[i]))
caps |= HWCAP_SPARC_CRYPTO;
}
plen = strlen(prop) + 1;
prop += plen;

View File

@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
lib-$(CONFIG_SPARC64) += NG2patch.o
lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o
lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o
lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o

View File

@@ -0,0 +1,30 @@
/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace.
*
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
#define EX_LD(x) \
98: x; \
.section __ex_table,"a";\
.align 4; \
.word 98b, __retl_one_asi;\
.text; \
.align 4;
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
#define FUNC_NAME NG4copy_from_user
#define LOAD(type,addr,dest) type##a [addr] %asi, dest
#define EX_RETVAL(x) 0
#ifdef __KERNEL__
#define PREAMBLE \
rd %asi, %g1; \
cmp %g1, ASI_AIUS; \
bne,pn %icc, ___copy_in_user; \
nop
#endif
#include "NG4memcpy.S"

View File

@@ -0,0 +1,57 @@
/* NG4copy_page.S: Niagara-4 optimized copy page.
*
* Copyright (C) 2012 (davem@davemloft.net)
*/
#include <asm/asi.h>
#include <asm/page.h>
.text
.align 32
.register %g2, #scratch
.register %g3, #scratch
.globl NG4copy_user_page
NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
prefetch [%o1 + 0x000], #n_reads_strong
prefetch [%o1 + 0x040], #n_reads_strong
prefetch [%o1 + 0x080], #n_reads_strong
prefetch [%o1 + 0x0c0], #n_reads_strong
set PAGE_SIZE, %g7
prefetch [%o1 + 0x100], #n_reads_strong
prefetch [%o1 + 0x140], #n_reads_strong
prefetch [%o1 + 0x180], #n_reads_strong
prefetch [%o1 + 0x1c0], #n_reads_strong
1:
ldx [%o1 + 0x00], %o2
subcc %g7, 0x40, %g7
ldx [%o1 + 0x08], %o3
ldx [%o1 + 0x10], %o4
ldx [%o1 + 0x18], %o5
ldx [%o1 + 0x20], %g1
stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
ldx [%o1 + 0x28], %g2
stxa %o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
ldx [%o1 + 0x30], %g3
stxa %o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
ldx [%o1 + 0x38], %o2
add %o1, 0x40, %o1
stxa %o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
stxa %g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
stxa %g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
stxa %g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
add %o0, 0x08, %o0
bne,pt %icc, 1b
prefetch [%o1 + 0x200], #n_reads_strong
retl
membar #StoreLoad | #StoreStore
.size NG4copy_user_page,.-NG4copy_user_page

View File

@@ -0,0 +1,39 @@
/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace.
*
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
#define EX_ST(x) \
98: x; \
.section __ex_table,"a";\
.align 4; \
.word 98b, __retl_one_asi;\
.text; \
.align 4;
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
#endif
#define FUNC_NAME NG4copy_to_user
#define STORE(type,src,addr) type##a src, [addr] %asi
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
#define EX_RETVAL(x) 0
#ifdef __KERNEL__
/* Writing to %asi is _expensive_ so we hardcode it.
* Reading %asi to check for KERNEL_DS is comparatively
* cheap.
*/
#define PREAMBLE \
rd %asi, %g1; \
cmp %g1, ASI_AIUS; \
bne,pn %icc, ___copy_in_user; \
nop
#endif
#include "NG4memcpy.S"

360
arch/sparc/lib/NG4memcpy.S Normal file
View File

@@ -0,0 +1,360 @@
/* NG4memcpy.S: Niagara-4 optimized memcpy.
*
* Copyright (C) 2012 David S. Miller (davem@davemloft.net)
*/
#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF 0x04
/* On T4 it is very expensive to access ASRs like %fprs and
* %asi, avoiding a read or a write can save ~50 cycles.
*/
#define FPU_ENTER \
rd %fprs, %o5; \
andcc %o5, FPRS_FEF, %g0; \
be,a,pn %icc, 999f; \
wr %g0, FPRS_FEF, %fprs; \
999:
#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif
#define GLOBAL_SPARE %g5
#endif
#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI 0x80 /* ASI_P */
#endif
#endif
#ifndef EX_LD
#define EX_LD(x) x
#endif
#ifndef EX_ST
#define EX_ST(x) x
#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
#endif
#ifndef LOAD
#define LOAD(type,addr,dest) type [addr], dest
#endif
#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr) type src, [addr]
#else
#define STORE(type,src,addr) type##a src, [addr] %asi
#endif
#endif
#ifndef STORE_INIT
#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
#endif
#ifndef FUNC_NAME
#define FUNC_NAME NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif
#ifndef XCC
#define XCC xcc
#endif
.register %g2,#scratch
.register %g3,#scratch
.text
.align 64
.globl FUNC_NAME
.type FUNC_NAME,#function
FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
wr %g0, 0x80, %asi
#endif
srlx %o2, 31, %g2
cmp %g2, 0
tne %XCC, 5
PREAMBLE
mov %o0, %o3
brz,pn %o2, .Lexit
cmp %o2, 3
ble,pn %icc, .Ltiny
cmp %o2, 19
ble,pn %icc, .Lsmall
or %o0, %o1, %g2
cmp %o2, 128
bl,pn %icc, .Lmedium
nop
.Llarge:/* len >= 0x80 */
/* First get dest 8 byte aligned. */
sub %g0, %o0, %g1
and %g1, 0x7, %g1
brz,pt %g1, 51f
sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
EX_ST(STORE(stb, %g2, %o0 - 0x01))
51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
/* Check if we can use the straight fully aligned
* loop, or we require the alignaddr/faligndata variant.
*/
andcc %o1, 0x7, %o5
bne,pn %icc, .Llarge_src_unaligned
sub %g0, %o0, %g1
/* Legitimize the use of initializing stores by getting dest
* to be 64-byte aligned.
*/
and %g1, 0x3f, %g1
brz,pt %g1, .Llarge_aligned
sub %o2, %g1, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
add %o1, 8, %o1
subcc %g1, 8, %g1
add %o0, 8, %o0
bne,pt %icc, 1b
EX_ST(STORE(stx, %g2, %o0 - 0x08))
.Llarge_aligned:
/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
andn %o2, 0x3f, %o4
sub %o2, %o4, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
add %o1, 0x40, %o1
EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
subcc %o4, 0x40, %o4
EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
EX_ST(STORE_INIT(%g1, %o0))
add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g2, %o0))
add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
EX_ST(STORE_INIT(%g3, %o0))
add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
add %o0, 0x08, %o0
EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
EX_ST(STORE_INIT(%o5, %o0))
add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g2, %o0))
add %o0, 0x08, %o0
EX_ST(STORE_INIT(%g3, %o0))
add %o0, 0x08, %o0
EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
add %o0, 0x08, %o0
bne,pt %icc, 1b
LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
membar #StoreLoad | #StoreStore
brz,pn %o2, .Lexit
cmp %o2, 19
ble,pn %icc, .Lsmall_unaligned
nop
ba,a,pt %icc, .Lmedium_noprefetch
.Lexit: retl
mov EX_RETVAL(%o3), %o0
.Llarge_src_unaligned:
andn %o2, 0x3f, %o4
sub %o2, %o4, %o2
VISEntryHalf
alignaddr %o1, %g0, %g1
add %o1, %o4, %o1
EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
subcc %o4, 0x40, %o4
EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
faligndata %f0, %f2, %f16
EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
faligndata %f2, %f4, %f18
add %g1, 0x40, %g1
faligndata %f4, %f6, %f20
faligndata %f6, %f8, %f22
faligndata %f8, %f10, %f24
faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
EX_ST(STORE(std, %f16, %o0 + 0x00))
EX_ST(STORE(std, %f18, %o0 + 0x08))
EX_ST(STORE(std, %f20, %o0 + 0x10))
EX_ST(STORE(std, %f22, %o0 + 0x18))
EX_ST(STORE(std, %f24, %o0 + 0x20))
EX_ST(STORE(std, %f26, %o0 + 0x28))
EX_ST(STORE(std, %f28, %o0 + 0x30))
EX_ST(STORE(std, %f30, %o0 + 0x38))
add %o0, 0x40, %o0
bne,pt %icc, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
VISExitHalf
brz,pn %o2, .Lexit
cmp %o2, 19
ble,pn %icc, .Lsmall_unaligned
nop
ba,a,pt %icc, .Lmedium_unaligned
.Lmedium:
LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
andcc %g2, 0x7, %g0
bne,pn %icc, .Lmedium_unaligned
nop
.Lmedium_noprefetch:
andncc %o2, 0x20 - 1, %o5
be,pn %icc, 2f
sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
add %o1, 0x20, %o1
subcc %o5, 0x20, %o5
EX_ST(STORE(stx, %g1, %o0 + 0x00))
EX_ST(STORE(stx, %g2, %o0 + 0x08))
EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
EX_ST(STORE(stx, %o4, %o0 + 0x18))
bne,pt %icc, 1b
add %o0, 0x20, %o0
2: andcc %o2, 0x18, %o5
be,pt %icc, 3f
sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
add %o1, 0x08, %o1
add %o0, 0x08, %o0
subcc %o5, 0x08, %o5
bne,pt %icc, 1b
EX_ST(STORE(stx, %g1, %o0 - 0x08))
3: brz,pt %o2, .Lexit
cmp %o2, 0x04
bl,pn %icc, .Ltiny
nop
EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
add %o1, 0x04, %o1
add %o0, 0x04, %o0
subcc %o2, 0x04, %o2
bne,pn %icc, .Ltiny
EX_ST(STORE(stw, %g1, %o0 - 0x04))
ba,a,pt %icc, .Lexit
.Lmedium_unaligned:
/* First get dest 8 byte aligned. */
sub %g0, %o0, %g1
and %g1, 0x7, %g1
brz,pt %g1, 2f
sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
EX_ST(STORE(stb, %g2, %o0 - 0x01))
2:
and %o1, 0x7, %g1
brz,pn %g1, .Lmedium_noprefetch
sll %g1, 3, %g1
mov 64, %g2
sub %g2, %g1, %g2
andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
sllx %o4, %g1, %o4
andn %o2, 0x08 - 1, %o5
sub %o2, %o5, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
add %o1, 0x08, %o1
subcc %o5, 0x08, %o5
srlx %g3, %g2, GLOBAL_SPARE
or GLOBAL_SPARE, %o4, GLOBAL_SPARE
EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
add %o0, 0x08, %o0
bne,pt %icc, 1b
sllx %g3, %g1, %o4
srl %g1, 3, %g1
add %o1, %g1, %o1
brz,pn %o2, .Lexit
nop
ba,pt %icc, .Lsmall_unaligned
.Ltiny:
EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
subcc %o2, 1, %o2
be,pn %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x00))
EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
subcc %o2, 1, %o2
be,pn %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x01))
EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
ba,pt %icc, .Lexit
EX_ST(STORE(stb, %g1, %o0 + 0x02))
.Lsmall:
andcc %g2, 0x3, %g0
bne,pn %icc, .Lsmall_unaligned
andn %o2, 0x4 - 1, %o5
sub %o2, %o5, %o2
1:
EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
add %o1, 0x04, %o1
subcc %o5, 0x04, %o5
add %o0, 0x04, %o0
bne,pt %icc, 1b
EX_ST(STORE(stw, %g1, %o0 - 0x04))
brz,pt %o2, .Lexit
nop
ba,a,pt %icc, .Ltiny
.Lsmall_unaligned:
1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
add %o1, 1, %o1
add %o0, 1, %o0
subcc %o2, 1, %o2
bne,pt %icc, 1b
EX_ST(STORE(stb, %g1, %o0 - 0x01))
ba,a,pt %icc, .Lexit
.size FUNC_NAME, .-FUNC_NAME

43
arch/sparc/lib/NG4patch.S Normal file
View File

@@ -0,0 +1,43 @@
/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant.
*
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define NG_DO_PATCH(OLD, NEW) \
sethi %hi(NEW), %g1; \
or %g1, %lo(NEW), %g1; \
sethi %hi(OLD), %g2; \
or %g2, %lo(OLD), %g2; \
sub %g1, %g2, %g1; \
sethi %hi(BRANCH_ALWAYS), %g3; \
sll %g1, 11, %g1; \
srl %g1, 11 + 2, %g1; \
or %g3, %lo(BRANCH_ALWAYS), %g3; \
or %g3, %g1, %g3; \
stw %g3, [%g2]; \
sethi %hi(NOP), %g3; \
or %g3, %lo(NOP), %g3; \
stw %g3, [%g2 + 0x4]; \
flush %g2;
.globl niagara4_patch_copyops
.type niagara4_patch_copyops,#function
niagara4_patch_copyops:
NG_DO_PATCH(memcpy, NG4memcpy)
NG_DO_PATCH(___copy_from_user, NG4copy_from_user)
NG_DO_PATCH(___copy_to_user, NG4copy_to_user)
retl
nop
.size niagara4_patch_copyops,.-niagara4_patch_copyops
.globl niagara4_patch_pageops
.type niagara4_patch_pageops,#function
niagara4_patch_pageops:
NG_DO_PATCH(copy_user_page, NG4copy_user_page)
NG_DO_PATCH(_clear_page, NGclear_page)
NG_DO_PATCH(clear_user_page, NGclear_user_page)
retl
nop
.size niagara4_patch_pageops,.-niagara4_patch_pageops

View File

@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
restore
.align 32
.globl NGclear_page
.globl NGclear_user_page
NGclear_page: /* %o0=dest */
NGclear_user_page: /* %o0=dest, %o1=vaddr */
rd %asi, %g3

View File

@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page);
void VISenter(void);
EXPORT_SYMBOL(VISenter);
/* CRYPTO code needs this */
void VISenterhalf(void);
EXPORT_SYMBOL(VISenterhalf);
extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
unsigned long *);

View File

@@ -51,22 +51,40 @@
#include "init_64.h"
unsigned long kern_linear_pte_xor[2] __read_mostly;
unsigned long kern_linear_pte_xor[4] __read_mostly;
/* A bitmap, one bit for every 256MB of physical memory. If the bit
* is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
* if set we should use a 256MB page (via kern_linear_pte_xor[1]).
/* A bitmap, two bits for every 256MB of physical memory. These two
* bits determine what page size we use for kernel linear
* translations. They form an index into kern_linear_pte_xor[]. The
* value in the indexed slot is XOR'd with the TLB miss virtual
* address to form the resulting TTE. The mapping is:
*
* 0 ==> 4MB
* 1 ==> 256MB
* 2 ==> 2GB
* 3 ==> 16GB
*
* All sun4v chips support 256MB pages. Only SPARC-T4 and later
* support 2GB pages, and hopefully future cpus will support the 16GB
* pages as well. For slots 2 and 3, we encode a 256MB TTE xor there
* if these larger page sizes are not supported by the cpu.
*
* It would be nice to determine this from the machine description
* 'cpu' properties, but we need to have this table setup before the
* MDESC is initialized.
*/
unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
#ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB and 256MB linear mappings.
* Space is allocated for this right after the trap table
* in arch/sparc64/kernel/head.S
/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
* Space is allocated for this right after the trap table in
* arch/sparc64/kernel/head.S
*/
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
static unsigned long cpu_pgsz_mask;
#define MAX_BANKS 32
static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata;
@@ -403,6 +421,12 @@ EXPORT_SYMBOL(flush_icache_range);
void mmu_info(struct seq_file *m)
{
static const char *pgsz_strings[] = {
"8K", "64K", "512K", "4MB", "32MB",
"256MB", "2GB", "16GB",
};
int i, printed;
if (tlb_type == cheetah)
seq_printf(m, "MMU Type\t: Cheetah\n");
else if (tlb_type == cheetah_plus)
@@ -414,6 +438,17 @@ void mmu_info(struct seq_file *m)
else
seq_printf(m, "MMU Type\t: ???\n");
seq_printf(m, "MMU PGSZs\t: ");
printed = 0;
for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
if (cpu_pgsz_mask & (1UL << i)) {
seq_printf(m, "%s%s",
printed ? "," : "", pgsz_strings[i]);
printed++;
}
}
seq_putc(m, '\n');
#ifdef CONFIG_DEBUG_DCFLUSH
seq_printf(m, "DCPageFlushes\t: %d\n",
atomic_read(&dcpage_flushes));
@@ -1358,32 +1393,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
extern unsigned int kvmap_linear_patch[1];
#endif /* CONFIG_DEBUG_PAGEALLOC */
static void __init kpte_set_val(unsigned long index, unsigned long val)
{
unsigned long *ptr = kpte_linear_bitmap;
val <<= ((index % (BITS_PER_LONG / 2)) * 2);
ptr += (index / (BITS_PER_LONG / 2));
*ptr |= val;
}
static const unsigned long kpte_shift_min = 28; /* 256MB */
static const unsigned long kpte_shift_max = 34; /* 16GB */
static const unsigned long kpte_shift_incr = 3;
static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
unsigned long shift)
{
unsigned long size = (1UL << shift);
unsigned long mask = (size - 1UL);
unsigned long remains = end - start;
unsigned long val;
if (remains < size || (start & mask))
return start;
/* VAL maps:
*
* shift 28 --> kern_linear_pte_xor index 1
* shift 31 --> kern_linear_pte_xor index 2
* shift 34 --> kern_linear_pte_xor index 3
*/
val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
remains &= ~mask;
if (shift != kpte_shift_max)
remains = size;
while (remains) {
unsigned long index = start >> kpte_shift_min;
kpte_set_val(index, val);
start += 1UL << kpte_shift_min;
remains -= 1UL << kpte_shift_min;
}
return start;
}
static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
{
const unsigned long shift_256MB = 28;
const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
const unsigned long size_256MB = (1UL << shift_256MB);
unsigned long smallest_size, smallest_mask;
unsigned long s;
smallest_size = (1UL << kpte_shift_min);
smallest_mask = (smallest_size - 1UL);
while (start < end) {
long remains;
unsigned long orig_start = start;
remains = end - start;
if (remains < size_256MB)
break;
for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
start = kpte_mark_using_shift(start, end, s);
if (start & mask_256MB) {
start = (start + size_256MB) & ~mask_256MB;
continue;
if (start != orig_start)
break;
}
while (remains >= size_256MB) {
unsigned long index = start >> shift_256MB;
__set_bit(index, kpte_linear_bitmap);
start += size_256MB;
remains -= size_256MB;
}
if (start == orig_start)
start = (start + smallest_size) & ~smallest_mask;
}
}
@@ -1577,13 +1655,16 @@ static void __init sun4v_ktsb_init(void)
ktsb_descr[0].resv = 0;
#ifndef CONFIG_DEBUG_PAGEALLOC
/* Second KTSB for 4MB/256MB mappings. */
/* Second KTSB for 4MB/256MB/2GB/16GB mappings. */
ktsb_pa = (kern_base +
((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
HV_PGSZ_MASK_256MB);
ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
HV_PGSZ_MASK_256MB |
HV_PGSZ_MASK_2GB |
HV_PGSZ_MASK_16GB) &
cpu_pgsz_mask);
ktsb_descr[1].assoc = 1;
ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
ktsb_descr[1].ctx_idx = 0;
@@ -1606,6 +1687,47 @@ void __cpuinit sun4v_ktsb_register(void)
}
}
static void __init sun4u_linear_pte_xor_finalize(void)
{
#ifndef CONFIG_DEBUG_PAGEALLOC
/* This is where we would add Panther support for
* 32MB and 256MB pages.
*/
#endif
}
static void __init sun4v_linear_pte_xor_finalize(void)
{
#ifndef CONFIG_DEBUG_PAGEALLOC
if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
0xfffff80000000000UL;
kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
}
if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
0xfffff80000000000UL;
kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
}
if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
0xfffff80000000000UL;
kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
} else {
kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
}
#endif
}
/* paging_init() sets up the page tables */
static unsigned long last_valid_pfn;
@@ -1665,10 +1787,8 @@ void __init paging_init(void)
ktsb_phys_patch();
}
if (tlb_type == hypervisor) {
if (tlb_type == hypervisor)
sun4v_patch_tlb_handlers();
sun4v_ktsb_init();
}
/* Find available physical memory...
*
@@ -1727,9 +1847,6 @@ void __init paging_init(void)
__flush_tlb_all();
if (tlb_type == hypervisor)
sun4v_ktsb_register();
prom_build_devicetree();
of_populate_present_mask();
#ifndef CONFIG_SMP
@@ -1742,8 +1859,36 @@ void __init paging_init(void)
#ifndef CONFIG_SMP
mdesc_fill_in_cpu_data(cpu_all_mask);
#endif
mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
sun4v_linear_pte_xor_finalize();
sun4v_ktsb_init();
sun4v_ktsb_register();
} else {
unsigned long impl, ver;
cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
impl = ((ver >> 32) & 0xffff);
if (impl == PANTHER_IMPL)
cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
HV_PGSZ_MASK_256MB);
sun4u_linear_pte_xor_finalize();
}
/* Flush the TLBs and the 4M TSB so that the updated linear
* pte XOR settings are realized for all mappings.
*/
__flush_tlb_all();
#ifndef CONFIG_DEBUG_PAGEALLOC
memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
#endif
__flush_tlb_all();
/* Setup bootmem... */
last_valid_pfn = end_pfn = bootmem_init(phys_base);
@@ -2110,6 +2255,7 @@ static void __init sun4u_pgprot_init(void)
{
unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit;
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
_PAGE_CACHE_4U | _PAGE_P_4U |
@@ -2137,8 +2283,8 @@ static void __init sun4u_pgprot_init(void)
kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
_PAGE_P_4U | _PAGE_W_4U);
/* XXX Should use 256MB on Panther. XXX */
kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
_PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
@@ -2164,6 +2310,7 @@ static void __init sun4v_pgprot_init(void)
{
unsigned long page_none, page_shared, page_copy, page_readonly;
unsigned long page_exec_bit;
int i;
PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
_PAGE_CACHE_4V | _PAGE_P_4V |
@@ -2185,15 +2332,8 @@ static void __init sun4v_pgprot_init(void)
kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
#ifdef CONFIG_DEBUG_PAGEALLOC
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
0xfffff80000000000UL;
#else
kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
0xfffff80000000000UL;
#endif
kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V);

View File

@@ -8,12 +8,12 @@
#define MAX_PHYS_ADDRESS (1UL << 41UL)
#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
#define KPTE_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL)
#define VALID_ADDR_BITMAP_BYTES \
((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
extern unsigned long kern_linear_pte_xor[2];
extern unsigned long kern_linear_pte_xor[4];
extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
extern unsigned int sparc64_highest_unlocked_tlb_ent;
extern unsigned long sparc64_kern_pri_context;

View File

@@ -464,8 +464,12 @@ void bpf_jit_compile(struct sk_filter *fp)
emit_alu_K(OR, K);
break;
case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
case BPF_S_ALU_XOR_X:
emit_alu_X(XOR);
break;
case BPF_S_ALU_XOR_K: /* A ^= K */
emit_alu_K(XOR, K);
break;
case BPF_S_ALU_LSH_X: /* A <<= X */
emit_alu_X(SLL);
break;