Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux

There's a Niagara 2 memcpy fix in this tree, and I have a Kconfig fix
from Dave Jones which requires the sparc-next changes that went
upstream yesterday.

Signed-off-by: David S. Miller <davem@davemloft.net>
arch/sparc/Makefile
@@ -6,3 +6,4 @@ obj-y += kernel/
 obj-y += mm/
 obj-y += math-emu/
 obj-y += net/
+obj-y += crypto/
arch/sparc/crypto/Makefile (new file, 25 lines)
@@ -0,0 +1,25 @@
#
# Arch-specific CryptoAPI modules.
#

obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o

obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o

obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o

sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o

aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
des-sparc64-y := des_asm.o des_glue.o crop_devid.o
camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o

crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
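# Each <alg>-sparc64.o module above is a kbuild composite object: the
# assembler implementation plus its C glue, with crop_devid.o linked into
# every one so the OF device table it carries (see crop_devid.c below)
# can trigger module autoloading.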
arch/sparc/crypto/aes_asm.S (new file, 1535 lines)
(diff suppressed because the file is too large)
arch/sparc/crypto/aes_glue.c (new file, 477 lines)
@@ -0,0 +1,477 @@
/* Glue code for AES encryption optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon arch/x86/crypto/aesni-intel_glue.c
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *
 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
 * interface for 64-bit kernels.
 *    Authors: Adrian Hoban <adrian.hoban@intel.com>
 *             Gabriele Paoloni <gabriele.paoloni@intel.com>
 *             Tadeusz Struk (tadeusz.struk@intel.com)
 *             Aidan O'Mahony (aidan.o.mahony@intel.com)
 *    Copyright (c) 2010, Intel Corporation.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>

#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

struct aes_ops {
	void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
	void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
	void (*load_encrypt_keys)(const u64 *key);
	void (*load_decrypt_keys)(const u64 *key);
	void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len);
	void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len);
	void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len, u64 *iv);
	void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
			    unsigned int len, u64 *iv);
	void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
			  unsigned int len, u64 *iv);
};
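/* One aes_ops instance exists per key size; aes_set_key() selects the
 * matching one, so the per-request paths below need no key-length branches.
 */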

struct crypto_sparc64_aes_ctx {
	struct aes_ops *ops;
	u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
	u32 key_length;
	u32 expanded_key_length;
};

extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input,
				    u32 *output);
extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input,
				    u32 *output);
extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input,
				    u32 *output);

extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input,
				    u32 *output);
extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input,
				    u32 *output);
extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input,
				    u32 *output);

extern void aes_sparc64_load_encrypt_keys_128(const u64 *key);
extern void aes_sparc64_load_encrypt_keys_192(const u64 *key);
extern void aes_sparc64_load_encrypt_keys_256(const u64 *key);

extern void aes_sparc64_load_decrypt_keys_128(const u64 *key);
extern void aes_sparc64_load_decrypt_keys_192(const u64 *key);
extern void aes_sparc64_load_decrypt_keys_256(const u64 *key);

extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);
extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);
extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);

extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);
extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);
extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input,
					u64 *output, unsigned int len);

extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input,
					u64 *output, unsigned int len,
					u64 *iv);

extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input,
				      u64 *output, unsigned int len,
				      u64 *iv);
extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input,
				      u64 *output, unsigned int len,
				      u64 *iv);
extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input,
				      u64 *output, unsigned int len,
				      u64 *iv);

struct aes_ops aes128_ops = {
	.encrypt		= aes_sparc64_encrypt_128,
	.decrypt		= aes_sparc64_decrypt_128,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_128,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_128,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_128,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_128,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_128,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_128,
	.ctr_crypt		= aes_sparc64_ctr_crypt_128,
};

struct aes_ops aes192_ops = {
	.encrypt		= aes_sparc64_encrypt_192,
	.decrypt		= aes_sparc64_decrypt_192,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_192,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_192,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_192,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_192,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_192,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_192,
	.ctr_crypt		= aes_sparc64_ctr_crypt_192,
};

struct aes_ops aes256_ops = {
	.encrypt		= aes_sparc64_encrypt_256,
	.decrypt		= aes_sparc64_decrypt_256,
	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_256,
	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_256,
	.ecb_encrypt		= aes_sparc64_ecb_encrypt_256,
	.ecb_decrypt		= aes_sparc64_ecb_decrypt_256,
	.cbc_encrypt		= aes_sparc64_cbc_encrypt_256,
	.cbc_decrypt		= aes_sparc64_cbc_decrypt_256,
	.ctr_crypt		= aes_sparc64_ctr_crypt_256,
};

extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key,
				   unsigned int key_len);

static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *flags = &tfm->crt_flags;

	switch (key_len) {
	case AES_KEYSIZE_128:
		ctx->expanded_key_length = 0xb0;
		ctx->ops = &aes128_ops;
		break;

	case AES_KEYSIZE_192:
		ctx->expanded_key_length = 0xd0;
		ctx->ops = &aes192_ops;
		break;

	case AES_KEYSIZE_256:
		ctx->expanded_key_length = 0xf0;
		ctx->ops = &aes256_ops;
		break;

	default:
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}
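	/* 0xb0/0xd0/0xf0 = 176/208/240 bytes, i.e. (rounds + 1) * 16 for
	 * 10-, 12- and 14-round AES; presumably this is why the decrypt
	 * paths below compute key_end, a pointer just past the schedule.
	 */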

	aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len);
	ctx->key_length = key_len;

	return 0;
}

static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);

	ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}

#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
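/* AES_BLOCK_MASK rounds a byte count down to a whole number of 16-byte
 * blocks; the walk loops below hand only that much to the asm routines
 * and return the remainder via blkcipher_walk_done().
 */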

static int ecb_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_encrypt_keys(&ctx->key[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->ecb_encrypt(&ctx->key[0],
					      (const u64 *)walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
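	/* The VIS-based asm routines leave FPU state dirty; clearing %fprs
	 * here (and in the other walk loops below) presumably marks that
	 * state dead so it need not be preserved for this task.
	 */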
	fprs_write(0);
	return err;
}

static int ecb_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	u64 *key_end;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_decrypt_keys(&ctx->key[0]);
	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->ecb_decrypt(key_end,
					      (const u64 *) walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr, block_len);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);

	return err;
}

static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_encrypt_keys(&ctx->key[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->cbc_encrypt(&ctx->key[0],
					      (const u64 *)walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len, (u64 *) walk.iv);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	u64 *key_end;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_decrypt_keys(&ctx->key[0]);
	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->cbc_decrypt(key_end,
					      (const u64 *) walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len, (u64 *) walk.iv);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);

	return err;
}

static int ctr_crypt(struct blkcipher_desc *desc,
		     struct scatterlist *dst, struct scatterlist *src,
		     unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_encrypt_keys(&ctx->key[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->ctr_crypt(&ctx->key[0],
					    (const u64 *)walk.src.virt.addr,
					    (u64 *) walk.dst.virt.addr,
					    block_len, (u64 *) walk.iv);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static struct crypto_alg algs[] = { {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 3,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
}, {
	.cra_name		= "ecb(aes)",
	.cra_driver_name	= "ecb-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(aes)",
	.cra_driver_name	= "cbc-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(aes)",
	.cra_driver_name	= "ctr-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };
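/* All entries register at SPARC_CR_OPCODE_PRIORITY; the crypto core picks
 * the highest-priority implementation of a given cra_name, so these
 * override the generic C "aes", "ecb(aes)", "cbc(aes)" and "ctr(aes)".
 */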

static bool __init sparc64_has_aes_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_AES))
		return false;

	return true;
}

static int __init aes_sparc64_mod_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(algs); i++)
		INIT_LIST_HEAD(&algs[i].cra_list);

	if (sparc64_has_aes_opcode()) {
		pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
		return crypto_register_algs(algs, ARRAY_SIZE(algs));
	}
	pr_info("sparc64 aes opcodes not available.\n");
	return -ENODEV;
}

static void __exit aes_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}

module_init(aes_sparc64_mod_init);
module_exit(aes_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AES Cipher Algorithm, sparc64 aes opcode accelerated");

MODULE_ALIAS("aes");
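A minimal usage sketch (not part of this commit): reaching the "cbc(aes)"
implementation registered above through the blkcipher API of this era. The
key/iv/buf values are illustrative placeholders, and error handling is
abbreviated.

	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	struct scatterlist sg;
	u8 key[16], iv[16], buf[64];	/* illustrative, multiple of 16 */
	int err;

	tfm = crypto_alloc_blkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.tfm = tfm;
	desc.flags = 0;
	err = crypto_blkcipher_setkey(tfm, key, sizeof(key));
	crypto_blkcipher_set_iv(tfm, iv, sizeof(iv));
	sg_init_one(&sg, buf, sizeof(buf));
	if (!err)
		err = crypto_blkcipher_encrypt(&desc, &sg, &sg, sizeof(buf));
	crypto_free_blkcipher(tfm);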
arch/sparc/crypto/camellia_asm.S (new file, 563 lines)
@@ -0,0 +1,563 @@
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)

#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)

	.data

	.align	8
SIGMA:	.xword	0xA09E667F3BCC908B
	.xword	0xB67AE8584CAA73B2
	.xword	0xC6EF372FE94F82BE
	.xword	0x54FF53A5F1D36F1C
	.xword	0x10E527FADE682D1D
	.xword	0xB05688C2B3E6C1FD

	.text

	.align	32
ENTRY(camellia_sparc64_key_expand)
	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
	VISEntry
	ld	[%o0 + 0x00], %f0	! i0, k[0]
	ld	[%o0 + 0x04], %f1	! i1, k[1]
	ld	[%o0 + 0x08], %f2	! i2, k[2]
	ld	[%o0 + 0x0c], %f3	! i3, k[3]
	std	%f0, [%o1 + 0x00]	! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [%o1 + 0x08]	! k[2, 3]
	cmp	%o2, 16
	be	10f
	 fsrc2	%f2, %f30

	ld	[%o0 + 0x10], %f0
	ld	[%o0 + 0x14], %f1
	std	%f0, [%o1 + 0x20]	! k[8, 9]
	cmp	%o2, 24
	fone	%f10
	be,a	1f
	 fxor	%f10, %f0, %f2
	ld	[%o0 + 0x18], %f2
	ld	[%o0 + 0x1c], %f3
1:
	std	%f2, [%o1 + 0x28]	! k[10, 11]
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2

10:
	sethi	%hi(SIGMA), %g3
	or	%g3, %lo(SIGMA), %g3
	ldd	[%g3 + 0x00], %f16
	ldd	[%g3 + 0x08], %f18
	ldd	[%g3 + 0x10], %f20
	ldd	[%g3 + 0x18], %f22
	ldd	[%g3 + 0x20], %f24
	ldd	[%g3 + 0x28], %f26
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	CAMELLIA_F(20, 2, 0, 2)
	CAMELLIA_F(22, 0, 2, 0)

#define ROTL128(S01, S23, TMP1, TMP2, N) \
	srlx	S01, (64 - N), TMP1; \
	sllx	S01, N, S01; \
	srlx	S23, (64 - N), TMP2; \
	sllx	S23, N, S23; \
	or	S01, TMP2, S01; \
	or	S23, TMP1, S23
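/* ROTL128 rotates the 128-bit quantity held in the register pair S01:S23
 * (S01 the high half) left by N bits, N < 64: each half shifts left by N
 * and takes the N bits shifted out of the other half.
 */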

	cmp	%o2, 16
	bne	1f
	 nop
	/* 128-bit key */
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
	stx	%o5, [%o1 + 0xc8]	! k[50, 51]

	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]

	ba,pt	%xcc, 2f
	 mov	(3 * 16 * 4), %o0

1:
	/* 192-bit or 256-bit key */
	std	%f0, [%o1 + 0x30]	! k[12, 13]
	std	%f2, [%o1 + 0x38]	! k[14, 15]
	ldd	[%o1 + 0x20], %f4	! k[ 8, 9]
	ldd	[%o1 + 0x28], %f6	! k[10, 11]
	fxor	%f0, %f4, %f0
	fxor	%f2, %f6, %f2
	CAMELLIA_F(24, 2, 0, 2)
	CAMELLIA_F(26, 0, 2, 0)
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 51)
	stx	%o4, [%o1 + 0x100]	! k[64, 65]
	stx	%o5, [%o1 + 0x108]	! k[66, 67]
	ldx	[%o1 + 0x20], %o4	! k[ 8, 9]
	ldx	[%o1 + 0x28], %o5	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
	ldx	[%o1 + 0x30], %o4	! k[12, 13]
	ldx	[%o1 + 0x38], %o5	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	srlx	%o4, 32, %g2
	srlx	%o5, 32, %g3
	stw	%o4, [%o1 + 0xc0]	! k[48]
	stw	%g3, [%o1 + 0xc4]	! k[49]
	stw	%o5, [%o1 + 0xc8]	! k[50]
	stw	%g2, [%o1 + 0xcc]	! k[51]
	ROTL128(%o4, %o5, %g2, %g3, 49)
	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 45)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
	mov	(4 * 16 * 4), %o0
2:
	add	%o1, %o0, %o1
	ldd	[%o1 + 0x00], %f0
	ldd	[%o1 + 0x08], %f2
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	add	%o3, 0x10, %o3
1:
	sub	%o1, (16 * 4), %o1
	ldd	[%o1 + 0x38], %f0
	ldd	[%o1 + 0x30], %f2
	ldd	[%o1 + 0x28], %f4
	ldd	[%o1 + 0x20], %f6
	ldd	[%o1 + 0x18], %f8
	ldd	[%o1 + 0x10], %f10
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	std	%f4, [%o3 + 0x10]
	std	%f6, [%o3 + 0x18]
	std	%f8, [%o3 + 0x20]
	std	%f10, [%o3 + 0x28]

	ldd	[%o1 + 0x08], %f0
	ldd	[%o1 + 0x00], %f2
	std	%f0, [%o3 + 0x30]
	std	%f2, [%o3 + 0x38]
	subcc	%o0, (16 * 4), %o0
	bne,pt	%icc, 1b
	 add	%o3, (16 * 4), %o3

	std	%f2, [%o3 - 0x10]
	std	%f0, [%o3 - 0x08]

	retl
	 VISExit
ENDPROC(camellia_sparc64_key_expand)

	.align	32
ENTRY(camellia_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
	VISEntry

	ld	[%o1 + 0x00], %f0
	ld	[%o1 + 0x04], %f1
	ld	[%o1 + 0x08], %f2
	ld	[%o1 + 0x0c], %f3

	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6

	cmp	%o3, 16
	fxor	%f4, %f0, %f0
	be	1f
	 fxor	%f6, %f2, %f2

	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	add	%o0, 0x40, %o0

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)

1:
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	ldd	[%o0 + 0xc8], %f54

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0

	st	%f2, [%o2 + 0x00]
	st	%f3, [%o2 + 0x04]
	st	%f0, [%o2 + 0x08]
	st	%f1, [%o2 + 0x0c]

	retl
	 VISExit
ENDPROC(camellia_sparc64_crypt)

	.align	32
ENTRY(camellia_sparc64_load_keys)
	/* %o0=key, %o1=key_len */
	VISEntry
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	retl
	 ldd	[%o0 + 0xc8], %f54
ENDPROC(camellia_sparc64_load_keys)

	.align	32
ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)

	.align	32
ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	retl
	 nop
ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)

	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f60
	fxor	%f54, %f0, %f62
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)

	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f60
	fxor	%f22, %f0, %f62
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)

	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)

	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	 std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
arch/sparc/crypto/camellia_glue.c (new file, 322 lines)
@@ -0,0 +1,322 @@
/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes.
 *
 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>

#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

#define CAMELLIA_MIN_KEY_SIZE	16
#define CAMELLIA_MAX_KEY_SIZE	32
#define CAMELLIA_BLOCK_SIZE	16
#define CAMELLIA_TABLE_BYTE_LEN	272
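/* 272 bytes = 34 x 8-byte subkeys, the full Camellia schedule for
 * 192/256-bit keys (kw1-kw4, k1-k24, kl1-kl6 in the specification);
 * 128-bit keys need only 26 of these subkeys.
 */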

struct camellia_sparc64_ctx {
	u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	int key_len;
};

extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key,
					unsigned int key_len, u64 *decrypt_key);

static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
			    unsigned int key_len)
{
	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
	const u32 *in_key = (const u32 *) _in_key;
	u32 *flags = &tfm->crt_flags;

	if (key_len != 16 && key_len != 24 && key_len != 32) {
		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
		return -EINVAL;
	}

	ctx->key_len = key_len;

	camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0],
				    key_len, &ctx->decrypt_key[0]);
	return 0;
}

extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
				   u32 *output, unsigned int key_len);

static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);

	camellia_sparc64_crypt(&ctx->encrypt_key[0],
			       (const u32 *) src,
			       (u32 *) dst, ctx->key_len);
}

static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);

	camellia_sparc64_crypt(&ctx->decrypt_key[0],
			       (const u32 *) src,
			       (u32 *) dst, ctx->key_len);
}

extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len);

typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
			  const u64 *key);

extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
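/* A "grand round" is one 6-round block of the Camellia structure: the
 * 18-round (128-bit key) variant uses 3 such blocks and the 24-round
 * (192/256-bit key) variant uses 4, with FL/FL^-1 layers in between;
 * key_len selects between the two entry points below.
 */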

#define CAMELLIA_BLOCK_MASK	(~(CAMELLIA_BLOCK_SIZE - 1))

static int __ecb_crypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes, bool encrypt)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	ecb_crypt_op *op;
	const u64 *key;
	int err;

	op = camellia_sparc64_ecb_crypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_ecb_crypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		key = &ctx->encrypt_key[0];
	else
		key = &ctx->decrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key);
		}
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	return __ecb_crypt(desc, dst, src, nbytes, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	return __ecb_crypt(desc, dst, src, nbytes, false);
}

typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
			  const u64 *key, u64 *iv);

extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;

static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	cbc_crypt_op *op;
	const u64 *key;
	int err;

	op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_cbc_encrypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	key = &ctx->encrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key,
			   (u64 *) walk.iv);
		}
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	cbc_crypt_op *op;
	const u64 *key;
	int err;

	op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_cbc_decrypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	key = &ctx->decrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key,
			   (u64 *) walk.iv);
		}
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static struct crypto_alg algs[] = { {
	.cra_name		= "camellia",
	.cra_driver_name	= "camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 3,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.cia_max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.cia_setkey		= camellia_set_key,
			.cia_encrypt		= camellia_encrypt,
			.cia_decrypt		= camellia_decrypt
		}
	}
}, {
	.cra_name		= "ecb(camellia)",
	.cra_driver_name	= "ecb-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(camellia)",
	.cra_driver_name	= "cbc-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}
};

static bool __init sparc64_has_camellia_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_CAMELLIA))
		return false;

	return true;
}

static int __init camellia_sparc64_mod_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(algs); i++)
		INIT_LIST_HEAD(&algs[i].cra_list);

	if (sparc64_has_camellia_opcode()) {
		pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
		return crypto_register_algs(algs, ARRAY_SIZE(algs));
	}
	pr_info("sparc64 camellia opcodes not available.\n");
	return -ENODEV;
}

static void __exit camellia_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}

module_init(camellia_sparc64_mod_init);
module_exit(camellia_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");

MODULE_ALIAS("camellia");
arch/sparc/crypto/crc32c_asm.S (new file, 20 lines)
@@ -0,0 +1,20 @@
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>

#include "opcodes.h"

ENTRY(crc32c_sparc64)
	/* %o0=crc32p, %o1=data_ptr, %o2=len */
	VISEntryHalf
	lda	[%o0] ASI_PL, %f1
1:	ldd	[%o1], %f2
	CRC32C(0,2,0)
	subcc	%o2, 8, %o2
	bne,pt	%icc, 1b
	 add	%o1, 0x8, %o1
	sta	%f1, [%o0] ASI_PL
	VISExitHalf
2:	retl
	 nop
ENDPROC(crc32c_sparc64)
arch/sparc/crypto/crc32c_glue.c (new file, 179 lines)
@@ -0,0 +1,179 @@
/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon arch/x86/crypto/crc32c-intel.c
 *
 * Copyright (C) 2008 Intel Corporation
 * Authors: Austin Zhang <austin_zhang@linux.intel.com>
 *          Kent Liu <kent.liu@intel.com>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/crc32.h>

#include <crypto/internal/hash.h>

#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

/*
 * Setting the seed allows arbitrary accumulators and flexible XOR policy.
 * If your algorithm starts with ~0, then XOR with ~0 before you set
 * the seed.
 */
static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
				 unsigned int keylen)
{
	u32 *mctx = crypto_shash_ctx(hash);

	if (keylen != sizeof(u32)) {
		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
	return 0;
}

static int crc32c_sparc64_init(struct shash_desc *desc)
{
	u32 *mctx = crypto_shash_ctx(desc->tfm);
	u32 *crcp = shash_desc_ctx(desc);

	*crcp = *mctx;

	return 0;
}

extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);

static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len)
{
	unsigned int asm_len;

	asm_len = len & ~7U;
	if (asm_len) {
		crc32c_sparc64(crcp, data, asm_len);
		data += asm_len / 8;
		len -= asm_len;
	}
	if (len)
		*crcp = __crc32c_le(*crcp, (const unsigned char *) data, len);
}
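/* The asm loop consumes exactly 8 bytes per CRC32C opcode, so only the
 * 8-byte-multiple prefix goes to crc32c_sparc64(); any 1-7 byte tail is
 * finished with the generic __crc32c_le().
 */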

static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	u32 *crcp = shash_desc_ctx(desc);

	crc32c_compute(crcp, (const u64 *) data, len);

	return 0;
}

static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len,
				  u8 *out)
{
	u32 tmp = *crcp;

	crc32c_compute(&tmp, (const u64 *) data, len);

	*(__le32 *) out = ~cpu_to_le32(tmp);
	return 0;
}

static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
				unsigned int len, u8 *out)
{
	return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
}

static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
{
	u32 *crcp = shash_desc_ctx(desc);

	*(__le32 *) out = ~cpu_to_le32p(crcp);
	return 0;
}

static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
				 unsigned int len, u8 *out)
{
	return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
				      out);
}

static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
{
	u32 *key = crypto_tfm_ctx(tfm);

	*key = ~0;

	return 0;
}

#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

static struct shash_alg alg = {
	.setkey			= crc32c_sparc64_setkey,
	.init			= crc32c_sparc64_init,
	.update			= crc32c_sparc64_update,
	.final			= crc32c_sparc64_final,
	.finup			= crc32c_sparc64_finup,
	.digest			= crc32c_sparc64_digest,
	.descsize		= sizeof(u32),
	.digestsize		= CHKSUM_DIGEST_SIZE,
	.base			= {
		.cra_name		= "crc32c",
		.cra_driver_name	= "crc32c-sparc64",
		.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(u32),
		.cra_alignmask		= 7,
		.cra_module		= THIS_MODULE,
		.cra_init		= crc32c_sparc64_cra_init,
	}
};

static bool __init sparc64_has_crc32c_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_CRC32C))
		return false;

	return true;
}

static int __init crc32c_sparc64_mod_init(void)
{
	if (sparc64_has_crc32c_opcode()) {
		pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
		return crypto_register_shash(&alg);
	}
	pr_info("sparc64 crc32c opcode not available.\n");
	return -ENODEV;
}

static void __exit crc32c_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(crc32c_sparc64_mod_init);
module_exit(crc32c_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");

MODULE_ALIAS("crc32c");
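A minimal usage sketch (not part of this commit): computing a CRC32C value
through the shash interface registered above. The stacked-descriptor layout
relies on descsize being sizeof(u32); the input data is illustrative and
error handling is elided.

	struct crypto_shash *tfm;
	struct {
		struct shash_desc shash;
		u32 crc_ctx;		/* room for descsize == sizeof(u32) */
	} desc;
	const u8 data[] = "123456789";	/* illustrative input */
	u8 out[4];

	tfm = crypto_alloc_shash("crc32c", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.shash.tfm = tfm;
	desc.shash.flags = 0;
	crypto_shash_digest(&desc.shash, data, sizeof(data) - 1, out);
	crypto_free_shash(tfm);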
arch/sparc/crypto/crop_devid.c (new file, 14 lines)
@@ -0,0 +1,14 @@
#include <linux/module.h>
#include <linux/of_device.h>

/* This is a dummy device table linked into all of the crypto
 * opcode drivers.  It serves to trigger the module autoloading
 * mechanisms in userspace which scan the OF device tree and
 * load any modules which have device table entries that
 * match OF device nodes.
 */
static const struct of_device_id crypto_opcode_match[] = {
	{ .name = "cpu", .compatible = "sun4v", },
	{},
};
MODULE_DEVICE_TABLE(of, crypto_opcode_match);
arch/sparc/crypto/des_asm.S (new file, 418 lines)
@@ -0,0 +1,418 @@
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

	.align	32
ENTRY(des_sparc64_key_expand)
	/* %o0=input_key, %o1=output_key */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	DES_KEXPAND(0, 0, 0)
	DES_KEXPAND(0, 1, 2)
	DES_KEXPAND(2, 3, 6)
	DES_KEXPAND(2, 2, 4)
	DES_KEXPAND(6, 3, 10)
	DES_KEXPAND(6, 2, 8)
	DES_KEXPAND(10, 3, 14)
	DES_KEXPAND(10, 2, 12)
	DES_KEXPAND(14, 1, 16)
	DES_KEXPAND(16, 3, 20)
	DES_KEXPAND(16, 2, 18)
	DES_KEXPAND(20, 3, 24)
	DES_KEXPAND(20, 2, 22)
	DES_KEXPAND(24, 3, 28)
	DES_KEXPAND(24, 2, 26)
	DES_KEXPAND(28, 1, 30)
	std	%f0, [%o1 + 0x00]
	std	%f2, [%o1 + 0x08]
	std	%f4, [%o1 + 0x10]
	std	%f6, [%o1 + 0x18]
	std	%f8, [%o1 + 0x20]
	std	%f10, [%o1 + 0x28]
	std	%f12, [%o1 + 0x30]
	std	%f14, [%o1 + 0x38]
	std	%f16, [%o1 + 0x40]
	std	%f18, [%o1 + 0x48]
	std	%f20, [%o1 + 0x50]
	std	%f22, [%o1 + 0x58]
	std	%f24, [%o1 + 0x60]
	std	%f26, [%o1 + 0x68]
	std	%f28, [%o1 + 0x70]
	std	%f30, [%o1 + 0x78]
	retl
	 VISExitHalf
ENDPROC(des_sparc64_key_expand)
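/* The routine above materializes the full DES key schedule with the
 * DES_KEXPAND opcode: 16 round keys stored as 16 x 8 bytes (128 bytes)
 * at %o1, which des_sparc64_load_keys() later pulls into %f0-%f30.
 */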
|
||||
|
||||
.align 32
|
||||
ENTRY(des_sparc64_crypt)
|
||||
/* %o0=key, %o1=input, %o2=output */
|
||||
VISEntry
|
||||
ldd [%o1 + 0x00], %f32
|
||||
ldd [%o0 + 0x00], %f0
|
||||
ldd [%o0 + 0x08], %f2
|
||||
ldd [%o0 + 0x10], %f4
|
||||
ldd [%o0 + 0x18], %f6
|
||||
ldd [%o0 + 0x20], %f8
|
||||
ldd [%o0 + 0x28], %f10
|
||||
ldd [%o0 + 0x30], %f12
|
||||
ldd [%o0 + 0x38], %f14
|
||||
ldd [%o0 + 0x40], %f16
|
||||
ldd [%o0 + 0x48], %f18
|
||||
ldd [%o0 + 0x50], %f20
|
||||
ldd [%o0 + 0x58], %f22
|
||||
ldd [%o0 + 0x60], %f24
|
||||
ldd [%o0 + 0x68], %f26
|
||||
ldd [%o0 + 0x70], %f28
|
||||
ldd [%o0 + 0x78], %f30
|
||||
DES_IP(32, 32)
|
||||
DES_ROUND(0, 2, 32, 32)
|
||||
DES_ROUND(4, 6, 32, 32)
|
||||
DES_ROUND(8, 10, 32, 32)
|
||||
DES_ROUND(12, 14, 32, 32)
|
||||
DES_ROUND(16, 18, 32, 32)
|
||||
DES_ROUND(20, 22, 32, 32)
|
||||
DES_ROUND(24, 26, 32, 32)
|
||||
DES_ROUND(28, 30, 32, 32)
|
||||
DES_IIP(32, 32)
|
||||
std %f32, [%o2 + 0x00]
|
||||
retl
|
||||
VISExit
|
||||
ENDPROC(des_sparc64_crypt)
|
||||
|
||||
.align 32
|
||||
ENTRY(des_sparc64_load_keys)
|
||||
/* %o0=key */
|
||||
VISEntry
|
||||
ldd [%o0 + 0x00], %f0
|
||||
ldd [%o0 + 0x08], %f2
|
||||
ldd [%o0 + 0x10], %f4
|
||||
ldd [%o0 + 0x18], %f6
|
||||
ldd [%o0 + 0x20], %f8
|
||||
ldd [%o0 + 0x28], %f10
|
||||
ldd [%o0 + 0x30], %f12
|
||||
ldd [%o0 + 0x38], %f14
|
||||
ldd [%o0 + 0x40], %f16
|
||||
ldd [%o0 + 0x48], %f18
|
||||
ldd [%o0 + 0x50], %f20
|
||||
ldd [%o0 + 0x58], %f22
|
||||
ldd [%o0 + 0x60], %f24
|
||||
ldd [%o0 + 0x68], %f26
|
||||
ldd [%o0 + 0x70], %f28
|
||||
retl
|
||||
ldd [%o0 + 0x78], %f30
|
||||
ENDPROC(des_sparc64_load_keys)
|
||||
|
||||
.align 32
|
||||
ENTRY(des_sparc64_ecb_crypt)
|
||||
/* %o0=input, %o1=output, %o2=len */
|
||||
1: ldd [%o0 + 0x00], %f32
|
||||
add %o0, 0x08, %o0
|
||||
DES_IP(32, 32)
|
||||
DES_ROUND(0, 2, 32, 32)
|
||||
DES_ROUND(4, 6, 32, 32)
|
||||
DES_ROUND(8, 10, 32, 32)
|
||||
DES_ROUND(12, 14, 32, 32)
|
||||
DES_ROUND(16, 18, 32, 32)
|
||||
DES_ROUND(20, 22, 32, 32)
|
||||
DES_ROUND(24, 26, 32, 32)
|
||||
DES_ROUND(28, 30, 32, 32)
|
||||
DES_IIP(32, 32)
|
||||
std %f32, [%o1 + 0x00]
|
||||
subcc %o2, 0x08, %o2
|
||||
bne,pt %icc, 1b
|
||||
add %o1, 0x08, %o1
|
||||
retl
|
||||
nop
|
||||
ENDPROC(des_sparc64_ecb_crypt)
|
||||
|
||||
.align 32
|
||||
ENTRY(des_sparc64_cbc_encrypt)
|
||||
/* %o0=input, %o1=output, %o2=len, %o3=IV */
|
||||
ldd [%o3 + 0x00], %f32
|
||||
1: ldd [%o0 + 0x00], %f34
|
||||
fxor %f32, %f34, %f32
|
||||
DES_IP(32, 32)
|
||||
DES_ROUND(0, 2, 32, 32)
|
||||
DES_ROUND(4, 6, 32, 32)
|
||||
DES_ROUND(8, 10, 32, 32)
|
||||
DES_ROUND(12, 14, 32, 32)
|
||||
DES_ROUND(16, 18, 32, 32)
|
||||
DES_ROUND(20, 22, 32, 32)
|
||||
DES_ROUND(24, 26, 32, 32)
|
||||
DES_ROUND(28, 30, 32, 32)
|
||||
DES_IIP(32, 32)
|
||||
std %f32, [%o1 + 0x00]
|
||||
add %o0, 0x08, %o0
|
||||
subcc %o2, 0x08, %o2
|
||||
bne,pt %icc, 1b
|
||||
add %o1, 0x08, %o1
|
||||
retl
|
||||
std %f32, [%o3 + 0x00]
|
||||
ENDPROC(des_sparc64_cbc_encrypt)
|
||||
|
||||
.align 32
|
||||
ENTRY(des_sparc64_cbc_decrypt)
|
||||
/* %o0=input, %o1=output, %o2=len, %o3=IV */
|
||||
ldd [%o3 + 0x00], %f34
|
||||
1: ldd [%o0 + 0x00], %f36
|
||||
DES_IP(36, 32)
|
||||
DES_ROUND(0, 2, 32, 32)
|
||||
DES_ROUND(4, 6, 32, 32)
|
||||
DES_ROUND(8, 10, 32, 32)
|
||||
DES_ROUND(12, 14, 32, 32)
|
||||
DES_ROUND(16, 18, 32, 32)
|
||||
DES_ROUND(20, 22, 32, 32)
|
||||
DES_ROUND(24, 26, 32, 32)
|
||||
DES_ROUND(28, 30, 32, 32)
|
||||
DES_IIP(32, 32)
|
||||
fxor %f32, %f34, %f32
|
||||
fsrc2 %f36, %f34
|
||||
std %f32, [%o1 + 0x00]
|
||||
add %o0, 0x08, %o0
|
||||
subcc %o2, 0x08, %o2
|
||||
bne,pt %icc, 1b
|
||||
add %o1, 0x08, %o1
|
||||
retl
|
||||
std %f36, [%o3 + 0x00]
|
||||
ENDPROC(des_sparc64_cbc_decrypt)
|
||||
|
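The CBC decrypt loop above keeps the just-loaded ciphertext block live (fsrc2 %f36, %f34) so it can be XORed into the next block's output, and stores the final ciphertext back through %o3 as the next IV. A minimal user-space C sketch of that chaining over 64-bit blocks; decrypt_block() here is a stand-in permutation, not the DES round sequence:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the hardware DES rounds; any invertible 64-bit
 * transform works for illustrating the chaining.
 */
static uint64_t decrypt_block(uint64_t c)
{
	return c ^ 0xdeadbeefcafef00dULL;
}

static void cbc_decrypt(uint64_t *out, const uint64_t *in,
			unsigned int nblocks, uint64_t *iv)
{
	uint64_t prev = *iv;
	unsigned int i;

	for (i = 0; i < nblocks; i++) {
		uint64_t c = in[i];	/* keep ciphertext for chaining */

		out[i] = decrypt_block(c) ^ prev;
		prev = c;		/* mirrors fsrc2 %f36, %f34 */
	}
	*iv = prev;			/* last ciphertext becomes next IV */
}

int main(void)
{
	uint64_t ct[2] = { 0x1111, 0x2222 }, pt[2], iv = 0;

	cbc_decrypt(pt, ct, 2, &iv);
	printf("%llx %llx iv=%llx\n",
	       (unsigned long long)pt[0], (unsigned long long)pt[1],
	       (unsigned long long)iv);
	return 0;
}
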
	.align	32
ENTRY(des3_ede_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output */
	VISEntry
	ldd	[%o1 + 0x00], %f32
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x80], %f0
	ldd	[%o0 + 0x88], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x90], %f4
	ldd	[%o0 + 0x98], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0xa0], %f8
	ldd	[%o0 + 0xa8], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0xb0], %f12
	ldd	[%o0 + 0xb8], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0xc0], %f16
	ldd	[%o0 + 0xc8], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0xd0], %f20
	ldd	[%o0 + 0xd8], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0xe0], %f24
	ldd	[%o0 + 0xe8], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0xf0], %f28
	ldd	[%o0 + 0xf8], %f30
	DES_IIP(32, 32)
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x100], %f0
	ldd	[%o0 + 0x108], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x110], %f4
	ldd	[%o0 + 0x118], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0x120], %f8
	ldd	[%o0 + 0x128], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0x130], %f12
	ldd	[%o0 + 0x138], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0x140], %f16
	ldd	[%o0 + 0x148], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0x150], %f20
	ldd	[%o0 + 0x158], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0x160], %f24
	ldd	[%o0 + 0x168], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0x170], %f28
	ldd	[%o0 + 0x178], %f30
	DES_IIP(32, 32)
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)

	std	%f32, [%o2 + 0x00]
	retl
	 VISExit
ENDPROC(des3_ede_sparc64_crypt)

	.align	32
ENTRY(des3_ede_sparc64_load_keys)
	/* %o0=key */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	ldd	[%o0 + 0x80], %f32
	ldd	[%o0 + 0x88], %f34
	ldd	[%o0 + 0x90], %f36
	ldd	[%o0 + 0x98], %f38
	ldd	[%o0 + 0xa0], %f40
	ldd	[%o0 + 0xa8], %f42
	ldd	[%o0 + 0xb0], %f44
	ldd	[%o0 + 0xb8], %f46
	ldd	[%o0 + 0xc0], %f48
	ldd	[%o0 + 0xc8], %f50
	ldd	[%o0 + 0xd0], %f52
	ldd	[%o0 + 0xd8], %f54
	ldd	[%o0 + 0xe0], %f56
	retl
	 ldd	[%o0 + 0xe8], %f58
ENDPROC(des3_ede_sparc64_load_keys)

#define DES3_LOOP_BODY(X) \
	DES_IP(X, X) \
	DES_ROUND(0, 2, X, X) \
	DES_ROUND(4, 6, X, X) \
	DES_ROUND(8, 10, X, X) \
	DES_ROUND(12, 14, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0xf0], %f16; \
	ldd	[%o0 + 0xf8], %f18; \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x100], %f20; \
	ldd	[%o0 + 0x108], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x110], %f24; \
	ldd	[%o0 + 0x118], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x120], %f28; \
	ldd	[%o0 + 0x128], %f30; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(32, 34, X, X) \
	ldd	[%o0 + 0x130], %f0; \
	ldd	[%o0 + 0x138], %f2; \
	DES_ROUND(36, 38, X, X) \
	ldd	[%o0 + 0x140], %f4; \
	ldd	[%o0 + 0x148], %f6; \
	DES_ROUND(40, 42, X, X) \
	ldd	[%o0 + 0x150], %f8; \
	ldd	[%o0 + 0x158], %f10; \
	DES_ROUND(44, 46, X, X) \
	ldd	[%o0 + 0x160], %f12; \
	ldd	[%o0 + 0x168], %f14; \
	DES_ROUND(48, 50, X, X) \
	DES_ROUND(52, 54, X, X) \
	DES_ROUND(56, 58, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x170], %f16; \
	ldd	[%o0 + 0x178], %f18; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x50], %f20; \
	ldd	[%o0 + 0x58], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x60], %f24; \
	ldd	[%o0 + 0x68], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x70], %f28; \
	ldd	[%o0 + 0x78], %f30; \
	DES_ROUND(0, 2, X, X) \
	ldd	[%o0 + 0x00], %f0; \
	ldd	[%o0 + 0x08], %f2; \
	DES_ROUND(4, 6, X, X) \
	ldd	[%o0 + 0x10], %f4; \
	ldd	[%o0 + 0x18], %f6; \
	DES_ROUND(8, 10, X, X) \
	ldd	[%o0 + 0x20], %f8; \
	ldd	[%o0 + 0x28], %f10; \
	DES_ROUND(12, 14, X, X) \
	ldd	[%o0 + 0x30], %f12; \
	ldd	[%o0 + 0x38], %f14; \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x40], %f16; \
	ldd	[%o0 + 0x48], %f18; \
	DES_IIP(X, X)

	.align	32
ENTRY(des3_ede_sparc64_ecb_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len */
1:	ldd	[%o1 + 0x00], %f60
	DES3_LOOP_BODY(60)
	std	%f60, [%o2 + 0x00]
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	 add	%o2, 0x08, %o2
	retl
	 nop
ENDPROC(des3_ede_sparc64_ecb_crypt)

	.align	32
ENTRY(des3_ede_sparc64_cbc_encrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldd	[%o4 + 0x00], %f60
1:	ldd	[%o1 + 0x00], %f62
	fxor	%f60, %f62, %f60
	DES3_LOOP_BODY(60)
	std	%f60, [%o2 + 0x00]
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	 add	%o2, 0x08, %o2
	retl
	 std	%f60, [%o4 + 0x00]
ENDPROC(des3_ede_sparc64_cbc_encrypt)

	.align	32
ENTRY(des3_ede_sparc64_cbc_decrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
	ldd	[%o4 + 0x00], %f62
1:	ldx	[%o1 + 0x00], %g1
	MOVXTOD_G1_F60
	DES3_LOOP_BODY(60)
	fxor	%f62, %f60, %f60
	MOVXTOD_G1_F62
	std	%f60, [%o2 + 0x00]
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	 add	%o2, 0x08, %o2
	retl
	 stx	%g1, [%o4 + 0x00]
ENDPROC(des3_ede_sparc64_cbc_decrypt)
529
arch/sparc/crypto/des_glue.c
Normal file
@@ -0,0 +1,529 @@
/* Glue code for DES encryption optimized for sparc64 crypto opcodes.
 *
 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/des.h>

#include <asm/fpumacro.h>
#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

struct des_sparc64_ctx {
	u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
};

struct des3_ede_sparc64_ctx {
	u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
};

static void encrypt_to_decrypt(u64 *d, const u64 *e)
{
	const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1;
	int i;

	for (i = 0; i < DES_EXPKEY_WORDS / 2; i++)
		*d++ = *s--;
}

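DES decryption runs the same 16 rounds with the subkeys in the opposite order, so encrypt_to_decrypt() above just copies the expanded schedule back to front. A user-space sketch of the same reversal (the 16-entry size matches DES_EXPKEY_WORDS / 2; the round-key values are stand-ins):

#include <stdio.h>

#define EXPKEY_U64S 16	/* DES_EXPKEY_WORDS / 2: sixteen 64-bit round keys */

static void reverse_schedule(unsigned long long *d,
			     const unsigned long long *e)
{
	const unsigned long long *s = e + EXPKEY_U64S - 1;
	int i;

	for (i = 0; i < EXPKEY_U64S; i++)
		*d++ = *s--;	/* round key N becomes round key 15 - N */
}

int main(void)
{
	unsigned long long enc[EXPKEY_U64S], dec[EXPKEY_U64S];
	int i;

	for (i = 0; i < EXPKEY_U64S; i++)
		enc[i] = i;	/* stand-in round keys */
	reverse_schedule(dec, enc);
	printf("dec[0] = %llu, dec[15] = %llu\n", dec[0], dec[15]); /* 15, 0 */
	return 0;
}
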
extern void des_sparc64_key_expand(const u32 *input_key, u64 *key);

static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
		       unsigned int keylen)
{
	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	u32 *flags = &tfm->crt_flags;
	u32 tmp[DES_EXPKEY_WORDS];
	int ret;

	/* Even though we have special instructions for key expansion,
	 * we call des_ekey() so that we don't have to write our own
	 * weak key detection code.
	 */
	ret = des_ekey(tmp, key);
	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);

	return 0;
}

extern void des_sparc64_crypt(const u64 *key, const u64 *input,
			      u64 *output);

static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
	const u64 *K = ctx->encrypt_expkey;

	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}

static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
	const u64 *K = ctx->decrypt_expkey;

	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}

extern void des_sparc64_load_keys(const u64 *key);

extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
				  unsigned int len);

#define DES_BLOCK_MASK	(~(DES_BLOCK_SIZE - 1))

static int __ecb_crypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes, bool encrypt)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	else
		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

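The walk hands back arbitrary chunk sizes; block_len = nbytes & DES_BLOCK_MASK rounds each chunk down to a multiple of the 8-byte DES block, and the sub-block remainder is handed back through blkcipher_walk_done(). A standalone check of that arithmetic (chunk sizes are illustrative):

#include <stdio.h>

#define BLOCK_SIZE 8			/* DES block size in bytes */
#define BLOCK_MASK (~(BLOCK_SIZE - 1))	/* clears the low 3 bits */

int main(void)
{
	unsigned int chunks[] = { 8, 24, 29, 7 };
	unsigned int i;

	for (i = 0; i < sizeof(chunks) / sizeof(chunks[0]); i++) {
		unsigned int nbytes = chunks[i];
		unsigned int block_len = nbytes & BLOCK_MASK;

		/* e.g. 29 -> process 24 now, hand 5 back to the walk */
		printf("%u bytes: process %u, leftover %u\n",
		       nbytes, block_len, nbytes & (BLOCK_SIZE - 1));
	}
	return 0;
}
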
static int ecb_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	return __ecb_crypt(desc, dst, src, nbytes, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	return __ecb_crypt(desc, dst, src, nbytes, false);
}

extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
				    unsigned int len, u64 *iv);

static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
				    unsigned int len, u64 *iv);

static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
			    unsigned int keylen)
{
	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	const u32 *K = (const u32 *)key;
	u32 *flags = &tfm->crt_flags;
	u64 k1[DES_EXPKEY_WORDS / 2];
	u64 k2[DES_EXPKEY_WORDS / 2];
	u64 k3[DES_EXPKEY_WORDS / 2];

	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	des_sparc64_key_expand((const u32 *)key, k1);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k2);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k3);

	memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
	encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
	memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
	       &k3[0], sizeof(k3));

	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
	memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
	       &k2[0], sizeof(k2));
	encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
			   &k1[0]);

	return 0;
}

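The middle stage of EDE runs the cipher in the decrypt direction, and the hardware rounds only walk a schedule forward, so the setkey above stores the middle key's schedule reversed inside the encrypt schedule, and the decrypt schedule is the whole layout mirrored. A minimal sketch of that layout with 16-entry stand-in schedules (not real DES subkeys):

#include <stdio.h>
#include <string.h>

#define NKEYS 16	/* one 64-bit subkey per DES round */

static void reverse16(unsigned long long *d, const unsigned long long *e)
{
	int i;

	for (i = 0; i < NKEYS; i++)
		d[i] = e[NKEYS - 1 - i];
}

int main(void)
{
	unsigned long long k1[NKEYS], k2[NKEYS], k3[NKEYS];
	unsigned long long enc[3 * NKEYS], dec[3 * NKEYS];
	int i;

	for (i = 0; i < NKEYS; i++) {	/* stand-in subkeys */
		k1[i] = 0x100 + i;
		k2[i] = 0x200 + i;
		k3[i] = 0x300 + i;
	}
	/* encrypt: E(k1), then D(k2) == k2 reversed, then E(k3) */
	memcpy(&enc[0], k1, sizeof(k1));
	reverse16(&enc[NKEYS], k2);
	memcpy(&enc[2 * NKEYS], k3, sizeof(k3));

	/* decrypt: the mirror image, D(k3), E(k2), D(k1) */
	reverse16(&dec[0], k3);
	memcpy(&dec[NKEYS], k2, sizeof(k2));
	reverse16(&dec[2 * NKEYS], k1);

	printf("enc starts %llx, dec starts %llx\n", enc[0], dec[0]);
	return 0;
}
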
extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
				   u64 *output);

static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
	const u64 *K = ctx->encrypt_expkey;

	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}

static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
	const u64 *K = ctx->decrypt_expkey;

	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
}

extern void des3_ede_sparc64_load_keys(const u64 *key);

extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
				       u64 *output, unsigned int len);

static int __ecb3_crypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes, bool encrypt)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		K = &ctx->encrypt_expkey[0];
	else
		K = &ctx->decrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_ecb_crypt(K, src64,
						   (u64 *) walk.dst.virt.addr,
						   block_len);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static int ecb3_encrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	return __ecb3_crypt(desc, dst, src, nbytes, true);
}

static int ecb3_decrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	return __ecb3_crypt(desc, dst, src, nbytes, false);
}

extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
					 u64 *output, unsigned int len,
					 u64 *iv);

static int cbc3_encrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->encrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_encrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
					 u64 *output, unsigned int len,
					 u64 *iv);

static int cbc3_decrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->decrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_decrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}

static struct crypto_alg algs[] = { {
	.cra_name		= "des",
	.cra_driver_name	= "des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES_KEY_SIZE,
			.cia_max_keysize	= DES_KEY_SIZE,
			.cia_setkey		= des_set_key,
			.cia_encrypt		= des_encrypt,
			.cia_decrypt		= des_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des)",
	.cra_driver_name	= "ecb-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des)",
	.cra_driver_name	= "cbc-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "des3_ede",
	.cra_driver_name	= "des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
			.cia_setkey		= des3_ede_set_key,
			.cia_encrypt		= des3_ede_encrypt,
			.cia_decrypt		= des3_ede_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des3_ede)",
	.cra_driver_name	= "ecb-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= ecb3_encrypt,
			.decrypt	= ecb3_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des3_ede)",
	.cra_driver_name	= "cbc-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= cbc3_encrypt,
			.decrypt	= cbc3_decrypt,
		},
	},
} };

static bool __init sparc64_has_des_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_DES))
		return false;

	return true;
}

static int __init des_sparc64_mod_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(algs); i++)
		INIT_LIST_HEAD(&algs[i].cra_list);

	if (sparc64_has_des_opcode()) {
		pr_info("Using sparc64 des opcodes optimized DES implementation\n");
		return crypto_register_algs(algs, ARRAY_SIZE(algs));
	}
	pr_info("sparc64 des opcodes not available.\n");
	return -ENODEV;
}

static void __exit des_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}

module_init(des_sparc64_mod_init);
module_exit(des_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");

MODULE_ALIAS("des");
70
arch/sparc/crypto/md5_asm.S
Normal file
@@ -0,0 +1,70 @@
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

ENTRY(md5_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	andcc	%o1, 0x7, %g0
	ld	[%o0 + 0x08], %f2
	bne,pn	%xcc, 10f
	 ld	[%o0 + 0x0c], %f3

1:
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	MD5

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

5:
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	retl
	 VISExitHalf
10:
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata	%f10, %f12, %f8
	faligndata	%f12, %f14, %f10
	faligndata	%f14, %f16, %f12
	faligndata	%f16, %f18, %f14
	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22

	MD5

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(md5_sparc64_transform)
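For misaligned input the 10: path rounds %o1 down with alignaddr, loads one extra doubleword, and uses faligndata to shift each adjacent pair into place; fsrc2 carries the last loaded doubleword into the next iteration. A C sketch of the merge for a byte offset r in 1..7 (the r == 0 case is the aligned fast path above; big-endian byte order as on sparc64):

#include <stdint.h>
#include <stdio.h>

/* Merge two adjacent aligned doublewords into the doubleword that
 * starts r bytes into 'hi', big-endian view.
 */
static uint64_t falign(uint64_t hi, uint64_t lo, unsigned int r)
{
	return (hi << (8 * r)) | (lo >> (64 - 8 * r));	/* 1 <= r <= 7 */
}

int main(void)
{
	uint64_t a = 0x0011223344556677ULL, b = 0x8899aabbccddeeffULL;

	/* offset 3: bytes 33 44 55 66 77 88 99 aa */
	printf("0x%016llx\n", (unsigned long long)falign(a, b, 3));
	return 0;
}
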
188
arch/sparc/crypto/md5_glue.c
Normal file
@@ -0,0 +1,188 @@
/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
 * and crypto/md5.c which are:
 *
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 * Copyright (c) Mathias Krause <minipli@googlemail.com>
 * Copyright (c) Cryptoapi developers.
 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/md5.h>

#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
				      unsigned int rounds);

static int md5_sparc64_init(struct shash_desc *desc)
{
	struct md5_state *mctx = shash_desc_ctx(desc);

	mctx->hash[0] = cpu_to_le32(0x67452301);
	mctx->hash[1] = cpu_to_le32(0xefcdab89);
	mctx->hash[2] = cpu_to_le32(0x98badcfe);
	mctx->hash[3] = cpu_to_le32(0x10325476);
	mctx->byte_count = 0;

	return 0;
}

static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	sctx->byte_count += len;
	if (partial) {
		done = MD5_HMAC_BLOCK_SIZE - partial;
		memcpy((u8 *)sctx->block + partial, data, done);
		md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
	}
	if (len - done >= MD5_HMAC_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;

		md5_sparc64_transform(sctx->hash, data + done, rounds);
		done += rounds * MD5_HMAC_BLOCK_SIZE;
	}

	memcpy(sctx->block, data + done, len - done);
}

static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	struct md5_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;

	/* Handle the fast case right here */
	if (partial + len < MD5_HMAC_BLOCK_SIZE) {
		sctx->byte_count += len;
		memcpy((u8 *)sctx->block + partial, data, len);
	} else
		__md5_sparc64_update(sctx, data, len, partial);

	return 0;
}

/* Add padding and return the message digest. */
static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct md5_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	u32 *dst = (u32 *)out;
	__le64 bits;
	static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_le64(sctx->byte_count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __md5_sparc64_update() */
	if (padlen <= 56) {
		sctx->byte_count += padlen;
		memcpy((u8 *)sctx->block + index, padding, padlen);
	} else {
		__md5_sparc64_update(sctx, padding, padlen, index);
	}
	__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < MD5_HASH_WORDS; i++)
		dst[i] = sctx->hash[i];

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

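The padding arithmetic above appends one 0x80 byte plus zeros up to 56 mod 64, so that the 8-byte bit count exactly fills the final block. A standalone check of the padlen formula (counts are illustrative):

#include <stdio.h>

#define BLOCK 64	/* MD5_HMAC_BLOCK_SIZE */

int main(void)
{
	unsigned int counts[] = { 0, 55, 56, 63 };
	unsigned int i;

	for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++) {
		unsigned int index = counts[i] % BLOCK;
		unsigned int padlen = (index < 56) ? (56 - index)
						   : ((BLOCK + 56) - index);

		/* index + padlen is always 56 mod 64, leaving 8 bytes
		 * for the length field: 0->56, 55->1, 56->64, 63->57
		 */
		printf("index %2u -> padlen %2u\n", index, padlen);
	}
	return 0;
}
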
static int md5_sparc64_export(struct shash_desc *desc, void *out)
{
	struct md5_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int md5_sparc64_import(struct shash_desc *desc, const void *in)
{
	struct md5_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static struct shash_alg alg = {
	.digestsize	= MD5_DIGEST_SIZE,
	.init		= md5_sparc64_init,
	.update		= md5_sparc64_update,
	.final		= md5_sparc64_final,
	.export		= md5_sparc64_export,
	.import		= md5_sparc64_import,
	.descsize	= sizeof(struct md5_state),
	.statesize	= sizeof(struct md5_state),
	.base		= {
		.cra_name	= "md5",
		.cra_driver_name= "md5-sparc64",
		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= MD5_HMAC_BLOCK_SIZE,
		.cra_module	= THIS_MODULE,
	}
};

static bool __init sparc64_has_md5_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_MD5))
		return false;

	return true;
}

static int __init md5_sparc64_mod_init(void)
{
	if (sparc64_has_md5_opcode()) {
		pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
		return crypto_register_shash(&alg);
	}
	pr_info("sparc64 md5 opcode not available.\n");
	return -ENODEV;
}

static void __exit md5_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(md5_sparc64_mod_init);
module_exit(md5_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");

MODULE_ALIAS("md5");
99
arch/sparc/crypto/opcodes.h
Normal file
@@ -0,0 +1,99 @@
#ifndef _OPCODES_H
#define _OPCODES_H

#define SPARC_CR_OPCODE_PRIORITY	300

#define F3F(x,y,z)	(((x)<<30)|((y)<<19)|((z)<<5))

#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20)))

#define RS1(x)		(FPD_ENCODE(x) << 14)
#define RS2(x)		(FPD_ENCODE(x) <<  0)
#define RS3(x)		(FPD_ENCODE(x) <<  9)
#define RD(x)		(FPD_ENCODE(x) << 25)
#define IMM5_0(x)	((x)           <<  0)
#define IMM5_9(x)	((x)           <<  9)

#define CRC32C(a,b,c)	\
	.word	(F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));

#define MD5		\
	.word	0x81b02800;
#define SHA1		\
	.word	0x81b02820;
#define SHA256		\
	.word	0x81b02840;
#define SHA512		\
	.word	0x81b02860;

#define AES_EROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_KEXPAND1(a,b,c,d)	\
	.word	(F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
#define AES_KEXPAND0(a,b,c)	\
	.word	(F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
#define AES_KEXPAND2(a,b,c)	\
	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));

#define DES_IP(a,b)	\
	.word	(F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
#define DES_IIP(a,b)	\
	.word	(F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
#define DES_KEXPAND(a,b,c)	\
	.word	(F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
#define DES_ROUND(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));

#define CAMELLIA_F(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define CAMELLIA_FL(a,b,c)	\
	.word	(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
#define CAMELLIA_FLI(a,b,c)	\
	.word	(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));

#define MOVDTOX_F0_O4	\
	.word	0x99b02200
#define MOVDTOX_F2_O5	\
	.word	0x9bb02202
#define MOVXTOD_G1_F60	\
	.word	0xbbb02301
#define MOVXTOD_G1_F62	\
	.word	0xbfb02301
#define MOVXTOD_G3_F4	\
	.word	0x89b02303;
#define MOVXTOD_G7_F6	\
	.word	0x8db02307;
#define MOVXTOD_G3_F0	\
	.word	0x81b02303;
#define MOVXTOD_G7_F2	\
	.word	0x85b02307;
#define MOVXTOD_O0_F0	\
	.word	0x81b02308;
#define MOVXTOD_O5_F0	\
	.word	0x81b0230d;
#define MOVXTOD_O5_F2	\
	.word	0x85b0230d;
#define MOVXTOD_O5_F4	\
	.word	0x89b0230d;
#define MOVXTOD_O5_F6	\
	.word	0x8db0230d;
#define MOVXTOD_G3_F60	\
	.word	0xbbb02303;
#define MOVXTOD_G7_F62	\
	.word	0xbfb02307;

#endif /* _OPCODES_H */
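These macros emit raw instruction words because assemblers of the day did not yet know the crypto opcodes: F3F() builds the three-register SPARC instruction format, and FPD_ENCODE() folds bit 5 of a double-precision register number into bit 0 of the 5-bit field. A user-space sketch that reproduces the encoding for one DES_ROUND under exactly these macro definitions (the printed constant follows from the arithmetic, not from an assembler listing):

#include <stdio.h>

/* Mirrors the macros in opcodes.h, with unsigned arithmetic */
#define F3F(x, y, z)	(((unsigned int)(x) << 30) | \
			 ((unsigned int)(y) << 19) | \
			 ((unsigned int)(z) << 5))
#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20)))
#define RS1(x)		(FPD_ENCODE(x) << 14)
#define RS2(x)		(FPD_ENCODE(x) << 0)
#define RS3(x)		(FPD_ENCODE(x) << 9)
#define RD(x)		(FPD_ENCODE(x) << 25)

int main(void)
{
	/* DES_ROUND(0, 2, 32, 32): %f0/%f2 hold round keys, %f32 the data.
	 * FPD_ENCODE makes %f32 encode as 1 in each register field.
	 */
	unsigned int word = F3F(2, 0x19, 0x009) |
			    RS1(0) | RS2(2) | RS3(32) | RD(32);

	printf("DES_ROUND(0, 2, 32, 32) -> .word 0x%08x\n", word); /* 0x82c80322 */
	return 0;
}
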
72
arch/sparc/crypto/sha1_asm.S
Normal file
@@ -0,0 +1,72 @@
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

ENTRY(sha1_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	andcc	%o1, 0x7, %g0
	ld	[%o0 + 0x0c], %f3
	bne,pn	%xcc, 10f
	 ld	[%o0 + 0x10], %f4

1:
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA1

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

5:
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	retl
	 VISExitHalf
10:
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata	%f10, %f12, %f8
	faligndata	%f12, %f14, %f10
	faligndata	%f14, %f16, %f12
	faligndata	%f16, %f18, %f14
	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22

	SHA1

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(sha1_sparc64_transform)
183
arch/sparc/crypto/sha1_glue.c
Normal file
@@ -0,0 +1,183 @@
/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
 *
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 * Copyright (c) Mathias Krause <minipli@googlemail.com>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>

#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
				       unsigned int rounds);

static int sha1_sparc64_init(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}

static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
				  unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	sctx->count += len;
	if (partial) {
		done = SHA1_BLOCK_SIZE - partial;
		memcpy(sctx->buffer + partial, data, done);
		sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
	}
	if (len - done >= SHA1_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;

		sha1_sparc64_transform(sctx->state, data + done, rounds);
		done += rounds * SHA1_BLOCK_SIZE;
	}

	memcpy(sctx->buffer, data + done, len - done);
}

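The update path buffers a partial block, flushes it once incoming data completes it, streams whole blocks straight to the transform, and stashes the tail for next time. A compact user-space sketch of that bookkeeping with a stand-in transform (block size and names are illustrative):

#include <stdio.h>
#include <string.h>

#define BLK 64

struct state {
	unsigned long long count;	/* total bytes hashed */
	unsigned char buffer[BLK];	/* partial block */
};

static void transform(const unsigned char *data, unsigned int blocks)
{
	printf("transform: %u block(s)\n", blocks);	/* stand-in */
}

static void update(struct state *s, const unsigned char *data,
		   unsigned int len)
{
	unsigned int partial = s->count % BLK;
	unsigned int done = 0;

	s->count += len;
	if (partial) {
		if (partial + len < BLK) {	/* still not a full block */
			memcpy(s->buffer + partial, data, len);
			return;
		}
		done = BLK - partial;		/* complete the buffered block */
		memcpy(s->buffer + partial, data, done);
		transform(s->buffer, 1);
	}
	if (len - done >= BLK) {		/* stream whole blocks */
		unsigned int blocks = (len - done) / BLK;

		transform(data + done, blocks);
		done += blocks * BLK;
	}
	memcpy(s->buffer, data + done, len - done);	/* stash the tail */
}

int main(void)
{
	struct state s = { 0 };
	unsigned char data[200] = { 0 };

	update(&s, data, 10);	/* buffered only */
	update(&s, data, 150);	/* flush 1 block, stream 1, buffer 32 */
	return 0;
}
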
static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

	/* Handle the fast case right here */
	if (partial + len < SHA1_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buffer + partial, data, len);
	} else
		__sha1_sparc64_update(sctx, data, len, partial);

	return 0;
}

/* Add padding and return the message digest. */
static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA1_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __sha1_sparc64_update() */
	if (padlen <= 56) {
		sctx->count += padlen;
		memcpy(sctx->buffer + index, padding, padlen);
	} else {
		__sha1_sparc64_update(sctx, padding, padlen, index);
	}
	__sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < 5; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

static int sha1_sparc64_export(struct shash_desc *desc, void *out)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static struct shash_alg alg = {
	.digestsize	= SHA1_DIGEST_SIZE,
	.init		= sha1_sparc64_init,
	.update		= sha1_sparc64_update,
	.final		= sha1_sparc64_final,
	.export		= sha1_sparc64_export,
	.import		= sha1_sparc64_import,
	.descsize	= sizeof(struct sha1_state),
	.statesize	= sizeof(struct sha1_state),
	.base		= {
		.cra_name	= "sha1",
		.cra_driver_name= "sha1-sparc64",
		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= SHA1_BLOCK_SIZE,
		.cra_module	= THIS_MODULE,
	}
};

static bool __init sparc64_has_sha1_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_SHA1))
		return false;

	return true;
}

static int __init sha1_sparc64_mod_init(void)
{
	if (sparc64_has_sha1_opcode()) {
		pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n");
		return crypto_register_shash(&alg);
	}
	pr_info("sparc64 sha1 opcode not available.\n");
	return -ENODEV;
}

static void __exit sha1_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(sha1_sparc64_mod_init);
module_exit(sha1_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");

MODULE_ALIAS("sha1");
78
arch/sparc/crypto/sha256_asm.S
Normal file
@@ -0,0 +1,78 @@
#include <linux/linkage.h>
#include <asm/visasm.h>

#include "opcodes.h"

ENTRY(sha256_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	ld	[%o0 + 0x0c], %f3
	ld	[%o0 + 0x10], %f4
	ld	[%o0 + 0x14], %f5
	andcc	%o1, 0x7, %g0
	ld	[%o0 + 0x18], %f6
	bne,pn	%xcc, 10f
	 ld	[%o0 + 0x1c], %f7

1:
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA256

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

5:
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	st	%f5, [%o0 + 0x14]
	st	%f6, [%o0 + 0x18]
	st	%f7, [%o0 + 0x1c]
	retl
	 VISExitHalf
10:
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata	%f10, %f12, %f8
	faligndata	%f12, %f14, %f10
	faligndata	%f14, %f16, %f12
	faligndata	%f16, %f18, %f14
	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22

	SHA256

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(sha256_sparc64_transform)
241
arch/sparc/crypto/sha256_glue.c
Normal file
@@ -0,0 +1,241 @@
/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon crypto/sha256_generic.c
 *
 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
 * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>

#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
					 unsigned int rounds);

static int sha224_sparc64_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	sctx->state[0] = SHA224_H0;
	sctx->state[1] = SHA224_H1;
	sctx->state[2] = SHA224_H2;
	sctx->state[3] = SHA224_H3;
	sctx->state[4] = SHA224_H4;
	sctx->state[5] = SHA224_H5;
	sctx->state[6] = SHA224_H6;
	sctx->state[7] = SHA224_H7;
	sctx->count = 0;

	return 0;
}

static int sha256_sparc64_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	sctx->state[0] = SHA256_H0;
	sctx->state[1] = SHA256_H1;
	sctx->state[2] = SHA256_H2;
	sctx->state[3] = SHA256_H3;
	sctx->state[4] = SHA256_H4;
	sctx->state[5] = SHA256_H5;
	sctx->state[6] = SHA256_H6;
	sctx->state[7] = SHA256_H7;
	sctx->count = 0;

	return 0;
}

static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
				    unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	sctx->count += len;
	if (partial) {
		done = SHA256_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha256_sparc64_transform(sctx->state, sctx->buf, 1);
	}
	if (len - done >= SHA256_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;

		sha256_sparc64_transform(sctx->state, data + done, rounds);
		done += rounds * SHA256_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);
}

static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);
	} else
		__sha256_sparc64_update(sctx, data, len, partial);

	return 0;
}

static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __sha256_sparc64_update() */
	if (padlen <= 56) {
		sctx->count += padlen;
		memcpy(sctx->buf + index, padding, padlen);
	} else {
		__sha256_sparc64_update(sctx, padding, padlen, index);
	}
	__sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
{
	u8 D[SHA256_DIGEST_SIZE];

	sha256_sparc64_final(desc, D);

	memcpy(hash, D, SHA224_DIGEST_SIZE);
	memset(D, 0, SHA256_DIGEST_SIZE);

	return 0;
}

static int sha256_sparc64_export(struct shash_desc *desc, void *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));
	return 0;
}

static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));
	return 0;
}

static struct shash_alg sha256 = {
	.digestsize	= SHA256_DIGEST_SIZE,
	.init		= sha256_sparc64_init,
	.update		= sha256_sparc64_update,
	.final		= sha256_sparc64_final,
	.export		= sha256_sparc64_export,
	.import		= sha256_sparc64_import,
	.descsize	= sizeof(struct sha256_state),
	.statesize	= sizeof(struct sha256_state),
	.base		= {
		.cra_name	= "sha256",
		.cra_driver_name= "sha256-sparc64",
		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= SHA256_BLOCK_SIZE,
		.cra_module	= THIS_MODULE,
	}
};

static struct shash_alg sha224 = {
	.digestsize	= SHA224_DIGEST_SIZE,
	.init		= sha224_sparc64_init,
	.update		= sha256_sparc64_update,
	.final		= sha224_sparc64_final,
	.descsize	= sizeof(struct sha256_state),
	.base		= {
		.cra_name	= "sha224",
		.cra_driver_name= "sha224-sparc64",
		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= SHA224_BLOCK_SIZE,
		.cra_module	= THIS_MODULE,
	}
};

static bool __init sparc64_has_sha256_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_SHA256))
		return false;

	return true;
}

static int __init sha256_sparc64_mod_init(void)
{
	if (sparc64_has_sha256_opcode()) {
		int ret = crypto_register_shash(&sha224);
		if (ret < 0)
			return ret;

		ret = crypto_register_shash(&sha256);
		if (ret < 0) {
			crypto_unregister_shash(&sha224);
			return ret;
		}

		pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
		return 0;
	}
	pr_info("sparc64 sha256 opcode not available.\n");
	return -ENODEV;
}

static void __exit sha256_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&sha224);
	crypto_unregister_shash(&sha256);
}

module_init(sha256_sparc64_mod_init);
module_exit(sha256_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");

MODULE_ALIAS("sha224");
MODULE_ALIAS("sha256");
102
arch/sparc/crypto/sha512_asm.S
Normal file
@@ -0,0 +1,102 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/visasm.h>
|
||||
|
||||
#include "opcodes.h"
|
||||
|
||||
ENTRY(sha512_sparc64_transform)
|
||||
/* %o0 = digest, %o1 = data, %o2 = rounds */
|
||||
VISEntry
|
||||
ldd [%o0 + 0x00], %f0
|
||||
ldd [%o0 + 0x08], %f2
|
||||
ldd [%o0 + 0x10], %f4
|
||||
ldd [%o0 + 0x18], %f6
|
||||
ldd [%o0 + 0x20], %f8
|
||||
ldd [%o0 + 0x28], %f10
|
||||
andcc %o1, 0x7, %g0
|
||||
ldd [%o0 + 0x30], %f12
|
||||
bne,pn %xcc, 10f
|
||||
ldd [%o0 + 0x38], %f14
|
||||
|
||||
1:
|
||||
ldd [%o1 + 0x00], %f16
|
||||
ldd [%o1 + 0x08], %f18
|
||||
ldd [%o1 + 0x10], %f20
|
||||
ldd [%o1 + 0x18], %f22
|
||||
ldd [%o1 + 0x20], %f24
|
||||
ldd [%o1 + 0x28], %f26
|
||||
ldd [%o1 + 0x30], %f28
|
||||
ldd [%o1 + 0x38], %f30
|
||||
ldd [%o1 + 0x40], %f32
|
||||
ldd [%o1 + 0x48], %f34
|
||||
ldd [%o1 + 0x50], %f36
|
||||
ldd [%o1 + 0x58], %f38
|
||||
ldd [%o1 + 0x60], %f40
|
||||
ldd [%o1 + 0x68], %f42
|
||||
ldd [%o1 + 0x70], %f44
|
||||
ldd [%o1 + 0x78], %f46
|
||||
|
||||
SHA512
|
||||
|
||||
subcc %o2, 1, %o2
|
||||
bne,pt %xcc, 1b
|
||||
add %o1, 0x80, %o1
|
||||
|
||||
5:
|
||||
std %f0, [%o0 + 0x00]
|
||||
std %f2, [%o0 + 0x08]
|
||||
std %f4, [%o0 + 0x10]
|
||||
std %f6, [%o0 + 0x18]
|
||||
std %f8, [%o0 + 0x20]
|
||||
std %f10, [%o0 + 0x28]
|
||||
std %f12, [%o0 + 0x30]
|
||||
std %f14, [%o0 + 0x38]
|
||||
retl
|
||||
VISExit
|
||||
10:
|
||||
alignaddr %o1, %g0, %o1
|
||||
|
||||
ldd [%o1 + 0x00], %f18
|
||||
1:
|
||||
ldd [%o1 + 0x08], %f20
|
||||
ldd [%o1 + 0x10], %f22
|
||||
ldd [%o1 + 0x18], %f24
|
||||
ldd [%o1 + 0x20], %f26
|
||||
ldd [%o1 + 0x28], %f28
|
||||
ldd [%o1 + 0x30], %f30
|
||||
ldd [%o1 + 0x38], %f32
|
||||
ldd [%o1 + 0x40], %f34
|
||||
ldd [%o1 + 0x48], %f36
|
||||
ldd [%o1 + 0x50], %f38
|
||||
ldd [%o1 + 0x58], %f40
|
||||
ldd [%o1 + 0x60], %f42
|
||||
ldd [%o1 + 0x68], %f44
|
||||
ldd [%o1 + 0x70], %f46
|
||||
ldd [%o1 + 0x78], %f48
|
||||
ldd [%o1 + 0x80], %f50
|
||||
|
||||
faligndata %f18, %f20, %f16
|
||||
faligndata %f20, %f22, %f18
|
||||
faligndata %f22, %f24, %f20
|
||||
faligndata %f24, %f26, %f22
|
||||
faligndata %f26, %f28, %f24
|
||||
faligndata %f28, %f30, %f26
|
||||
faligndata %f30, %f32, %f28
|
||||
faligndata %f32, %f34, %f30
|
||||
faligndata %f34, %f36, %f32
|
||||
faligndata %f36, %f38, %f34
|
||||
faligndata %f38, %f40, %f36
|
||||
faligndata %f40, %f42, %f38
|
||||
faligndata %f42, %f44, %f40
|
||||
faligndata %f44, %f46, %f42
|
||||
faligndata %f46, %f48, %f44
|
||||
faligndata %f48, %f50, %f46
|
||||
|
||||
SHA512
|
||||
|
||||
subcc %o2, 1, %o2
|
||||
fsrc2 %f50, %f18
|
||||
bne,pt %xcc, 1b
|
||||
add %o1, 0x80, %o1
|
||||
|
||||
ba,a,pt %xcc, 5b
|
||||
ENDPROC(sha512_sparc64_transform)
|
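
The calling contract of the routine above, restated as a plain C model (a sketch for illustration only; the xor is a placeholder for the real SHA-512 compression step that the SHA512 opcode performs in hardware): one call folds "rounds" consecutive 128-byte blocks into the eight 64-bit chaining values, which is exactly what the glue code below relies on.

#include <stdint.h>
#include <string.h>

static void sha512_transform_model(uint64_t digest[8],
				   const unsigned char *data,
				   unsigned int rounds)
{
	while (rounds--) {
		uint64_t w[16];
		int i;

		memcpy(w, data, 128);	/* one SHA512_BLOCK_SIZE block */
		for (i = 0; i < 8; i++)
			digest[i] ^= w[i] ^ w[i + 8];	/* placeholder step */
		data += 128;
	}
}
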
226
arch/sparc/crypto/sha512_glue.c
Normal file
@@ -0,0 +1,226 @@
/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes.
 *
 * This is based largely upon crypto/sha512_generic.c
 *
 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>

#include <asm/pstate.h>
#include <asm/elf.h>

#include "opcodes.h"

asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
					 unsigned int rounds);

static int sha512_sparc64_init(struct shash_desc *desc)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	sctx->state[0] = SHA512_H0;
	sctx->state[1] = SHA512_H1;
	sctx->state[2] = SHA512_H2;
	sctx->state[3] = SHA512_H3;
	sctx->state[4] = SHA512_H4;
	sctx->state[5] = SHA512_H5;
	sctx->state[6] = SHA512_H6;
	sctx->state[7] = SHA512_H7;
	sctx->count[0] = sctx->count[1] = 0;

	return 0;
}

static int sha384_sparc64_init(struct shash_desc *desc)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	sctx->state[0] = SHA384_H0;
	sctx->state[1] = SHA384_H1;
	sctx->state[2] = SHA384_H2;
	sctx->state[3] = SHA384_H3;
	sctx->state[4] = SHA384_H4;
	sctx->state[5] = SHA384_H5;
	sctx->state[6] = SHA384_H6;
	sctx->state[7] = SHA384_H7;
	sctx->count[0] = sctx->count[1] = 0;

	return 0;
}

static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
				    unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	if ((sctx->count[0] += len) < len)
		sctx->count[1]++;
	if (partial) {
		done = SHA512_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha512_sparc64_transform(sctx->state, sctx->buf, 1);
	}
	if (len - done >= SHA512_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;

		sha512_sparc64_transform(sctx->state, data + done, rounds);
		done += rounds * SHA512_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);
}

static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;

	/* Handle the fast case right here */
	if (partial + len < SHA512_BLOCK_SIZE) {
		if ((sctx->count[0] += len) < len)
			sctx->count[1]++;
		memcpy(sctx->buf + partial, data, len);
	} else
		__sha512_sparc64_update(sctx, data, len, partial);

	return 0;
}

static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be64 *dst = (__be64 *)out;
	__be64 bits[2];
	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };

	/* Save number of bits */
	bits[1] = cpu_to_be64(sctx->count[0] << 3);
	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);

	/* Pad out to 112 mod 128 and append length */
	index = sctx->count[0] % SHA512_BLOCK_SIZE;
	padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);

	/* We need to fill a whole block for __sha512_sparc64_update() */
	if (padlen <= 112) {
		if ((sctx->count[0] += padlen) < padlen)
			sctx->count[1]++;
		memcpy(sctx->buf + index, padding, padlen);
	} else {
		__sha512_sparc64_update(sctx, padding, padlen, index);
	}
	__sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be64(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}

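A worked instance of the padding arithmetic in sha512_sparc64_final() (standalone, illustrative numbers): after padlen bytes of padding, the 16-byte bit count always completes a 128-byte block.

#include <stdio.h>

int main(void)
{
	unsigned int index;

	for (index = 0; index < 128; index += 56) {
		unsigned int padlen = (index < 112) ? (112 - index)
						    : ((128 + 112) - index);
		/* index=0 -> 112, index=56 -> 56, index=112 -> 128
		 * (a whole extra block); the total is always 0 mod 128.
		 */
		printf("index=%3u padlen=%3u total%%128=%u\n",
		       index, padlen, (index + padlen + 16) % 128);
	}
	return 0;
}
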
static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
{
	u8 D[64];

	sha512_sparc64_final(desc, D);

	memcpy(hash, D, 48);
	memset(D, 0, 64);

	return 0;
}

static struct shash_alg sha512 = {
	.digestsize	=	SHA512_DIGEST_SIZE,
	.init		=	sha512_sparc64_init,
	.update		=	sha512_sparc64_update,
	.final		=	sha512_sparc64_final,
	.descsize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha512",
		.cra_driver_name=	"sha512-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA512_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};

static struct shash_alg sha384 = {
	.digestsize	=	SHA384_DIGEST_SIZE,
	.init		=	sha384_sparc64_init,
	.update		=	sha512_sparc64_update,
	.final		=	sha384_sparc64_final,
	.descsize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha384",
		.cra_driver_name=	"sha384-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA384_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};

static bool __init sparc64_has_sha512_opcode(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	if (!(cfr & CFR_SHA512))
		return false;

	return true;
}

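User space can run the same two-level probe; a sketch assuming, as on Solaris, that the CFR (%asr26) is readable from user mode on these chips (constants copied from the headers in this patch; the helper is hypothetical):

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_SPARC_CRYPTO	0x04000000
#define CFR_SHA512		0x0000000000000080UL

int have_sha512_opcode(void)
{
	unsigned long cfr;

	/* AT_HWCAP only says "crypto opcodes exist"... */
	if (!(getauxval(AT_HWCAP) & HWCAP_SPARC_CRYPTO))
		return 0;

	/* ...the CFR says which ones this chip implements. */
	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	return (cfr & CFR_SHA512) != 0;
}
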
static int __init sha512_sparc64_mod_init(void)
{
	if (sparc64_has_sha512_opcode()) {
		int ret = crypto_register_shash(&sha384);
		if (ret < 0)
			return ret;

		ret = crypto_register_shash(&sha512);
		if (ret < 0) {
			crypto_unregister_shash(&sha384);
			return ret;
		}

		pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n");
		return 0;
	}
	pr_info("sparc64 sha512 opcode not available.\n");
	return -ENODEV;
}

static void __exit sha512_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&sha384);
	crypto_unregister_shash(&sha512);
}

module_init(sha512_sparc64_mod_init);
module_exit(sha512_sparc64_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated");

MODULE_ALIAS("sha384");
MODULE_ALIAS("sha512");
@@ -141,7 +141,8 @@
/* SpitFire and later extended ASIs.  The "(III)" marker designates
 * UltraSparc-III and later specific ASIs.  The "(CMT)" marker designates
 * Chip Multi Threading specific ASIs.  "(NG)" designates Niagara specific
 * ASIs, "(4V)" designates SUN4V specific ASIs.
 * ASIs, "(4V)" designates SUN4V specific ASIs.  "(NG4)" designates SPARC-T4
 * and later ASIs.
 */
#define ASI_PHYS_USE_EC		0x14 /* PADDR, E-cachable		*/
#define ASI_PHYS_BYPASS_EC_E	0x15 /* PADDR, E-bit			*/
@@ -243,6 +244,7 @@
#define ASI_UDBL_CONTROL_R	0x7f /* External UDB control regs rd low*/
#define ASI_INTR_R		0x7f /* IRQ vector dispatch read	*/
#define ASI_INTR_DATAN_R	0x7f /* (III) In irq vector data reg N	*/
#define ASI_PIC			0xb0 /* (NG4) PIC registers		*/
#define ASI_PST8_P		0xc0 /* Primary, 8 8-bit, partial	*/
#define ASI_PST8_S		0xc1 /* Secondary, 8 8-bit, partial	*/
#define ASI_PST16_P		0xc2 /* Primary, 4 16-bit, partial	*/
@@ -86,6 +86,15 @@
#define AV_SPARC_IMA		0x00400000 /* integer multiply-add */
#define AV_SPARC_ASI_CACHE_SPARING \
			0x00800000 /* cache sparing ASIs available */
#define AV_SPARC_PAUSE		0x01000000 /* PAUSE available */
#define AV_SPARC_CBCOND		0x02000000 /* CBCOND insns available */

/* Solaris decided to enumerate every single crypto instruction type
 * in the AT_HWCAP bits.  This is wasteful, since if crypto is present,
 * you still need to look in the CFR register to see if the opcode is
 * really available.  So we simply advertise only "crypto" support.
 */
#define HWCAP_SPARC_CRYPTO	0x04000000 /* CRYPTO insns available */

#define CORE_DUMP_USE_REGSET

@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
					   unsigned long len);
#endif

#define HV_FAST_VT_GET_PERFREG		0x184
#define HV_FAST_VT_SET_PERFREG		0x185

#ifndef __ASSEMBLY__
extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num,
					  unsigned long *reg_val);
extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
					  unsigned long reg_val);
#endif

/* Function numbers for HV_CORE_TRAP.  */
#define HV_CORE_SET_VER			0x00
#define HV_CORE_PUTCHAR			0x01
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra,
#define HV_GRP_NIU			0x0204
#define HV_GRP_VF_CPU			0x0205
#define HV_GRP_KT_CPU			0x0209
#define HV_GRP_VT_CPU			0x020c
#define HV_GRP_DIAG			0x0300

#ifndef __ASSEMBLY__
@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client);

extern void mdesc_fill_in_cpu_data(cpumask_t *mask);
extern void mdesc_populate_present_mask(cpumask_t *mask);
extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask);

extern void sun4v_mdesc_init(void);

@@ -2,8 +2,13 @@
#define __PCR_H

struct pcr_ops {
	u64 (*read)(void);
	void (*write)(u64);
	u64 (*read_pcr)(unsigned long);
	void (*write_pcr)(unsigned long, u64);
	u64 (*read_pic)(unsigned long);
	void (*write_pic)(unsigned long, u64);
	u64 (*nmi_picl_value)(unsigned int nmi_hz);
	u64 pcr_nmi_enable;
	u64 pcr_nmi_disable;
};
extern const struct pcr_ops *pcr_ops;

@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void);
#define PCR_N2_SL1_SHIFT	27
#define PCR_N2_OV1		0x80000000

extern unsigned int picl_shift;

/* In order to commonize as much of the implementation as
 * possible, we use PICH as our counter.  Mostly this is
 * to accommodate Niagara-1 which can only count insn cycles
 * in PICH.
 */
static inline u64 picl_value(unsigned int nmi_hz)
{
	u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift);

	return ((u64)((0 - delta) & 0xffffffff)) << 32;
}

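A worked instance of picl_value() (illustrative numbers only): with clock_tick = 1 GHz, nmi_hz = 100 and picl_shift = 2, delta is 2,500,000; seeding PICH with -delta makes the 32-bit counter wrap to zero, and hence trap, after exactly delta counted events.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t clock_tick = 1000000000, nmi_hz = 100, picl_shift = 2;
	uint32_t delta = clock_tick / (nmi_hz << picl_shift);
	uint64_t pic = ((uint64_t)((0 - delta) & 0xffffffff)) << 32;

	/* prints: delta=2500000 PIC=ffd9da6000000000 */
	printf("delta=%u PIC=%016llx\n", delta, (unsigned long long)pic);
	return 0;
}
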
extern u64 pcr_enable;
#define PCR_N4_OV		0x00000001 /* PIC overflow             */
#define PCR_N4_TOE		0x00000002 /* Trap On Event           */
#define PCR_N4_UTRACE		0x00000004 /* Trace user events       */
#define PCR_N4_STRACE		0x00000008 /* Trace supervisor events */
#define PCR_N4_HTRACE		0x00000010 /* Trace hypervisor events */
#define PCR_N4_MASK		0x000007e0 /* Event mask              */
#define PCR_N4_MASK_SHIFT	5
#define PCR_N4_SL		0x0000f800 /* Event Select            */
#define PCR_N4_SL_SHIFT		11
#define PCR_N4_PICNPT		0x00010000 /* PIC non-privileged trap */
#define PCR_N4_PICNHT		0x00020000 /* PIC non-hypervisor trap */
#define PCR_N4_NTC		0x00040000 /* Next-To-Commit wrap     */

extern int pcr_arch_init(void);

@@ -54,11 +54,6 @@ enum perfctr_opcode {
	PERFCTR_GETPCR
};

/* I don't want the kernel's namespace to be polluted with this
 * stuff when this file is included.  --DaveM
 */
#ifndef __KERNEL__

#define  PRIV 0x00000001
#define  SYS  0x00000002
#define  USR  0x00000004
@@ -168,29 +163,4 @@ struct vcounter_struct {
  unsigned long long vcnt1;
};

#else /* !(__KERNEL__) */

#ifndef CONFIG_SPARC32

/* Performance counter register access. */
#define read_pcr(__p)  __asm__ __volatile__("rd	%%pcr, %0" : "=r" (__p))
#define write_pcr(__p) __asm__ __volatile__("wr	%0, 0x0, %%pcr" : : "r" (__p))
#define read_pic(__p)  __asm__ __volatile__("rd	%%pic, %0" : "=r" (__p))

/* Blackbird errata workaround.  See commentary in
 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
 * for more information.
 */
#define write_pic(__p)					\
	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t" \
			     " nop\n\t"			\
			     ".align	64\n"		\
			     "99:wr	%0, 0x0, %%pic\n\t" \
			     "rd	%%pic, %%g0" : : "r" (__p))
#define reset_pic()	write_pic(0)

#endif /* !CONFIG_SPARC32 */

#endif /* !(__KERNEL__) */

#endif /* !(PERF_COUNTER_API) */
@@ -88,4 +88,18 @@
#define VERS_MAXTL	_AC(0x000000000000ff00,UL) /* Max Trap Level.	*/
#define VERS_MAXWIN	_AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/

/* Compatability Feature Register (%asr26), SPARC-T4 and later  */
#define CFR_AES		_AC(0x0000000000000001,UL) /* Supports AES opcodes */
#define CFR_DES		_AC(0x0000000000000002,UL) /* Supports DES opcodes */
#define CFR_KASUMI	_AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */
#define CFR_CAMELLIA	_AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/
#define CFR_MD5		_AC(0x0000000000000010,UL) /* Supports MD5 opcodes */
#define CFR_SHA1	_AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */
#define CFR_SHA256	_AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */
#define CFR_SHA512	_AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */
#define CFR_MPMUL	_AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */
#define CFR_MONTMUL	_AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */
#define CFR_MONTSQR	_AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */
#define CFR_CRC32C	_AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */

#endif /* !(_SPARC64_PSTATE_H) */
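
A small helper (hypothetical, not part of this patch) showing how these bits are meant to be combined with the hwcap gate, mirroring the sparc64_has_*_opcode() probes in the crypto glue above:

#include <linux/types.h>
#include <asm/pstate.h>
#include <asm/elf.h>

/* True if the running chip implements every CFR feature in "mask",
 * e.g. sparc64_cfr_has(CFR_AES | CFR_DES).
 */
static bool sparc64_cfr_has(unsigned long mask)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	return (cfr & mask) == mask;
}
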
@@ -559,10 +559,10 @@ niagara_tlb_fixup:
	be,pt	%xcc, niagara2_patch
	 nop
	cmp	%g1, SUN4V_CHIP_NIAGARA4
	be,pt	%xcc, niagara2_patch
	be,pt	%xcc, niagara4_patch
	 nop
	cmp	%g1, SUN4V_CHIP_NIAGARA5
	be,pt	%xcc, niagara2_patch
	be,pt	%xcc, niagara4_patch
	 nop

	call	generic_patch_copyops
@@ -573,6 +573,16 @@ niagara_tlb_fixup:
	 nop

	ba,a,pt	%xcc, 80f
niagara4_patch:
	call	niagara4_patch_copyops
	 nop
	call	niagara_patch_bzero
	 nop
	call	niagara4_patch_pageops
	 nop

	ba,a,pt	%xcc, 80f

niagara2_patch:
	call	niagara2_patch_copyops
	 nop

@@ -45,6 +45,7 @@ static struct api_info api_table[] = {
	{ .group = HV_GRP_NIU,				},
	{ .group = HV_GRP_VF_CPU,			},
	{ .group = HV_GRP_KT_CPU,			},
	{ .group = HV_GRP_VT_CPU,			},
	{ .group = HV_GRP_DIAG,	.flags = FLAG_PRE_API	},
};

@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)
	retl
	 nop
ENDPROC(sun4v_reboot_data_set)

ENTRY(sun4v_vt_get_perfreg)
	mov	%o1, %o4
	mov	HV_FAST_VT_GET_PERFREG, %o5
	ta	HV_FAST_TRAP
	stx	%o1, [%o4]
	retl
	 nop
ENDPROC(sun4v_vt_get_perfreg)

ENTRY(sun4v_vt_set_perfreg)
	mov	HV_FAST_VT_SET_PERFREG, %o5
	ta	HV_FAST_TRAP
	retl
	 nop
ENDPROC(sun4v_vt_set_perfreg)
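
These follow the usual sun4v fast-trap convention: %o5 carries the function number, %o0/%o1 the arguments, and %o0 returns the HV_E* status. A hedged C usage sketch (the demo caller is hypothetical):

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/hypervisor.h>

/* Read PCR "reg_num" through the VT-CPU hypervisor service, failing
 * cleanly where the service was not negotiated.
 */
static int demo_read_vt_pcr(unsigned long reg_num, u64 *out)
{
	unsigned long val;

	if (sun4v_vt_get_perfreg(reg_num, &val) != HV_EOK)
		return -ENODEV;

	*out = val;
	return 0;
}
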
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
	be,pn	%xcc, kvmap_dtlb_longpath

2:	sethi	%hi(kpte_linear_bitmap), %g2
	or	%g2, %lo(kpte_linear_bitmap), %g2

	/* Get the 256MB physical address index. */
	sllx	%g4, 21, %g5
	mov	1, %g7
	or	%g2, %lo(kpte_linear_bitmap), %g2
	srlx	%g5, 21 + 28, %g5
	and	%g5, (32 - 1), %g7

	/* Don't try this at home kids... this depends upon srlx
	 * only taking the low 6 bits of the shift count in %g5.
	 */
	sllx	%g7, %g5, %g7

	/* Divide by 64 to get the offset into the bitmask. */
	srlx	%g5, 6, %g5
	/* Divide by 32 to get the offset into the bitmask. */
	srlx	%g5, 5, %g5
	add	%g7, %g7, %g7
	sllx	%g5, 3, %g5

	/* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */
	/* kern_linear_pte_xor[(mask >> shift) & 3)] */
	ldx	[%g2 + %g5], %g2
	andcc	%g2, %g7, %g0
	srlx	%g2, %g7, %g7
	sethi	%hi(kern_linear_pte_xor), %g5
	and	%g7, 3, %g7
	or	%g5, %lo(kern_linear_pte_xor), %g5
	bne,a,pt %xcc, 1f
	 add	%g5, 8, %g5

1:	ldx	[%g5], %g2
	sllx	%g7, 3, %g7
	ldx	[%g5 + %g7], %g2

	.globl		kvmap_linear_patch
kvmap_linear_patch:
@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
	return pci_enable_resources(dev, mask);
}

void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
{
#ifdef CONFIG_PCI_DEBUG
	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
	       pci_name(dev));
#endif
	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
}

/* in/out routines taken from pcic.c
 *
 * This probably belongs here rather than ioport.c because
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
	mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
}

static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
{
	const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL);
	unsigned long *pgsz_mask = arg;
	u64 val;

	val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
	       HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
	if (pgsz_prop)
		val = *pgsz_prop;

	if (!*pgsz_mask)
		*pgsz_mask = val;
	else
		*pgsz_mask &= val;
	return NULL;
}

void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
{
	*pgsz_mask = 0;
	mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
}

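The accumulator above yields the intersection of the per-CPU page size lists; a standalone model with sample masks:

#include <stdio.h>

int main(void)
{
	unsigned long per_cpu[] = { 0x1df, 0x19f, 0x1df };	/* sample masks */
	unsigned long mask = 0;
	unsigned int i;

	/* First CPU seeds the mask, the rest AND theirs in, so only
	 * page sizes supported by *all* CPUs survive.
	 */
	for (i = 0; i < 3; i++) {
		if (!mask)
			mask = per_cpu[i];
		else
			mask &= per_cpu[i];
	}
	printf("common pgsz mask: 0x%lx\n", mask);	/* 0x19f */
	return 0;
}
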
static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
{
	const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
@@ -22,7 +22,6 @@
#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>

#include "kstack.h"

@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write(PCR_PIC_PRIV);
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);

	sum = local_cpu_data().irq0_irqs;
	if (__get_cpu_var(nmi_touch)) {
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
		__this_cpu_write(alert_counter, 0);
	}
	if (__get_cpu_var(wd_enabled)) {
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
	}

	restore_hardirq_stack(orig_sp);
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)

void stop_nmi_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	__get_cpu_var(wd_enabled) = 0;
	atomic_dec(&nmi_active);
}
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)
	__get_cpu_var(wd_enabled) = 1;
	atomic_inc(&nmi_active);

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

static void nmi_adjust_hz_one(void *unused)
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)
	if (!__get_cpu_var(wd_enabled))
		return;

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
{
}

void pcibios_update_irq(struct pci_dev *pdev, int irq)
{
}

resource_size_t pcibios_align_resource(void *data, const struct resource *res,
				       resource_size_t size, resource_size_t align)
{
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
	};
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
@@ -13,23 +13,14 @@
#include <asm/pil.h>
#include <asm/pcr.h>
#include <asm/nmi.h>
#include <asm/asi.h>
#include <asm/spitfire.h>
#include <asm/perfctr.h>

/* This code is shared between various users of the performance
 * counters.  Users will be oprofile, pseudo-NMI watchdog, and the
 * perf_event support layer.
 */

#define PCR_SUN4U_ENABLE	(PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
#define PCR_N2_ENABLE		(PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \
				 PCR_N2_TOE_OV1 | \
				 (2 << PCR_N2_SL1_SHIFT) | \
				 (0xff << PCR_N2_MASK1_SHIFT))

u64 pcr_enable;
unsigned int picl_shift;

/* Performance counter interrupts run unmasked at PIL level 15.
 * Therefore we can't do things like wakeups and other work
 * that expects IRQ disabling to be adhered to in locking etc.
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void)
const struct pcr_ops *pcr_ops;
EXPORT_SYMBOL_GPL(pcr_ops);

static u64 direct_pcr_read(void)
static u64 direct_pcr_read(unsigned long reg_num)
{
	u64 val;

	read_pcr(val);
	WARN_ON_ONCE(reg_num != 0);
	__asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
	return val;
}

static void direct_pcr_write(u64 val)
static void direct_pcr_write(unsigned long reg_num, u64 val)
{
	write_pcr(val);
	WARN_ON_ONCE(reg_num != 0);
	__asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
}

static u64 direct_pic_read(unsigned long reg_num)
{
	u64 val;

	WARN_ON_ONCE(reg_num != 0);
	__asm__ __volatile__("rd %%pic, %0" : "=r" (val));
	return val;
}

static void direct_pic_write(unsigned long reg_num, u64 val)
{
	WARN_ON_ONCE(reg_num != 0);

	/* Blackbird errata workaround.  See commentary in
	 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
	 * for more information.
	 */
	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t"
			     " nop\n\t"
			     ".align	64\n"
			     "99:wr	%0, 0x0, %%pic\n\t"
			     "rd	%%pic, %%g0" : : "r" (val));
}

static u64 direct_picl_value(unsigned int nmi_hz)
{
	u32 delta = local_cpu_data().clock_tick / nmi_hz;

	return ((u64)((0 - delta) & 0xffffffff)) << 32;
}

static const struct pcr_ops direct_pcr_ops = {
	.read			= direct_pcr_read,
	.write			= direct_pcr_write,
	.read_pcr		= direct_pcr_read,
	.write_pcr		= direct_pcr_write,
	.read_pic		= direct_pic_read,
	.write_pic		= direct_pic_write,
	.nmi_picl_value		= direct_picl_value,
	.pcr_nmi_enable		= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
	.pcr_nmi_disable	= PCR_PIC_PRIV,
};

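With the ops table in place, callers never touch %pcr or %pic directly; a sketch of the resulting call pattern (mirroring what nmi.c does after this change, for one counter):

#include <asm/pcr.h>

static void demo_arm_counter0(unsigned int nmi_hz)
{
	/* Quiesce, reload the counter, then re-enable sampling; the
	 * same sequence works on direct, Niagara-2 and SPARC-T4 ops.
	 */
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}
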
static void n2_pcr_write(u64 val)
static void n2_pcr_write(unsigned long reg_num, u64 val)
{
	unsigned long ret;

	WARN_ON_ONCE(reg_num != 0);
	if (val & PCR_N2_HTRACE) {
		ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
		if (ret != HV_EOK)
			write_pcr(val);
			direct_pcr_write(reg_num, val);
	} else
		write_pcr(val);
		direct_pcr_write(reg_num, val);
}

static u64 n2_picl_value(unsigned int nmi_hz)
{
	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);

	return ((u64)((0 - delta) & 0xffffffff)) << 32;
}

static const struct pcr_ops n2_pcr_ops = {
	.read		= direct_pcr_read,
	.write		= n2_pcr_write,
	.read_pcr	= direct_pcr_read,
	.write_pcr	= n2_pcr_write,
	.read_pic	= direct_pic_read,
	.write_pic	= direct_pic_write,
	.nmi_picl_value	= n2_picl_value,
	.pcr_nmi_enable	= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
			   PCR_N2_TOE_OV1 |
			   (2 << PCR_N2_SL1_SHIFT) |
			   (0xff << PCR_N2_MASK1_SHIFT)),
	.pcr_nmi_disable = PCR_PIC_PRIV,
};

static u64 n4_pcr_read(unsigned long reg_num)
{
	unsigned long val;

	(void) sun4v_vt_get_perfreg(reg_num, &val);

	return val;
}

static void n4_pcr_write(unsigned long reg_num, u64 val)
{
	(void) sun4v_vt_set_perfreg(reg_num, val);
}

static u64 n4_pic_read(unsigned long reg_num)
{
	unsigned long val;

	__asm__ __volatile__("ldxa [%1] %2, %0"
			     : "=r" (val)
			     : "r" (reg_num * 0x8UL), "i" (ASI_PIC));

	return val;
}

static void n4_pic_write(unsigned long reg_num, u64 val)
{
	__asm__ __volatile__("stxa %0, [%1] %2"
			     : /* no outputs */
			     : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
}

static u64 n4_picl_value(unsigned int nmi_hz)
{
	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);

	return ((u64)((0 - delta) & 0xffffffff));
}

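Note that n4_picl_value() returns the negated delta in the low 32 bits rather than shifted into the upper half: SPARC-T4 has one PIC register per counter with a 32-bit count in the low bits, while older chips pack PICH into bits 63:32 of a single shared PIC. A tiny illustration of the layout difference:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t delta = 2500000;
	uint64_t n2_pic = ((uint64_t)((0 - delta) & 0xffffffff)) << 32;
	uint64_t n4_pic = ((uint64_t)((0 - delta) & 0xffffffff));

	printf("n2 PIC=%016llx  n4 PIC=%016llx\n",
	       (unsigned long long)n2_pic, (unsigned long long)n4_pic);
	return 0;
}
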
static const struct pcr_ops n4_pcr_ops = {
	.read_pcr		= n4_pcr_read,
	.write_pcr		= n4_pcr_write,
	.read_pic		= n4_pic_read,
	.write_pic		= n4_pic_write,
	.nmi_picl_value		= n4_picl_value,
	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
				   PCR_N4_UTRACE | PCR_N4_TOE |
				   (26 << PCR_N4_SL_SHIFT)),
	.pcr_nmi_disable	= PCR_N4_PICNPT,
};

static unsigned long perf_hsvc_group;
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)
		perf_hsvc_group = HV_GRP_KT_CPU;
		break;

	case SUN4V_CHIP_NIAGARA4:
		perf_hsvc_group = HV_GRP_VT_CPU;
		break;

	default:
		return -ENODEV;
	}
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)
	sun4v_hvapi_unregister(perf_hsvc_group);
}

static int __init setup_sun4v_pcr_ops(void)
{
	int ret = 0;

	switch (sun4v_chip_type) {
	case SUN4V_CHIP_NIAGARA1:
	case SUN4V_CHIP_NIAGARA2:
	case SUN4V_CHIP_NIAGARA3:
		pcr_ops = &n2_pcr_ops;
		break;

	case SUN4V_CHIP_NIAGARA4:
		pcr_ops = &n4_pcr_ops;
		break;

	default:
		ret = -ENODEV;
		break;
	}

	return ret;
}

int __init pcr_arch_init(void)
{
	int err = register_perf_hsvc();
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void)

	switch (tlb_type) {
	case hypervisor:
		pcr_ops = &n2_pcr_ops;
		pcr_enable = PCR_N2_ENABLE;
		picl_shift = 2;
		err = setup_sun4v_pcr_ops();
		if (err)
			goto out_unregister;
		break;

	case cheetah:
	case cheetah_plus:
		pcr_ops = &direct_pcr_ops;
		pcr_enable = PCR_SUN4U_ENABLE;
		break;

	case spitfire:
@@ -25,36 +25,48 @@
#include <linux/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>
#include <asm/cacheflush.h>

#include "kernel.h"
#include "kstack.h"

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
/* Two classes of sparc64 chips currently exist.  All of which have
 * 32-bit counters which can generate overflow interrupts on the
 * transition from 0xffffffff to 0.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 * All chips upto and including SPARC-T3 have two performance
 * counters.  The two 32-bit counters are accessed in one go using a
 * single 64-bit register.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 * On these older chips both counters are controlled using a single
 * control register.  The only way to stop all sampling is to clear
 * all of the context (user, supervisor, hypervisor) sampling enable
 * bits.  But these bits apply to both counters, thus the two counters
 * can't be enabled/disabled individually.
 *
 * Furthermore, the control register on these older chips have two
 * event fields, one for each of the two counters.  It's thus nearly
 * impossible to have one counter going while keeping the other one
 * stopped.  Therefore it is possible to get overflow interrupts for
 * counters not currently "in use" and that condition must be checked
 * in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 *
 * Starting with SPARC-T4 we have one control register per counter.
 * And the counters are stored in individual registers.  The registers
 * for the counters are 64-bit but only a 32-bit counter is
 * implemented.  The event selections on SPARC-T4 lack any
 * restrictions, therefore we can elide all of the complicated
 * conflict resolution code we have for SPARC-T3 and earlier chips.
 */

#define MAX_HWEVENTS			2
#define MAX_HWEVENTS			4
#define MAX_PCRS			4
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
@@ -90,8 +102,8 @@ struct cpu_hw_events {
	 */
	int			current_idx[MAX_HWEVENTS];

	/* Software copy of %pcr register on this cpu.  */
	u64			pcr;
	/* Software copy of %pcr register(s) on this cpu.  */
	u64			pcr[MAX_HWEVENTS];

	/* Enabled/disable state.  */
	int			enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
/* An event map describes the characteristics of a performance
 * counter event.  In particular it gives the encoding as well as
 * a mask telling which counters the event can be measured on.
 *
 * The mask is unused on SPARC-T4 and later.
 */
struct perf_event_map {
	u16	encoding;
@@ -142,15 +156,53 @@ struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	u32				(*read_pmc)(int);
	void				(*write_pmc)(int, u64);
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				user_bit;
	int				priv_bit;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
	unsigned int			flags;
#define SPARC_PMU_ALL_EXCLUDES_SAME	0x00000001
#define SPARC_PMU_HAS_CONFLICTS		0x00000002
	int				max_hw_events;
	int				num_pcrs;
	int				num_pic_regs;
};

static u32 sparc_default_read_pmc(int idx)
{
	u64 val;

	val = pcr_ops->read_pic(0);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void sparc_default_write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	pic = pcr_ops->read_pic(0);
	pic &= ~mask;
	pic |= val;
	pcr_ops->write_pic(0, pic);
}

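On the pre-T4 layout the two 32-bit counters share one 64-bit PIC, so writing one half is a read-modify-write of the whole register; a standalone model of sparc_default_write_pmc():

#include <stdio.h>
#include <stdint.h>

static uint64_t pic;	/* stands in for the shared hardware PIC */

static void write_pmc_model(int upper, uint64_t val)
{
	uint64_t shift = upper ? 32 : 0;
	uint64_t mask = (uint64_t)0xffffffff << shift;

	pic = (pic & ~mask) | (val << shift);
}

int main(void)
{
	write_pmc_model(0, 0x11111111);
	write_pmc_model(1, 0x22222222);
	printf("PIC=%016llx\n", (unsigned long long)pic);	/* 2222222211111111 */
	return 0;
}
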
static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.read_pmc	= sparc_default_read_pmc,
	.write_pmc	= sparc_default_write_pmc,
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.user_bit	= PCR_UTRACE,
	.priv_bit	= PCR_STRACE,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
			   SPARC_PMU_HAS_CONFLICTS),
	.max_hw_events	= 2,
	.num_pcrs	= 1,
	.num_pic_regs	= 1,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.read_pmc	= sparc_default_read_pmc,
	.write_pmc	= sparc_default_write_pmc,
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.user_bit	= PCR_UTRACE,
	.priv_bit	= PCR_STRACE,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
			   SPARC_PMU_HAS_CONFLICTS),
	.max_hw_events	= 2,
	.num_pcrs	= 1,
	.num_pic_regs	= 1,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.read_pmc	= sparc_default_read_pmc,
	.write_pmc	= sparc_default_write_pmc,
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.user_bit	= PCR_UTRACE,
	.priv_bit	= PCR_STRACE,
	.hv_bit		= PCR_N2_HTRACE,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
			   SPARC_PMU_HAS_CONFLICTS),
	.max_hw_events	= 2,
	.num_pcrs	= 1,
	.num_pic_regs	= 1,
};

static const struct perf_event_map niagara4_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
	[PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
	[PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
	[PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
};

static const struct perf_event_map *niagara4_event_map(int event_id)
{
	return &niagara4_perfmon_event_map[event_id];
}

static const cache_map_t niagara4_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
		[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
		[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
		[C(RESULT_MISS)] = { (11 << 6) | 0x03 },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { (17 << 6) | 0x3f },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { (6 << 6) | 0x3f },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static u32 sparc_vt_read_pmc(int idx)
{
	u64 val = pcr_ops->read_pic(idx);

	return val & 0xffffffff;
}

static void sparc_vt_write_pmc(int idx, u64 val)
{
	u64 pcr;

	/* There seems to be an internal latch on the overflow event
	 * on SPARC-T4 that prevents it from triggering unless you
	 * update the PIC exactly as we do here.  The requirement
	 * seems to be that you have to turn off event counting in the
	 * PCR around the PIC update.
	 *
	 * For example, after the following sequence:
	 *
	 * 1) set PIC to -1
	 * 2) enable event counting and overflow reporting in PCR
	 * 3) overflow triggers, softint 15 handler invoked
	 * 4) clear OV bit in PCR
	 * 5) write PIC to -1
	 *
	 * a subsequent overflow event will not trigger.  This
	 * sequence works on SPARC-T3 and previous chips.
	 */
	pcr = pcr_ops->read_pcr(idx);
	pcr_ops->write_pcr(idx, PCR_N4_PICNPT);

	pcr_ops->write_pic(idx, val & 0xffffffff);

	pcr_ops->write_pcr(idx, pcr);
}

static const struct sparc_pmu niagara4_pmu = {
	.event_map	= niagara4_event_map,
	.cache_map	= &niagara4_cache_map,
	.max_events	= ARRAY_SIZE(niagara4_perfmon_event_map),
	.read_pmc	= sparc_vt_read_pmc,
	.write_pmc	= sparc_vt_write_pmc,
	.upper_shift	= 5,
	.lower_shift	= 5,
	.event_mask	= 0x7ff,
	.user_bit	= PCR_N4_UTRACE,
	.priv_bit	= PCR_N4_STRACE,

	/* We explicitly don't support hypervisor tracing.  The T4
	 * generates the overflow event for precise events via a trap
	 * which will not be generated (ie. it's completely lost) if
	 * we happen to be in the hypervisor when the event triggers.
	 * Essentially, the overflow event reporting is completely
	 * unusable when you have hypervisor mode tracing enabled.
	 */
	.hv_bit		= 0,

	.irq_bit	= PCR_N4_TOE,
	.upper_nop	= 0,
	.lower_nop	= 0,
	.flags		= 0,
	.max_hw_events	= 4,
	.num_pcrs	= 4,
	.num_pic_regs	= 4,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);
	int pcr_index = 0;

	val = cpuc->pcr;
	if (sparc_pmu->num_pcrs > 1)
		pcr_index = idx;

	val = cpuc->pcr[pcr_index];
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;
	cpuc->pcr[pcr_index] = val;

	pcr_ops->write(cpuc->pcr);
	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}

static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	int pcr_index = 0;
	u64 val;

	val = cpuc->pcr;
	if (sparc_pmu->num_pcrs > 1)
		pcr_index = idx;

	val = cpuc->pcr[pcr_index];
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;
	cpuc->pcr[pcr_index] = val;

	pcr_ops->write(cpuc->pcr);
}

static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
}

static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);
	new_raw_count = sparc_pmu->read_pmc(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,

	local64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);
	sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

/* If performance event entries have been added, move existing
 * events around (if necessary) and then assign new entries to
 * counters.
 */
static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
static void read_in_all_counters(struct cpu_hw_events *cpuc)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	/* Read in the counters which are moving.  */
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];

@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
			cpuc->current_idx[i] = PIC_NO_INDEX;
		}
	}
}

/* On this PMU all PICs are programmed using a single PCR.  Calculate
 * the combined control register value.
 *
 * For such chips we require that all of the events have the same
 * configuration, so just fetch the settings from the first entry.
 */
static void calculate_single_pcr(struct cpu_hw_events *cpuc)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	/* Assign to counters all unassigned events.  */
	for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
		cpuc->current_idx[i] = idx;

		enc = perf_event_get_enc(cpuc->events[i]);
		pcr &= ~mask_for_index(idx);
		cpuc->pcr[0] &= ~mask_for_index(idx);
		if (hwc->state & PERF_HES_STOPPED)
			pcr |= nop_for_index(idx);
			cpuc->pcr[0] |= nop_for_index(idx);
		else
			pcr |= event_encoding(enc, idx);
			cpuc->pcr[0] |= event_encoding(enc, idx);
	}
out:
	return pcr;
	cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}

/* On this PMU each PIC has it's own PCR control register.  */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
	int i;

	if (!cpuc->n_added)
		goto out;

	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];
		struct hw_perf_event *hwc = &cp->hw;
		int idx = hwc->idx;
		u64 enc;

		if (cpuc->current_idx[i] != PIC_NO_INDEX)
			continue;

		sparc_perf_event_set_period(cp, hwc, idx);
		cpuc->current_idx[i] = idx;

		enc = perf_event_get_enc(cpuc->events[i]);
		cpuc->pcr[idx] &= ~mask_for_index(idx);
		if (hwc->state & PERF_HES_STOPPED)
			cpuc->pcr[idx] |= nop_for_index(idx);
		else
			cpuc->pcr[idx] |= event_encoding(enc, idx);
	}
out:
	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *cp = cpuc->event[i];
		int idx = cp->hw.idx;

		cpuc->pcr[idx] |= cp->hw.config_base;
	}
}

/* If performance event entries have been added, move existing events
 * around (if necessary) and then assign new entries to counters.
 */
static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
{
	if (cpuc->n_added)
		read_in_all_counters(cpuc);

	if (sparc_pmu->num_pcrs == 1) {
		calculate_single_pcr(cpuc);
	} else {
		calculate_multiple_pcrs(cpuc);
	}
}

static void sparc_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 pcr;
	int i;

	if (cpuc->enabled)
		return;
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)
	cpuc->enabled = 1;
	barrier();

	pcr = cpuc->pcr;
	if (!cpuc->n_events) {
		pcr = 0;
	} else {
		pcr = maybe_change_configuration(cpuc, pcr);
	if (cpuc->n_events)
		update_pcrs_for_enable(cpuc);

		/* We require that all of the events have the same
		 * configuration, so just fetch the settings from the
		 * first entry.
		 */
		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
	}

	pcr_ops->write(cpuc->pcr);
	for (i = 0; i < sparc_pmu->num_pcrs; i++)
		pcr_ops->write_pcr(i, cpuc->pcr[i]);
}

static void sparc_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;
	int i;

	if (!cpuc->enabled)
		return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
	cpuc->enabled = 0;
	cpuc->n_added = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;
	for (i = 0; i < sparc_pmu->num_pcrs; i++) {
		u64 val = cpuc->pcr[i];

		pcr_ops->write(cpuc->pcr);
		val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
			 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
		cpuc->pcr[i] = val;
		pcr_ops->write_pcr(i, cpuc->pcr[i]);
	}
}

static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int i;

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
	for (i = 0; i < sparc_pmu->num_pcrs; i++)
		cpuc->pcr[i] = pcr_ops->read_pcr(i);
}

void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
	if (!n_ev)
		return 0;

	if (n_ev > MAX_HWEVENTS)
	if (n_ev > sparc_pmu->max_hw_events)
		return -1;

	if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
		int i;

		for (i = 0; i < n_ev; i++)
			evts[i]->hw.idx = i;
		return 0;
	}

	msk0 = perf_event_get_msk(events[0]);
	if (n_ev == 1) {
		if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
	struct perf_event *event;
	int i, n, first;

	if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
		return 0;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
	perf_pmu_disable(event->pmu);

	n0 = cpuc->n_events;
	if (n0 >= MAX_HWEVENTS)
	if (n0 >= sparc_pmu->max_hw_events)
		goto out;

	cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
	/* We save the enable bits in the config_base.  */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
		hwc->config_base |= sparc_pmu->user_bit;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
		hwc->config_base |= sparc_pmu->priv_bit;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   MAX_HWEVENTS - 1,
				   sparc_pmu->max_hw_events - 1,
				   evts, events, current_idx_dmy);
		if (n < 0)
			return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;
	int cpu, i;

	if (!sparc_pmu)
		return;
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);
	for (i = 0; i < sparc_pmu->num_pcrs; i++)
		pr_info("CPU#%d: PCR%d[%016llx]\n",
			cpu, i, pcr_ops->read_pcr(i));
	for (i = 0; i < sparc_pmu->num_pic_regs; i++)
		pr_info("CPU#%d: PIC%d[%016llx]\n",
			cpu, i, pcr_ops->read_pic(i));

	local_irq_restore(flags);
}
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
	 * Do this before we peek at the counters to determine
	 * overflow so we don't lose any events.
	 */
	if (sparc_pmu->irq_bit)
		pcr_ops->write(cpuc->pcr);
	if (sparc_pmu->irq_bit &&
	    sparc_pmu->num_pcrs == 1)
		pcr_ops->write_pcr(0, cpuc->pcr[0]);

	for (i = 0; i < cpuc->n_events; i++) {
		struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
		struct hw_perf_event *hwc;
		u64 val;

		if (sparc_pmu->irq_bit &&
		    sparc_pmu->num_pcrs > 1)
			pcr_ops->write_pcr(idx, cpuc->pcr[idx]);

		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara4")) {
		sparc_pmu = &niagara4_pmu;
		return true;
	}
	return false;
}

@@ -340,7 +340,12 @@ static const char *hwcaps[] = {
 	 */
 	"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
 	"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
-	"ima", "cspare",
+	"ima", "cspare", "pause", "cbcond",
+};
+
+static const char *crypto_hwcaps[] = {
+	"aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
+	"sha512", "mpmul", "montmul", "montsqr", "crc32c",
 };
 
 void cpucap_info(struct seq_file *m)
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)
 			printed++;
 		}
 	}
+	if (caps & HWCAP_SPARC_CRYPTO) {
+		unsigned long cfr;
+
+		__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+		for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+			unsigned long bit = 1UL << i;
+			if (cfr & bit) {
+				seq_printf(m, "%s%s",
+					   printed ? "," : "", crypto_hwcaps[i]);
+				printed++;
+			}
+		}
+	}
 	seq_putc(m, '\n');
 }
 
+static void __init report_one_hwcap(int *printed, const char *name)
+{
+	if ((*printed) == 0)
+		printk(KERN_INFO "CPU CAPS: [");
+	printk(KERN_CONT "%s%s",
+	       (*printed) ? "," : "", name);
+	if (++(*printed) == 8) {
+		printk(KERN_CONT "]\n");
+		*printed = 0;
+	}
+}
+
+static void __init report_crypto_hwcaps(int *printed)
+{
+	unsigned long cfr;
+	int i;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+
+	for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (cfr & bit)
+			report_one_hwcap(printed, crypto_hwcaps[i]);
+	}
+}
+
 static void __init report_hwcaps(unsigned long caps)
 {
 	int i, printed = 0;
 
-	printk(KERN_INFO "CPU CAPS: [");
 	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
 		unsigned long bit = 1UL << i;
-		if (caps & bit) {
-			printk(KERN_CONT "%s%s",
-			       printed ? "," : "", hwcaps[i]);
-			if (++printed == 8) {
-				printk(KERN_CONT "]\n");
-				printk(KERN_INFO "CPU CAPS: [");
-				printed = 0;
-			}
-		}
+		if (caps & bit)
+			report_one_hwcap(&printed, hwcaps[i]);
 	}
-	printk(KERN_CONT "]\n");
+	if (caps & HWCAP_SPARC_CRYPTO)
+		report_crypto_hwcaps(&printed);
+	if (printed != 0)
+		printk(KERN_CONT "]\n");
 }
 
 static unsigned long __init mdesc_cpu_hwcap_list(void)
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
 			break;
 		}
 	}
+	for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+		if (!strcmp(prop, crypto_hwcaps[i]))
+			caps |= HWCAP_SPARC_CRYPTO;
+	}
 
 	plen = strlen(prop) + 1;
 	prop += plen;
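The crypto hwcap code above reads the Crypto Features Register (%asr26) and maps bit i to crypto_hwcaps[i]. A minimal standalone sketch of that decoding, assuming only the bit order shown in the diff (the register read itself is omitted, since it requires the privileged-side rd %asr26 instruction shown above):

	#include <stdio.h>

	static const char *crypto_hwcaps[] = {
		"aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
		"sha512", "mpmul", "montmul", "montsqr", "crc32c",
	};

	/* cfr would come from "rd %asr26, %0" on a crypto-capable chip */
	static void print_cfr(unsigned long cfr)
	{
		int i, printed = 0;

		for (i = 0; i < 12; i++) {
			if (cfr & (1UL << i))
				printf("%s%s", printed++ ? "," : "",
				       crypto_hwcaps[i]);
		}
		putchar('\n');
	}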
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
 lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
 lib-$(CONFIG_SPARC64) += NG2patch.o
 
+lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o
+lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o
+
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
30
arch/sparc/crypto/NG4copy_from_user.S... 30
arch/sparc/lib/NG4copy_from_user.S
Normal file
@@ -0,0 +1,30 @@
/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#define EX_LD(x)		\
98:	x;			\
	.section __ex_table,"a";\
	.align 4;		\
	.word 98b, __retl_one_asi;\
	.text;			\
	.align 4;

#ifndef ASI_AIUS
#define ASI_AIUS	0x11
#endif

#define FUNC_NAME		NG4copy_from_user
#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
#define EX_RETVAL(x)		0

#ifdef __KERNEL__
#define PREAMBLE					\
	rd	%asi, %g1;				\
	cmp	%g1, ASI_AIUS;				\
	bne,pn	%icc, ___copy_in_user;			\
	nop
#endif

#include "NG4memcpy.S"
57
arch/sparc/lib/NG4copy_page.S
Normal file
@@ -0,0 +1,57 @@
/* NG4copy_page.S: Niagara-4 optimized copy page.
 *
 * Copyright (C) 2012 (davem@davemloft.net)
 */

#include <asm/asi.h>
#include <asm/page.h>

	.text
	.align		32

	.register	%g2, #scratch
	.register	%g3, #scratch

	.globl		NG4copy_user_page
NG4copy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
	prefetch	[%o1 + 0x000], #n_reads_strong
	prefetch	[%o1 + 0x040], #n_reads_strong
	prefetch	[%o1 + 0x080], #n_reads_strong
	prefetch	[%o1 + 0x0c0], #n_reads_strong
	set		PAGE_SIZE, %g7
	prefetch	[%o1 + 0x100], #n_reads_strong
	prefetch	[%o1 + 0x140], #n_reads_strong
	prefetch	[%o1 + 0x180], #n_reads_strong
	prefetch	[%o1 + 0x1c0], #n_reads_strong
1:
	ldx		[%o1 + 0x00], %o2
	subcc		%g7, 0x40, %g7
	ldx		[%o1 + 0x08], %o3
	ldx		[%o1 + 0x10], %o4
	ldx		[%o1 + 0x18], %o5
	ldx		[%o1 + 0x20], %g1
	stxa		%o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	ldx		[%o1 + 0x28], %g2
	stxa		%o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	ldx		[%o1 + 0x30], %g3
	stxa		%o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	ldx		[%o1 + 0x38], %o2
	add		%o1, 0x40, %o1
	stxa		%o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	stxa		%g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	stxa		%g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	stxa		%g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	stxa		%o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	prefetch	[%o1 + 0x200], #n_reads_strong
	retl
	membar		#StoreLoad | #StoreStore
	.size		NG4copy_user_page,.-NG4copy_user_page
39
arch/sparc/lib/NG4copy_to_user.S
Normal file
@@ -0,0 +1,39 @@
/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#define EX_ST(x)		\
98:	x;			\
	.section __ex_table,"a";\
	.align 4;		\
	.word 98b, __retl_one_asi;\
	.text;			\
	.align 4;

#ifndef ASI_AIUS
#define ASI_AIUS	0x11
#endif

#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
#endif

#define FUNC_NAME		NG4copy_to_user
#define STORE(type,src,addr)	type##a src, [addr] %asi
#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS
#define EX_RETVAL(x)		0

#ifdef __KERNEL__
	/* Writing to %asi is _expensive_ so we hardcode it.
	 * Reading %asi to check for KERNEL_DS is comparatively
	 * cheap.
	 */
#define PREAMBLE					\
	rd	%asi, %g1;				\
	cmp	%g1, ASI_AIUS;				\
	bne,pn	%icc, ___copy_in_user;			\
	nop
#endif

#include "NG4memcpy.S"
360
arch/sparc/lib/NG4memcpy.S
Normal file
@@ -0,0 +1,360 @@
/* NG4memcpy.S: Niagara-4 optimized memcpy.
 *
 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
 */

#ifdef __KERNEL__
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE	%g7
#else
#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
#define FPRS_FEF  0x04

/* On T4 it is very expensive to access ASRs like %fprs and
 * %asi, avoiding a read or a write can save ~50 cycles.
 */
#define FPU_ENTER			\
	rd	%fprs, %o5;		\
	andcc	%o5, FPRS_FEF, %g0;	\
	be,a,pn	%icc, 999f;		\
	wr	%g0, FPRS_FEF, %fprs;	\
	999:

#ifdef MEMCPY_DEBUG
#define VISEntryHalf FPU_ENTER; \
		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#else
#define VISEntryHalf FPU_ENTER
#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
#endif

#define GLOBAL_SPARE	%g5
#endif

#ifndef STORE_ASI
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#else
#define STORE_ASI	0x80		/* ASI_P */
#endif
#endif

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#ifndef MEMCPY_DEBUG
#define STORE(type,src,addr)	type src, [addr]
#else
#define STORE(type,src,addr)	type##a src, [addr] %asi
#endif
#endif

#ifndef STORE_INIT
#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	NG4memcpy
#endif
#ifndef PREAMBLE
#define PREAMBLE
#endif

#ifndef XCC
#define XCC xcc
#endif

	.register	%g2,#scratch
	.register	%g3,#scratch

	.text
	.align		64

	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
#ifdef MEMCPY_DEBUG
	wr		%g0, 0x80, %asi
#endif
	srlx		%o2, 31, %g2
	cmp		%g2, 0
	tne		%XCC, 5
	PREAMBLE
	mov		%o0, %o3
	brz,pn		%o2, .Lexit
	cmp		%o2, 3
	ble,pn		%icc, .Ltiny
	cmp		%o2, 19
	ble,pn		%icc, .Lsmall
	or		%o0, %o1, %g2
	cmp		%o2, 128
	bl,pn		%icc, .Lmedium
	nop

.Llarge:/* len >= 0x80 */
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 51f
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	EX_ST(STORE(stb, %g2, %o0 - 0x01))

51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	/* Check if we can use the straight fully aligned
	 * loop, or we require the alignaddr/faligndata variant.
	 */
	andcc		%o1, 0x7, %o5
	bne,pn		%icc, .Llarge_src_unaligned
	sub		%g0, %o0, %g1

	/* Legitimize the use of initializing stores by getting dest
	 * to be 64-byte aligned.
	 */
	and		%g1, 0x3f, %g1
	brz,pt		%g1, .Llarge_aligned
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
	add		%o1, 8, %o1
	subcc		%g1, 8, %g1
	add		%o0, 8, %o0
	bne,pt		%icc, 1b
	EX_ST(STORE(stx, %g2, %o0 - 0x08))

.Llarge_aligned:
	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2

1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	add		%o1, 0x40, %o1
	EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
	EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
	EX_ST(STORE_INIT(%g1, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
	EX_ST(STORE_INIT(%g3, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
	add		%o0, 0x08, %o0
	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
	EX_ST(STORE_INIT(%o5, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g2, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(%g3, %o0))
	add		%o0, 0x08, %o0
	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)

	membar		#StoreLoad | #StoreStore

	brz,pn		%o2, .Lexit
	cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	nop
	ba,a,pt		%icc, .Lmedium_noprefetch

.Lexit:	retl
	mov		EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
	andn		%o2, 0x3f, %o4
	sub		%o2, %o4, %o2
	VISEntryHalf
	alignaddr	%o1, %g0, %g1
	add		%o1, %o4, %o1
	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
1:	EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
	subcc		%o4, 0x40, %o4
	EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
	EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
	EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
	EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
	EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
	EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
	faligndata	%f0, %f2, %f16
	EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
	faligndata	%f2, %f4, %f18
	add		%g1, 0x40, %g1
	faligndata	%f4, %f6, %f20
	faligndata	%f6, %f8, %f22
	faligndata	%f8, %f10, %f24
	faligndata	%f10, %f12, %f26
	faligndata	%f12, %f14, %f28
	faligndata	%f14, %f0, %f30
	EX_ST(STORE(std, %f16, %o0 + 0x00))
	EX_ST(STORE(std, %f18, %o0 + 0x08))
	EX_ST(STORE(std, %f20, %o0 + 0x10))
	EX_ST(STORE(std, %f22, %o0 + 0x18))
	EX_ST(STORE(std, %f24, %o0 + 0x20))
	EX_ST(STORE(std, %f26, %o0 + 0x28))
	EX_ST(STORE(std, %f28, %o0 + 0x30))
	EX_ST(STORE(std, %f30, %o0 + 0x38))
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
	VISExitHalf

	brz,pn		%o2, .Lexit
	cmp		%o2, 19
	ble,pn		%icc, .Lsmall_unaligned
	nop
	ba,a,pt		%icc, .Lmedium_unaligned

.Lmedium:
	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
	andcc		%g2, 0x7, %g0
	bne,pn		%icc, .Lmedium_unaligned
	nop
.Lmedium_noprefetch:
	andncc		%o2, 0x20 - 1, %o5
	be,pn		%icc, 2f
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
	EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
	add		%o1, 0x20, %o1
	subcc		%o5, 0x20, %o5
	EX_ST(STORE(stx, %g1, %o0 + 0x00))
	EX_ST(STORE(stx, %g2, %o0 + 0x08))
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
	EX_ST(STORE(stx, %o4, %o0 + 0x18))
	bne,pt		%icc, 1b
	add		%o0, 0x20, %o0
2:	andcc		%o2, 0x18, %o5
	be,pt		%icc, 3f
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
	add		%o1, 0x08, %o1
	add		%o0, 0x08, %o0
	subcc		%o5, 0x08, %o5
	bne,pt		%icc, 1b
	EX_ST(STORE(stx, %g1, %o0 - 0x08))
3:	brz,pt		%o2, .Lexit
	cmp		%o2, 0x04
	bl,pn		%icc, .Ltiny
	nop
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
	add		%o1, 0x04, %o1
	add		%o0, 0x04, %o0
	subcc		%o2, 0x04, %o2
	bne,pn		%icc, .Ltiny
	EX_ST(STORE(stw, %g1, %o0 - 0x04))
	ba,a,pt		%icc, .Lexit
.Lmedium_unaligned:
	/* First get dest 8 byte aligned.  */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1
	brz,pt		%g1, 2f
	sub		%o2, %g1, %o2

1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
	add		%o1, 1, %o1
	subcc		%g1, 1, %g1
	add		%o0, 1, %o0
	bne,pt		%icc, 1b
	EX_ST(STORE(stb, %g2, %o0 - 0x01))
2:
	and		%o1, 0x7, %g1
	brz,pn		%g1, .Lmedium_noprefetch
	sll		%g1, 3, %g1
	mov		64, %g2
	sub		%g2, %g1, %g2
	andn		%o1, 0x7, %o1
	EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
	sllx		%o4, %g1, %o4
	andn		%o2, 0x08 - 1, %o5
	sub		%o2, %o5, %o2
1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
	add		%o1, 0x08, %o1
	subcc		%o5, 0x08, %o5
	srlx		%g3, %g2, GLOBAL_SPARE
	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
	add		%o0, 0x08, %o0
	bne,pt		%icc, 1b
	sllx		%g3, %g1, %o4
	srl		%g1, 3, %g1
	add		%o1, %g1, %o1
	brz,pn		%o2, .Lexit
	nop
	ba,pt		%icc, .Lsmall_unaligned

.Ltiny:
	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	EX_ST(STORE(stb, %g1, %o0 + 0x00))
	EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
	subcc		%o2, 1, %o2
	be,pn		%icc, .Lexit
	EX_ST(STORE(stb, %g1, %o0 + 0x01))
	EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
	ba,pt		%icc, .Lexit
	EX_ST(STORE(stb, %g1, %o0 + 0x02))

.Lsmall:
	andcc		%g2, 0x3, %g0
	bne,pn		%icc, .Lsmall_unaligned
	andn		%o2, 0x4 - 1, %o5
	sub		%o2, %o5, %o2
1:
	EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
	add		%o1, 0x04, %o1
	subcc		%o5, 0x04, %o5
	add		%o0, 0x04, %o0
	bne,pt		%icc, 1b
	EX_ST(STORE(stw, %g1, %o0 - 0x04))
	brz,pt		%o2, .Lexit
	nop
	ba,a,pt		%icc, .Ltiny

.Lsmall_unaligned:
1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
	add		%o1, 1, %o1
	add		%o0, 1, %o0
	subcc		%o2, 1, %o2
	bne,pt		%icc, 1b
	EX_ST(STORE(stb, %g1, %o0 - 0x01))
	ba,a,pt		%icc, .Lexit
	.size		FUNC_NAME, .-FUNC_NAME
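For orientation, the compares at the top of NG4memcpy split copies into five size classes before any bulk work starts. A C rendering of that dispatch, read directly off the cmp 3 / cmp 19 / cmp 128 sequence (a sketch with labels matching the assembly, not code that exists in the tree):

	/* Which NG4memcpy path a given length takes. */
	static const char *ng4_size_class(unsigned long len)
	{
		if (len == 0)
			return ".Lexit";	/* brz %o2, .Lexit */
		if (len <= 3)
			return ".Ltiny";	/* byte loads/stores */
		if (len <= 19)
			return ".Lsmall";	/* 4-byte loop when aligned */
		if (len < 128)
			return ".Lmedium";	/* 32-byte unrolled int loop */
		return ".Llarge";		/* 64-byte loop: initializing
						 * stores, or faligndata when
						 * the source is misaligned */
	}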
43
arch/sparc/lib/NG4patch.S
Normal file
@@ -0,0 +1,43 @@
/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant.
 *
 * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
 */

#define BRANCH_ALWAYS	0x10680000
#define NOP		0x01000000
#define NG_DO_PATCH(OLD, NEW)	\
	sethi	%hi(NEW), %g1; \
	or	%g1, %lo(NEW), %g1; \
	sethi	%hi(OLD), %g2; \
	or	%g2, %lo(OLD), %g2; \
	sub	%g1, %g2, %g1; \
	sethi	%hi(BRANCH_ALWAYS), %g3; \
	sll	%g1, 11, %g1; \
	srl	%g1, 11 + 2, %g1; \
	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
	or	%g3, %g1, %g3; \
	stw	%g3, [%g2]; \
	sethi	%hi(NOP), %g3; \
	or	%g3, %lo(NOP), %g3; \
	stw	%g3, [%g2 + 0x4]; \
	flush	%g2;

	.globl	niagara4_patch_copyops
	.type	niagara4_patch_copyops,#function
niagara4_patch_copyops:
	NG_DO_PATCH(memcpy, NG4memcpy)
	NG_DO_PATCH(___copy_from_user, NG4copy_from_user)
	NG_DO_PATCH(___copy_to_user, NG4copy_to_user)
	retl
	nop
	.size	niagara4_patch_copyops,.-niagara4_patch_copyops

	.globl	niagara4_patch_pageops
	.type	niagara4_patch_pageops,#function
niagara4_patch_pageops:
	NG_DO_PATCH(copy_user_page, NG4copy_user_page)
	NG_DO_PATCH(_clear_page, NGclear_page)
	NG_DO_PATCH(clear_user_page, NGclear_user_page)
	retl
	nop
	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
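What NG_DO_PATCH emits can be written out in C: a "branch always" instruction whose 19-bit word displacement reaches NEW, stored over the first instruction of OLD, followed by a NOP over the second. The sll 11 / srl 13 pair in the macro is exactly the (delta >> 2) & 0x7ffff masking below. A sketch under those assumptions (the I-cache flush of the patched word cannot be expressed in C):

	#include <stdint.h>

	#define BRANCH_ALWAYS	0x10680000u	/* disp19 field left empty */
	#define NOP		0x01000000u

	static void ng_do_patch(uint32_t *old_fn, uint32_t *new_fn)
	{
		long delta = (char *)new_fn - (char *)old_fn;

		/* word displacement, truncated to the 19-bit field */
		old_fn[0] = BRANCH_ALWAYS | (((uint32_t)delta >> 2) & 0x7ffff);
		old_fn[1] = NOP;
		/* then: flush old_fn from the I-cache ("flush %g2") */
	}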
@@ -59,6 +59,8 @@ NGcopy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
 	restore
 
 	.align		32
+	.globl		NGclear_page
+	.globl		NGclear_user_page
 NGclear_page:		/* %o0=dest */
 NGclear_user_page:	/* %o0=dest, %o1=vaddr */
 	rd		%asi, %g3
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page);
 void VISenter(void);
 EXPORT_SYMBOL(VISenter);
 
+/* CRYPTO code needs this */
+void VISenterhalf(void);
+EXPORT_SYMBOL(VISenterhalf);
+
 extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
 extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
 		      unsigned long *);
@@ -51,22 +51,40 @@
 
 #include "init_64.h"
 
-unsigned long kern_linear_pte_xor[2] __read_mostly;
+unsigned long kern_linear_pte_xor[4] __read_mostly;
 
-/* A bitmap, one bit for every 256MB of physical memory.  If the bit
- * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
- * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
+/* A bitmap, two bits for every 256MB of physical memory.  These two
+ * bits determine what page size we use for kernel linear
+ * translations.  They form an index into kern_linear_pte_xor[].  The
+ * value in the indexed slot is XOR'd with the TLB miss virtual
+ * address to form the resulting TTE.  The mapping is:
+ *
+ *	0	==>	4MB
+ *	1	==>	256MB
+ *	2	==>	2GB
+ *	3	==>	16GB
+ *
+ * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
+ * support 2GB pages, and hopefully future cpus will support the 16GB
+ * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
+ * if these larger page sizes are not supported by the cpu.
+ *
+ * It would be nice to determine this from the machine description
+ * 'cpu' properties, but we need to have this table setup before the
+ * MDESC is initialized.
  */
 unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
-/* A special kernel TSB for 4MB and 256MB linear mappings.
- * Space is allocated for this right after the trap table
- * in arch/sparc64/kernel/head.S
+/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
+ * Space is allocated for this right after the trap table in
+ * arch/sparc64/kernel/head.S
  */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
 
+static unsigned long cpu_pgsz_mask;
+
 #define MAX_BANKS	32
 
 static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata;
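The new comment fully specifies the lookup; in C it would read roughly as below. This is conceptual only — the real consumer is the hand-written TLB-miss assembly — and the 0xfffff80000000000UL constant is assumed to be the same linear-mapping base XOR'd into the table slots elsewhere in this diff:

	static unsigned long kern_linear_tte(unsigned long vaddr)
	{
		/* physical offset, in 256MB chunks */
		unsigned long index = (vaddr & ~0xfffff80000000000UL) >> 28;
		unsigned long word = kpte_linear_bitmap[index / (BITS_PER_LONG / 2)];
		unsigned long sel = (word >> ((index % (BITS_PER_LONG / 2)) * 2)) & 0x3;

		return vaddr ^ kern_linear_pte_xor[sel];	/* the TTE */
	}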
@@ -403,6 +421,12 @@ EXPORT_SYMBOL(flush_icache_range);
 
 void mmu_info(struct seq_file *m)
 {
+	static const char *pgsz_strings[] = {
+		"8K", "64K", "512K", "4MB", "32MB",
+		"256MB", "2GB", "16GB",
+	};
+	int i, printed;
+
 	if (tlb_type == cheetah)
 		seq_printf(m, "MMU Type\t: Cheetah\n");
 	else if (tlb_type == cheetah_plus)
@@ -414,6 +438,17 @@ void mmu_info(struct seq_file *m)
 	else
 		seq_printf(m, "MMU Type\t: ???\n");
 
+	seq_printf(m, "MMU PGSZs\t: ");
+	printed = 0;
+	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
+		if (cpu_pgsz_mask & (1UL << i)) {
+			seq_printf(m, "%s%s",
+				   printed ? "," : "", pgsz_strings[i]);
+			printed++;
+		}
+	}
+	seq_putc(m, '\n');
+
 #ifdef CONFIG_DEBUG_DCFLUSH
 	seq_printf(m, "DCPageFlushes\t: %d\n",
 		   atomic_read(&dcpage_flushes));
@@ -1358,32 +1393,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 extern unsigned int kvmap_linear_patch[1];
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+static void __init kpte_set_val(unsigned long index, unsigned long val)
+{
+	unsigned long *ptr = kpte_linear_bitmap;
+
+	val <<= ((index % (BITS_PER_LONG / 2)) * 2);
+	ptr += (index / (BITS_PER_LONG / 2));
+
+	*ptr |= val;
+}
+
+static const unsigned long kpte_shift_min = 28; /* 256MB */
+static const unsigned long kpte_shift_max = 34; /* 16GB */
+static const unsigned long kpte_shift_incr = 3;
+
+static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
+					   unsigned long shift)
+{
+	unsigned long size = (1UL << shift);
+	unsigned long mask = (size - 1UL);
+	unsigned long remains = end - start;
+	unsigned long val;
+
+	if (remains < size || (start & mask))
+		return start;
+
+	/* VAL maps:
+	 *
+	 *	shift 28 --> kern_linear_pte_xor index 1
+	 *	shift 31 --> kern_linear_pte_xor index 2
+	 *	shift 34 --> kern_linear_pte_xor index 3
+	 */
+	val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
+
+	remains &= ~mask;
+	if (shift != kpte_shift_max)
+		remains = size;
+
+	while (remains) {
+		unsigned long index = start >> kpte_shift_min;
+
+		kpte_set_val(index, val);
+
+		start += 1UL << kpte_shift_min;
+		remains -= 1UL << kpte_shift_min;
+	}
+
+	return start;
+}
+
 static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
 {
-	const unsigned long shift_256MB = 28;
-	const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
-	const unsigned long size_256MB = (1UL << shift_256MB);
+	unsigned long smallest_size, smallest_mask;
+	unsigned long s;
+
+	smallest_size = (1UL << kpte_shift_min);
+	smallest_mask = (smallest_size - 1UL);
 
 	while (start < end) {
-		long remains;
+		unsigned long orig_start = start;
 
-		remains = end - start;
-		if (remains < size_256MB)
-			break;
+		for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
+			start = kpte_mark_using_shift(start, end, s);
 
-		if (start & mask_256MB) {
-			start = (start + size_256MB) & ~mask_256MB;
-			continue;
+			if (start != orig_start)
+				break;
 		}
-
-		while (remains >= size_256MB) {
-			unsigned long index = start >> shift_256MB;
-
-			__set_bit(index, kpte_linear_bitmap);
-
-			start += size_256MB;
-			remains -= size_256MB;
-		}
+		if (start == orig_start)
+			start = (start + smallest_size) & ~smallest_mask;
 	}
 }
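A worked check of the shift -> slot encoding in kpte_mark_using_shift() above: shift 28 gives ((28-28)/3)+1 = 1, shift 31 gives 2, shift 34 gives 3, matching the VAL comment; slot 0 (4MB) is the default when a chunk's two-bit field is left clear. As a trivially runnable sketch:

	#include <assert.h>

	int main(void)
	{
		const unsigned long min = 28, incr = 3;	/* kpte_shift_min/incr */

		assert(((28 - min) / incr) + 1 == 1);	/* 256MB -> slot 1 */
		assert(((31 - min) / incr) + 1 == 2);	/* 2GB   -> slot 2 */
		assert(((34 - min) / incr) + 1 == 3);	/* 16GB  -> slot 3 */
		return 0;
	}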
@@ -1577,13 +1655,16 @@ static void __init sun4v_ktsb_init(void)
 	ktsb_descr[0].resv = 0;
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
-	/* Second KTSB for 4MB/256MB mappings.  */
+	/* Second KTSB for 4MB/256MB/2GB/16GB mappings.  */
 	ktsb_pa = (kern_base +
 		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
 
 	ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
-	ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
-				   HV_PGSZ_MASK_256MB);
+	ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
+				    HV_PGSZ_MASK_256MB |
+				    HV_PGSZ_MASK_2GB |
+				    HV_PGSZ_MASK_16GB) &
+				   cpu_pgsz_mask);
 	ktsb_descr[1].assoc = 1;
 	ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
 	ktsb_descr[1].ctx_idx = 0;
@@ -1606,6 +1687,47 @@ void __cpuinit sun4v_ktsb_register(void)
 	}
 }
 
+static void __init sun4u_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	/* This is where we would add Panther support for
+	 * 32MB and 256MB pages.
+	 */
+#endif
+}
+
+static void __init sun4v_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
+		kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
+		kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
+		kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
+			0xfffff80000000000UL;
+		kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
+	}
+#endif
+}
+
 /* paging_init() sets up the page tables */
 
 static unsigned long last_valid_pfn;
@@ -1665,10 +1787,8 @@ void __init paging_init(void)
 		ktsb_phys_patch();
 	}
 
-	if (tlb_type == hypervisor) {
+	if (tlb_type == hypervisor)
 		sun4v_patch_tlb_handlers();
-		sun4v_ktsb_init();
-	}
 
 	/* Find available physical memory...
 	 *
@@ -1727,9 +1847,6 @@ void __init paging_init(void)
 
 	__flush_tlb_all();
 
-	if (tlb_type == hypervisor)
-		sun4v_ktsb_register();
-
 	prom_build_devicetree();
 	of_populate_present_mask();
 #ifndef CONFIG_SMP
@@ -1742,8 +1859,36 @@ void __init paging_init(void)
 #ifndef CONFIG_SMP
 		mdesc_fill_in_cpu_data(cpu_all_mask);
 #endif
+		mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
+
+		sun4v_linear_pte_xor_finalize();
+
+		sun4v_ktsb_init();
+		sun4v_ktsb_register();
+	} else {
+		unsigned long impl, ver;
+
+		cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+				 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+
+		__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+		impl = ((ver >> 32) & 0xffff);
+		if (impl == PANTHER_IMPL)
+			cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
+					  HV_PGSZ_MASK_256MB);
+
+		sun4u_linear_pte_xor_finalize();
+	}
+
+	/* Flush the TLBs and the 4M TSB so that the updated linear
+	 * pte XOR settings are realized for all mappings.
+	 */
+	__flush_tlb_all();
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+#endif
+	__flush_tlb_all();
+
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
@@ -2110,6 +2255,7 @@ static void __init sun4u_pgprot_init(void)
 {
 	unsigned long page_none, page_shared, page_copy, page_readonly;
 	unsigned long page_exec_bit;
+	int i;
 
 	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
 				_PAGE_CACHE_4U | _PAGE_P_4U |
@@ -2137,8 +2283,8 @@ static void __init sun4u_pgprot_init(void)
 	kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
 				   _PAGE_P_4U | _PAGE_W_4U);
 
-	/* XXX Should use 256MB on Panther. XXX */
-	kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
 
 	_PAGE_SZBITS = _PAGE_SZBITS_4U;
 	_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
@@ -2164,6 +2310,7 @@ static void __init sun4v_pgprot_init(void)
 {
 	unsigned long page_none, page_shared, page_copy, page_readonly;
 	unsigned long page_exec_bit;
+	int i;
 
 	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
 				_PAGE_CACHE_4V | _PAGE_P_4V |
@@ -2185,15 +2332,8 @@ static void __init sun4v_pgprot_init(void)
 	kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
 				   _PAGE_P_4V | _PAGE_W_4V);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
-		0xfffff80000000000UL;
-#else
-	kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
-		0xfffff80000000000UL;
-#endif
-	kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
-				   _PAGE_P_4V | _PAGE_W_4V);
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
 
 	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
 		     __ACCESS_BITS_4V | _PAGE_E_4V);
@@ -8,12 +8,12 @@
 #define MAX_PHYS_ADDRESS	(1UL << 41UL)
 #define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
 #define KPTE_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
+	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
 #define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
 #define VALID_ADDR_BITMAP_BYTES	\
 	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
 
-extern unsigned long kern_linear_pte_xor[2];
+extern unsigned long kern_linear_pte_xor[4];
 extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
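The /8 -> /4 change above is just the bitmap growing from one to two bits per 256MB chunk: with MAX_PHYS_ADDRESS = 2^41 there are 8192 chunks, so the array doubles from 1024 to 2048 bytes. A one-line check:

	#include <stdio.h>

	int main(void)
	{
		unsigned long chunks = (1UL << 41) / (256UL << 20);	/* 8192 */

		printf("1 bit/chunk:  %lu bytes\n", chunks / 8);	/* 1024 */
		printf("2 bits/chunk: %lu bytes\n", chunks / 4);	/* 2048 */
		return 0;
	}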
@@ -464,8 +464,15 @@ void bpf_jit_compile(struct sk_filter *fp)
 			emit_alu_K(OR, K);
 			break;
+		case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
+		case BPF_S_ALU_XOR_X:
+			emit_alu_X(XOR);
+			break;
+		case BPF_S_ALU_XOR_K:	/* A ^= K */
+			emit_alu_K(XOR, K);
+			break;
 		case BPF_S_ALU_LSH_X:	/* A <<= X */
 			emit_alu_X(SLL);
 			break;
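The JIT hunk above lets sparc compile the XOR ALU ops natively instead of falling back to the interpreter. A classic-BPF fragment exercising both new cases, built with the standard BPF_STMT macros from <linux/filter.h> (illustrative only; any filter using BPF_XOR hits these paths):

	#include <linux/filter.h>

	struct sock_filter prog[] = {
		BPF_STMT(BPF_LD  | BPF_W   | BPF_ABS, 0),	 /* A = pkt[0..3] */
		BPF_STMT(BPF_ALU | BPF_XOR | BPF_K, 0xdeadbeef), /* A ^= K */
		BPF_STMT(BPF_MISC | BPF_TAX, 0),		 /* X = A */
		BPF_STMT(BPF_LD  | BPF_W   | BPF_ABS, 4),	 /* A = pkt[4..7] */
		BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),		 /* A ^= X */
		BPF_STMT(BPF_RET | BPF_A, 0),			 /* accept A bytes */
	};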