Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Remove VLA usage - Add cryptostat user-space interface - Add notifier for new crypto algorithms Algorithms: - Add OFB mode - Remove speck Drivers: - Remove x86/sha*-mb as they are buggy - Remove pcbc(aes) from x86/aesni - Improve performance of arm/ghash-ce by up to 85% - Implement CTS-CBC in arm64/aes-blk, faster by up to 50% - Remove PMULL based arm64/crc32 driver - Use PMULL in arm64/crct10dif - Add aes-ctr support in s5p-sss - Add caam/qi2 driver Others: - Pick better transform if one becomes available in crc-t10dif" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (124 commits) crypto: chelsio - Update ntx queue received from cxgb4 crypto: ccree - avoid implicit enum conversion crypto: caam - add SPDX license identifier to all files crypto: caam/qi - simplify CGR allocation, freeing crypto: mxs-dcp - make symbols 'sha1_null_hash' and 'sha256_null_hash' static crypto: arm64/aes-blk - ensure XTS mask is always loaded crypto: testmgr - fix sizeof() on COMP_BUF_SIZE crypto: chtls - remove set but not used variable 'csk' crypto: axis - fix platform_no_drv_owner.cocci warnings crypto: x86/aes-ni - fix build error following fpu template removal crypto: arm64/aes - fix handling sub-block CTS-CBC inputs crypto: caam/qi2 - avoid double export crypto: mxs-dcp - Fix AES issues crypto: mxs-dcp - Fix SHA null hashes and output length crypto: mxs-dcp - Implement sha import/export crypto: aegis/generic - fix for big endian systems crypto: morus/generic - fix for big endian systems crypto: lrw - fix rebase error after out of bounds fix crypto: cavium/nitrox - use pci_alloc_irq_vectors() while enabling MSI-X. crypto: cavium/nitrox - NITROX command queue changes. ...
This commit is contained in:
@@ -60,9 +60,6 @@ endif
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
|
||||
endif
|
||||
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
|
||||
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
|
||||
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
|
||||
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
||||
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
||||
|
@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
const u8 *in, unsigned int len, u8 *iv);
|
||||
|
||||
int crypto_fpu_init(void);
|
||||
void crypto_fpu_exit(void);
|
||||
|
||||
#define AVX_GEN2_OPTSIZE 640
|
||||
#define AVX_GEN4_OPTSIZE 4096
|
||||
|
||||
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
|
||||
/* Linearize assoc, if not already linear */
|
||||
if (req->src->length >= assoclen && req->src->length &&
|
||||
(!PageHighMem(sg_page(req->src)) ||
|
||||
req->src->offset + req->src->length < PAGE_SIZE)) {
|
||||
req->src->offset + req->src->length <= PAGE_SIZE)) {
|
||||
scatterwalk_start(&assoc_sg_walk, req->src);
|
||||
assoc = scatterwalk_map(&assoc_sg_walk);
|
||||
} else {
|
||||
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
|
||||
static
|
||||
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
|
||||
|
||||
static struct {
|
||||
const char *algname;
|
||||
const char *drvname;
|
||||
const char *basename;
|
||||
struct simd_skcipher_alg *simd;
|
||||
} aesni_simd_skciphers2[] = {
|
||||
#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
|
||||
IS_BUILTIN(CONFIG_CRYPTO_PCBC)
|
||||
{
|
||||
.algname = "pcbc(aes)",
|
||||
.drvname = "pcbc-aes-aesni",
|
||||
.basename = "fpu(pcbc(__aes-aesni))",
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int key_len)
|
||||
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
|
||||
aesni_simd_skciphers[i]; i++)
|
||||
simd_skcipher_free(aesni_simd_skciphers[i]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
|
||||
if (aesni_simd_skciphers2[i].simd)
|
||||
simd_skcipher_free(aesni_simd_skciphers2[i].simd);
|
||||
}
|
||||
|
||||
static int __init aesni_init(void)
|
||||
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
err = crypto_fpu_init();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
if (err)
|
||||
goto fpu_exit;
|
||||
return err;
|
||||
|
||||
err = crypto_register_skciphers(aesni_skciphers,
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
|
||||
aesni_simd_skciphers[i] = simd;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
|
||||
algname = aesni_simd_skciphers2[i].algname;
|
||||
drvname = aesni_simd_skciphers2[i].drvname;
|
||||
basename = aesni_simd_skciphers2[i].basename;
|
||||
simd = simd_skcipher_create_compat(algname, drvname, basename);
|
||||
err = PTR_ERR(simd);
|
||||
if (IS_ERR(simd))
|
||||
continue;
|
||||
|
||||
aesni_simd_skciphers2[i].simd = simd;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unregister_simds:
|
||||
@@ -1521,8 +1482,6 @@ unregister_skciphers:
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
unregister_algs:
|
||||
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
fpu_exit:
|
||||
crypto_fpu_exit();
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
|
||||
crypto_unregister_skciphers(aesni_skciphers,
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
|
||||
crypto_fpu_exit();
|
||||
}
|
||||
|
||||
late_initcall(aesni_init);
|
||||
|
@@ -1,207 +0,0 @@
|
||||
/*
|
||||
* FPU: Wrapper for blkcipher touching fpu
|
||||
*
|
||||
* Copyright (c) Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct crypto_fpu_ctx {
|
||||
struct crypto_skcipher *child;
|
||||
};
|
||||
|
||||
static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(child, key, keylen);
|
||||
crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, 0, NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
req->iv);
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_skcipher_encrypt(subreq);
|
||||
kernel_fpu_end();
|
||||
|
||||
skcipher_request_zero(subreq);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, 0, NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
req->iv);
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_skcipher_decrypt(subreq);
|
||||
kernel_fpu_end();
|
||||
|
||||
skcipher_request_zero(subreq);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher_spawn *spawn;
|
||||
struct crypto_skcipher *cipher;
|
||||
|
||||
spawn = skcipher_instance_ctx(inst);
|
||||
cipher = crypto_spawn_skcipher(spawn);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(ctx->child);
|
||||
}
|
||||
|
||||
static void crypto_fpu_free(struct skcipher_instance *inst)
|
||||
{
|
||||
crypto_drop_skcipher(skcipher_instance_ctx(inst));
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct crypto_skcipher_spawn *spawn;
|
||||
struct skcipher_instance *inst;
|
||||
struct crypto_attr_type *algt;
|
||||
struct skcipher_alg *alg;
|
||||
const char *cipher_name;
|
||||
int err;
|
||||
|
||||
algt = crypto_get_attr_type(tb);
|
||||
if (IS_ERR(algt))
|
||||
return PTR_ERR(algt);
|
||||
|
||||
if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
|
||||
algt->mask)
|
||||
return -EINVAL;
|
||||
|
||||
if (!(algt->mask & CRYPTO_ALG_INTERNAL))
|
||||
return -EINVAL;
|
||||
|
||||
cipher_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(cipher_name))
|
||||
return PTR_ERR(cipher_name);
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
|
||||
if (!inst)
|
||||
return -ENOMEM;
|
||||
|
||||
spawn = skcipher_instance_ctx(inst);
|
||||
|
||||
crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
|
||||
err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
|
||||
if (err)
|
||||
goto out_free_inst;
|
||||
|
||||
alg = crypto_skcipher_spawn_alg(spawn);
|
||||
|
||||
err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
|
||||
&alg->base);
|
||||
if (err)
|
||||
goto out_drop_skcipher;
|
||||
|
||||
inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
|
||||
inst->alg.base.cra_priority = alg->base.cra_priority;
|
||||
inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
|
||||
|
||||
inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
|
||||
inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
|
||||
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
|
||||
|
||||
inst->alg.init = crypto_fpu_init_tfm;
|
||||
inst->alg.exit = crypto_fpu_exit_tfm;
|
||||
|
||||
inst->alg.setkey = crypto_fpu_setkey;
|
||||
inst->alg.encrypt = crypto_fpu_encrypt;
|
||||
inst->alg.decrypt = crypto_fpu_decrypt;
|
||||
|
||||
inst->free = crypto_fpu_free;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err)
|
||||
goto out_drop_skcipher;
|
||||
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_drop_skcipher:
|
||||
crypto_drop_skcipher(spawn);
|
||||
out_free_inst:
|
||||
kfree(inst);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_fpu_tmpl = {
|
||||
.name = "fpu",
|
||||
.create = crypto_fpu_create,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
int __init crypto_fpu_init(void)
|
||||
{
|
||||
return crypto_register_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
void crypto_fpu_exit(void)
|
||||
{
|
||||
crypto_unregister_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
MODULE_ALIAS_CRYPTO("fpu");
|
@@ -1,14 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
OBJECT_FILES_NON_STANDARD := y
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
|
||||
sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
|
||||
sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha1_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
|
||||
#ifdef HASH_CTX_DEBUG
|
||||
HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA1_DIGEST_LENGTH 5
|
||||
#define SHA1_LOG2_BLOCK_SIZE 6
|
||||
|
||||
#define SHA1_PADLENGTHFIELD_SIZE 8
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha1_ctx_mgr {
|
||||
struct sha1_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
|
||||
|
||||
struct sha1_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha1 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA1 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA1_DIGEST_WORDS 5
|
||||
|
||||
enum job_sts { STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha1 {
|
||||
u8 *buffer;
|
||||
u32 len;
|
||||
u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
/* SHA1 out-of-order scheduler */
|
||||
|
||||
/* typedef uint32_t sha1_digest_array[5][8]; */
|
||||
|
||||
struct sha1_args_x8 {
|
||||
uint32_t digest[5][8];
|
||||
uint8_t *data_ptr[8];
|
||||
};
|
||||
|
||||
struct sha1_lane_data {
|
||||
struct job_sha1 *job_in_lane;
|
||||
};
|
||||
|
||||
struct sha1_mb_mgr {
|
||||
struct sha1_args_x8 args;
|
||||
|
||||
uint32_t lens[8];
|
||||
|
||||
/* each byte is index (0...7) of unused lanes */
|
||||
uint64_t unused_lanes;
|
||||
/* byte 4 is set to FF as a flag */
|
||||
struct sha1_lane_data ldata[8];
|
||||
};
|
||||
|
||||
|
||||
#define SHA1_MB_MGR_NUM_LANES_AVX2 8
|
||||
|
||||
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
|
||||
struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
|
||||
struct job_sha1 *job);
|
||||
struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
|
||||
struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
|
||||
|
||||
#endif
|
@@ -1,287 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA1 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count in in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. It's arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
|
||||
#define _SHA1_MB_MGR_DATASTRUCT_ASM_
|
||||
|
||||
## START_FIELDS
|
||||
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
## FIELD name size align
|
||||
.macro FIELD name size align
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
\name = _FIELD_OFFSET
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
## END_FIELDS
|
||||
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
.macro ENDSTRUCT
|
||||
tmp = _FIELD_OFFSET
|
||||
END_FIELDS
|
||||
tmp = (_FIELD_OFFSET - %%tmp)
|
||||
.if (tmp > 0)
|
||||
.lcomm tmp
|
||||
.endif
|
||||
.endstruc
|
||||
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
########################################################################
|
||||
#### Define SHA1 Out Of Order Data Structures
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # SHA1_ARGS_X8
|
||||
### name size align
|
||||
FIELD _digest, 4*5*8, 16 # transposed digest
|
||||
FIELD _data_ptr, 8*8, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA1_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA1_ARGS_X4_align = _STRUCT_ALIGN
|
||||
_SHA1_ARGS_X8_size = _FIELD_OFFSET
|
||||
_SHA1_ARGS_X8_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
|
||||
FIELD _lens, 4*8, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
/*
 * Job status codes stored in the _status field of a job: the submit routine
 * writes STS_BEING_PROCESSED, the completion paths write STS_COMPLETED.
 */
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
########################################################################
|
||||
#### Define JOB_SHA1 structure
|
||||
########################################################################
|
||||
|
||||
# JOB_SHA1: one hashing request as seen by the assembly manager routines.
# NOTE(review): _len is labelled as bytes here, but the manager passes the
# per-lane length on to sha1_x8_avx2, whose header describes its size
# argument as a block count -- confirm the unit with the C caller.
# _result_digest is five 32-bit words; the submit routine also reads it as
# the starting digest before the completion paths overwrite it.
START_FIELDS # JOB_SHA1
|
||||
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 4, 4 # length in bytes
|
||||
FIELD _result_digest, 5*4, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA1_size = _FIELD_OFFSET
|
||||
_JOB_SHA1_align = _STRUCT_ALIGN
|
@@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA1 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
|
||||
.extern sha1_x8_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
# idx must be a register not clobbered by sha1_x8_avx2
|
||||
#define idx %r8
|
||||
#define DWORD_idx %r8d
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
#define tmp2_w %ebx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
/* arg1 is itself a cpp macro for a %-prefixed register, so no extra % here. */
#define tmp3 arg1
|
||||
|
||||
/* arg2 is itself a cpp macro for a %-prefixed register, so no extra % here. */
#define extra_blocks arg2
|
||||
/* arg2 is itself a cpp macro for a %-prefixed register, so no extra % here. */
#define p arg2
|
||||
|
||||
# LABEL: define a label whose name is prefix immediately followed by n.
# Used with an altmacro %expression argument to emit skip_0, skip_1, ...
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
# JNE_SKIP: branch to the label skip_<i> when the preceding compare found
# the operands unequal; pairs with the labels emitted through LABEL.
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
# SET_OFFSET: assign its argument to the assembler symbol "offset".
# The definition is bracketed by .altmacro/.noaltmacro so alternate macro
# syntax is only enabled while the macro is being defined.
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rdi : state
|
||||
# Flush: make progress even though some lanes are empty.  Empty lanes are
# padded with a copy of an occupied lane, the shortest job is run to
# completion through sha1_x8_avx2, and that job is returned in %rax.
# Returns NULL in %rax when every lane is empty.  %rbx is saved and restored.
ENTRY(sha1_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+3) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+3, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
# idx starts at 0 and is overwritten by every occupied lane 1..7 in turn,
# so the highest occupied lane wins; lane 0 is the fallback.
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne four(%rip), idx
|
||||
offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne five(%rip), idx
|
||||
offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne six(%rip), idx
|
||||
offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne seven(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
# Every lane whose job pointer is NULL receives the data pointer of lane idx
# and a length word of 0xFFFFFFFF (the largest unsigned value, so the
# minimum search below does not pick it).  Occupied lanes are skipped.
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 8
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 4*I)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens+0*16(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
# Split the minimum: the low nibble is the lane index, the remaining bits
# are the length.  A zero length means that lane is already finished.
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
# Broadcast the minimum with its lane nibble masked off and subtract it
# from all eight length words before running the hash.
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _lens+0*16(state)
|
||||
vmovdqu %xmm1, _lens+1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha1_x8_avx2
|
||||
# state and idx are intact
|
||||
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
# Detach the job from its lane, mark it completed, and push the lane index
# back onto the nibble list of unused lanes.
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
# Gather the five digest words of lane idx from the transposed layout
# (32-byte stride per word, 4 bytes per lane) and store them in the job.
vmovd _args_digest(state , idx, 4) , %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), tmp2_w
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
mov tmp2_w, offset(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha1_mb_mgr_flush_avx2)
|
||||
|
||||
|
||||
#################################################################
|
||||
|
||||
.align 16
|
||||
# Get completed job: return in %rax a job whose remaining length is already
# zero, without doing any hashing.  Returns NULL when all lanes are empty or
# when the smallest length word still has bits set above the lane nibble.
# Unlike the flush routine this one sets up no frame; it makes no calls.
ENTRY(sha1_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
## if bit 32+3 is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+3), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
# Any bit above the low nibble means the shortest job still has data left;
# otherwise idx already equals the lane index.
vmovd %xmm2, DWORD_idx
|
||||
test $~0xF, idx
|
||||
jnz .return_null
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
# Detach the job, mark it completed, and push the lane index back onto the
# nibble list of unused lanes.
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
# Gather the five digest words of lane idx from the transposed layout.
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), tmp2_w
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
movl tmp2_w, _result_digest+1*16(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
# 16-byte mask that keeps bits 4..31 of the low dword and clears everything
# else; the flush routine uses it to strip the lane nibble from the minimum
# length word before broadcasting it.
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
# 64-bit constants 1..7 kept in memory: the flush routine loads them with
# cmovne, which takes a register or memory source but no immediate.
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
.quad 2
|
||||
three:
|
||||
.quad 3
|
||||
four:
|
||||
.quad 4
|
||||
five:
|
||||
.quad 5
|
||||
six:
|
||||
.quad 6
|
||||
seven:
|
||||
.quad 7
|
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA1 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha1_mb_mgr.h"
|
||||
|
||||
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
state->unused_lanes = 0xF76543210ULL;
|
||||
for (j = 0; j < 8; j++) {
|
||||
state->lens[j] = 0xFFFFFFFF;
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
||||
}
|
@@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA1 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
|
||||
# Declare the routine this file actually calls (sha1_x8_avx2).
.extern sha1_x8_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
size_offset = %rcx
|
||||
tmp2 = %rcx
|
||||
extra_blocks = %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job %rsi
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
# idx must be a register not clobbered by sha1_x8_avx2
|
||||
idx = %r8
|
||||
DWORD_idx = %r8d
|
||||
last_len = %r8
|
||||
|
||||
p = %r11
|
||||
start_offset = %r11
|
||||
|
||||
unused_lanes = %rbx
|
||||
BYTE_unused_lanes = %bl
|
||||
|
||||
job_rax = %rax
|
||||
len = %rax
|
||||
DWORD_len = %eax
|
||||
|
||||
lane = %r12
|
||||
tmp3 = %r12
|
||||
|
||||
tmp = %r9
|
||||
DWORD_tmp = %r9d
|
||||
|
||||
lane_data = %r10
|
||||
|
||||
# JOB* sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
|
||||
# arg 1 : rdi : state
|
||||
# arg 2 : rsi : job
|
||||
# Submit: place the job in the next unused lane.  While unused lanes remain
# afterwards the routine returns NULL in %rax; once every lane is occupied
# it runs sha1_x8_avx2 for the shortest job and returns that completed job.
# %rbx and %r12 are saved and restored.
ENTRY(sha1_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
# Pop the lowest nibble of the unused-lane list as this job's lane, attach
# the job to it and mark the job as being processed.
mov _unused_lanes(state), unused_lanes
|
||||
mov unused_lanes, lane
|
||||
and $0xF, lane
|
||||
shr $4, unused_lanes
|
||||
imul $_LANE_DATA_size, lane, lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
# The length word stored per lane is (len << 4) | lane, so the minimum
# search below yields both the shortest length and its lane.
mov job, _job_in_lane(lane_data)
|
||||
shl $4, len
|
||||
or lane, len
|
||||
|
||||
movl DWORD_len, _lens(state , lane, 4)
|
||||
|
||||
# Load digest words from result_digest
# and scatter them into the transposed layout (32-byte stride per word).
|
||||
vmovdqu _result_digest(job), %xmm0
|
||||
mov _result_digest+1*16(job), DWORD_tmp
|
||||
vmovd %xmm0, _args_digest(state, lane, 4)
|
||||
vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
|
||||
vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
|
||||
vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
|
||||
movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
# Only the 0xF marker left in the list means all lanes are now occupied;
# otherwise return NULL and wait for more jobs.
cmp $0xF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
# Find min length
|
||||
vmovdqa _lens(state), %xmm0
|
||||
vmovdqa _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
# Split the minimum into lane index (low nibble) and length (upper bits);
# len2 shares its register with job, which is no longer needed here.
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
# Subtract the minimum length (lane nibble masked off) from every lane.
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqa %xmm0, _lens + 0*16(state)
|
||||
vmovdqa %xmm1, _lens + 1*16(state)
|
||||
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha1_x8_avx2
|
||||
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
# Detach the job, mark it completed, and push the lane index back onto the
# nibble list of unused lanes.
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
# Gather the five digest words of lane idx back into the job.
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), DWORD_tmp
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
movl DWORD_tmp, _result_digest+1*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
|
||||
ENDPROC(sha1_mb_mgr_submit_avx2)
|
||||
|
||||
# 16-byte mask that keeps bits 4..31 of the low dword and clears everything
# else; the submit routine uses it to strip the lane nibble from the minimum
# length word before broadcasting it.
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
@@ -1,492 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA1 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
## code to compute oct SHA1 using AVX2 (256-bit ymm registers)
|
||||
## outer calling routine takes care of save and restore of XMM registers
|
||||
|
||||
## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
|
||||
##
|
||||
## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
|
||||
## Linux preserves: rdi rbp r8
|
||||
##
|
||||
## clobbers ymm0-15
|
||||
|
||||
|
||||
# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
# "transpose" data in {r0...r7} using temps {t0...t1}
|
||||
# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
|
||||
# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
|
||||
# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
|
||||
# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
|
||||
#
|
||||
# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
|
||||
# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
|
||||
# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
|
||||
# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
|
||||
# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
|
||||
# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
|
||||
# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
|
||||
# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
|
||||
#
|
||||
|
||||
# Three stages: in-lane dword shuffles on rows r0..r3, the same on rows
# r4..r7, then vperm2f128 to combine 128-bit halves of the two groups into
# the final rows.  t0 and t1 are scratch registers and are overwritten.
.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
|
||||
# process top half (r0..r3) {a...d}
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
|
||||
vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
|
||||
vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
|
||||
vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
|
||||
|
||||
# use r2 in place of t0
|
||||
# process bottom half (r4..r7) {e...h}
|
||||
vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
|
||||
vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
|
||||
vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
|
||||
vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
|
||||
vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
|
||||
vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
|
||||
vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
|
||||
vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
|
||||
|
||||
vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
|
||||
vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
|
||||
vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
|
||||
vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
|
||||
vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
|
||||
vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
|
||||
vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
|
||||
vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
|
||||
|
||||
.endm
|
||||
##
|
||||
## Magic functions defined in FIPS 180-1
|
||||
##
|
||||
# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
|
||||
# Writes the result to regF; regB, regC and regD are only read.  regT is
# accepted so all MAGIC_Fn macros share one signature, but is unused here.
.macro MAGIC_F0 regF regB regC regD regT
|
||||
vpxor \regD, \regC, \regF
|
||||
vpand \regB, \regF, \regF
|
||||
vpxor \regD, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
|
||||
# Three-way XOR written to regF; the inputs are only read and regT is unused.
.macro MAGIC_F1 regF regB regC regD regT
|
||||
vpxor \regC, \regD, \regF
|
||||
vpxor \regB, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
|
||||
# Evaluated as ((B | C) & D) | (B & C), which equals the majority form in
# the description.  regT holds the intermediate (B & C) and is overwritten.
.macro MAGIC_F2 regF regB regC regD regT
|
||||
vpor \regC, \regB, \regF
|
||||
vpand \regC, \regB, \regT
|
||||
vpand \regD, \regF, \regF
|
||||
vpor \regT, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
|
||||
# Same function as MAGIC_F1; simply forwards all five operands to it.
.macro MAGIC_F3 regF regB regC regD regT
|
||||
MAGIC_F1 \regF,\regB,\regC,\regD,\regT
|
||||
.endm
|
||||
|
||||
# PROLD reg, imm, tmp
|
||||
# Rotate every 32-bit element of reg left by imm bits, in place.
# tmp receives the bits shifted out at the top and is overwritten.
.macro PROLD reg imm tmp
|
||||
vpsrld $(32-\imm), \reg, \tmp
|
||||
vpslld $\imm, \reg, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# Non-destructive rotate: reg = src rotated left by imm bits per 32-bit
# element.  src is only read; tmp is scratch and is overwritten.
.macro PROLD_nd reg imm tmp src
|
||||
vpsrld $(32-\imm), \src, \tmp
|
||||
vpslld $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# One SHA1 round for steps 0..15, eight lanes at once:
#   E += K (immCNT) + W[memW] + rol(A, 5) + MAGIC(B, C, D);  B = rol(B, 30)
# W[memW] is read from the 32-byte stack slot number memW.  regT and regF
# are scratch.  The caller rotates the A..E register names afterwards.
.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
|
||||
vpaddd \immCNT, \regE, \regE
|
||||
vpaddd \memW*32(%rsp), \regE, \regE
|
||||
PROLD_nd \regT, 5, \regF, \regA
|
||||
vpaddd \regT, \regE, \regE
|
||||
\MAGIC \regF, \regB, \regC, \regD, \regT
|
||||
PROLD \regB, 30, \regT
|
||||
vpaddd \regF, \regE, \regE
|
||||
.endm
|
||||
|
||||
# One SHA1 round for steps 16..79, eight lanes at once.  The new schedule
# word is rol(W16 ^ W[memW-14] ^ W[memW-8] ^ W[memW-3], 1), where the stack
# holds a 16-entry ring of 32-byte slots indexed modulo 16.  W14 is reloaded
# from the stack, W16 is consumed, and ROTATE_W then renames W14/W15/W16 for
# the next step.  The new word is stored to slot memW and added into E,
# followed by the same rol(A, 5), MAGIC and rol(B, 30) updates as in the
# first 16 steps.  regT and regF are scratch.
.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
|
||||
vpaddd \immCNT, \regE, \regE
|
||||
offset = ((\memW - 14) & 15) * 32
|
||||
vmovdqu offset(%rsp), W14
|
||||
vpxor W14, W16, W16
|
||||
offset = ((\memW - 8) & 15) * 32
|
||||
vpxor offset(%rsp), W16, W16
|
||||
offset = ((\memW - 3) & 15) * 32
|
||||
vpxor offset(%rsp), W16, W16
|
||||
vpsrld $(32-1), W16, \regF
|
||||
vpslld $1, W16, W16
|
||||
vpor W16, \regF, \regF
|
||||
|
||||
ROTATE_W
|
||||
|
||||
offset = ((\memW - 0) & 15) * 32
|
||||
vmovdqu \regF, offset(%rsp)
|
||||
vpaddd \regF, \regE, \regE
|
||||
PROLD_nd \regT, 5, \regF, \regA
|
||||
vpaddd \regT, \regE, \regE
|
||||
\MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
|
||||
PROLD \regB,30, \regT
|
||||
vpaddd \regF, \regE, \regE
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
########################################################################
|
||||
########################################################################
|
||||
|
||||
## FRAMESZ plus pushes must be an odd multiple of 8
|
||||
YMM_SAVE = (15-15)*32
|
||||
FRAMESZ = 32*16 + YMM_SAVE
|
||||
_YMM = FRAMESZ - YMM_SAVE
|
||||
|
||||
#define VMOVPS vmovups
|
||||
|
||||
IDX = %rax
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
inp4 = %r13
|
||||
inp5 = %r14
|
||||
inp6 = %r15
|
||||
inp7 = %rcx
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
RSP_SAVE = %rdx
|
||||
|
||||
# ymm0 A
|
||||
# ymm1 B
|
||||
# ymm2 C
|
||||
# ymm3 D
|
||||
# ymm4 E
|
||||
# ymm5 F AA
|
||||
# ymm6 T0 BB
|
||||
# ymm7 T1 CC
|
||||
# ymm8 T2 DD
|
||||
# ymm9 T3 EE
|
||||
# ymm10 T4 TMP
|
||||
# ymm11 T5 FUN
|
||||
# ymm12 T6 K
|
||||
# ymm13 T7 W14
|
||||
# ymm14 T8 W15
|
||||
# ymm15 T9 W16
|
||||
|
||||
|
||||
A = %ymm0
|
||||
B = %ymm1
|
||||
C = %ymm2
|
||||
D = %ymm3
|
||||
E = %ymm4
|
||||
F = %ymm5
|
||||
T0 = %ymm6
|
||||
T1 = %ymm7
|
||||
T2 = %ymm8
|
||||
T3 = %ymm9
|
||||
T4 = %ymm10
|
||||
T5 = %ymm11
|
||||
T6 = %ymm12
|
||||
T7 = %ymm13
|
||||
T8 = %ymm14
|
||||
T9 = %ymm15
|
||||
|
||||
AA = %ymm5
|
||||
BB = %ymm6
|
||||
CC = %ymm7
|
||||
DD = %ymm8
|
||||
EE = %ymm9
|
||||
TMP = %ymm10
|
||||
FUN = %ymm11
|
||||
K = %ymm12
|
||||
W14 = %ymm13
|
||||
W15 = %ymm14
|
||||
W16 = %ymm15
|
||||
|
||||
# Rename the working variables after a round without moving any data:
# the register that was E becomes A, A becomes B, B becomes C, and so on.
# Only assembler symbols change; no instructions are emitted.
.macro ROTATE_ARGS
|
||||
TMP_ = E
|
||||
E = D
|
||||
D = C
|
||||
C = B
|
||||
B = A
|
||||
A = TMP_
|
||||
.endm
|
||||
|
||||
# Rename the three message-schedule registers after a step: the register
# that was W16 becomes W14, W14 becomes W15 and W15 becomes W16.
# Only assembler symbols change; no instructions are emitted.
.macro ROTATE_W
|
||||
TMP_ = W16
|
||||
W16 = W15
|
||||
W15 = W14
|
||||
W14 = TMP_
|
||||
.endm
|
||||
|
||||
# 8 streams x 5 32bit words per digest x 4 bytes per word
|
||||
#define DIGEST_SIZE (8*5*4)
|
||||
|
||||
.align 32
|
||||
|
||||
# void sha1_x8_avx2(SHA1_ARGS_X8 *args, UINT32 size_in_blocks)
|
||||
# arg 1 : pointer to SHA1_ARGS_X8 (transposed digests, then 8 input data pointers)
|
||||
# arg 2 : size (in blocks) ;; assumed to be >= 1
|
||||
#
|
||||
/*
 * sha1_x8_avx2 - run the 80 SHA1 steps on eight input streams in parallel.
 *
 * arg1 (%rdi): base of the argument block. The five digest rows A..E are
 *              loaded from and stored back to 0*32 .. 4*32(arg1); the
 *              eight input pointers are read from and written back to
 *              _data_ptr + k*8(arg1), k = 0..7.
 * arg2 (%rsi): number of passes through lloop. Each pass consumes 64 bytes
 *              from every stream. The counter is decremented before it is
 *              tested, so it has to be >= 1 on entry.
 *
 * %r12-%r15 are pushed on entry and popped on exit. %rsp is kept in
 * RSP_SAVE (%rdx) while FRAMESZ bytes of 32-byte aligned scratch space
 * are in use.
 */
ENTRY(sha1_x8_avx2)
|
||||
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
#save rsp
|
||||
mov %rsp, RSP_SAVE
|
||||
sub $FRAMESZ, %rsp
|
||||
|
||||
#align rsp to 32 Bytes
|
||||
and $~0x1F, %rsp
|
||||
|
||||
## Initialize digests
|
||||
vmovdqu 0*32(arg1), A
|
||||
vmovdqu 1*32(arg1), B
|
||||
vmovdqu 2*32(arg1), C
|
||||
vmovdqu 3*32(arg1), D
|
||||
vmovdqu 4*32(arg1), E
|
||||
|
||||
## transpose input onto stack
|
||||
mov _data_ptr+0*8(arg1),inp0
|
||||
mov _data_ptr+1*8(arg1),inp1
|
||||
mov _data_ptr+2*8(arg1),inp2
|
||||
mov _data_ptr+3*8(arg1),inp3
|
||||
mov _data_ptr+4*8(arg1),inp4
|
||||
mov _data_ptr+5*8(arg1),inp5
|
||||
mov _data_ptr+6*8(arg1),inp6
|
||||
mov _data_ptr+7*8(arg1),inp7
|
||||
|
||||
/* IDX is the running byte offset into every stream; it is cleared once, not per pass */
xor IDX, IDX
|
||||
lloop:
|
||||
/* F and AA are both %ymm5, so the mask is reloaded at the top of every pass */
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
|
||||
I=0
|
||||
/* two rounds of 32 bytes per stream fill stack rows 0..15 */
.rep 2
|
||||
VMOVPS (inp0, IDX), T0
|
||||
VMOVPS (inp1, IDX), T1
|
||||
VMOVPS (inp2, IDX), T2
|
||||
VMOVPS (inp3, IDX), T3
|
||||
VMOVPS (inp4, IDX), T4
|
||||
VMOVPS (inp5, IDX), T5
|
||||
VMOVPS (inp6, IDX), T6
|
||||
VMOVPS (inp7, IDX), T7
|
||||
|
||||
/* TRANSPOSE8 is defined outside this function; T8 and T9 are presumably scratch - TODO confirm */
TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
|
||||
vpshufb F, T0, T0
|
||||
vmovdqu T0, (I*8)*32(%rsp)
|
||||
vpshufb F, T1, T1
|
||||
vmovdqu T1, (I*8+1)*32(%rsp)
|
||||
vpshufb F, T2, T2
|
||||
vmovdqu T2, (I*8+2)*32(%rsp)
|
||||
vpshufb F, T3, T3
|
||||
vmovdqu T3, (I*8+3)*32(%rsp)
|
||||
vpshufb F, T4, T4
|
||||
vmovdqu T4, (I*8+4)*32(%rsp)
|
||||
vpshufb F, T5, T5
|
||||
vmovdqu T5, (I*8+5)*32(%rsp)
|
||||
vpshufb F, T6, T6
|
||||
vmovdqu T6, (I*8+6)*32(%rsp)
|
||||
vpshufb F, T7, T7
|
||||
vmovdqu T7, (I*8+7)*32(%rsp)
|
||||
add $32, IDX
|
||||
I = (I+1)
|
||||
.endr
|
||||
# save old digests
|
||||
vmovdqu A,AA
|
||||
vmovdqu B,BB
|
||||
vmovdqu C,CC
|
||||
vmovdqu D,DD
|
||||
vmovdqu E,EE
|
||||
|
||||
##
|
||||
## perform 0-79 steps
|
||||
##
|
||||
vmovdqu K00_19(%rip), K
|
||||
## do rounds 0...15
|
||||
|
||||
/* I restarts at 0 here and keeps counting through all four round groups below */
I = 0
|
||||
.rep 16
|
||||
SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 16...19
|
||||
/* stack rows 0 and 1 are preloaded into W16 and W15 before the first SHA1_STEP_16_79 */
vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
|
||||
vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
|
||||
.rep 4
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 20...39
|
||||
vmovdqu K20_39(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 40...59
|
||||
vmovdqu K40_59(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 60...79
|
||||
vmovdqu K60_79(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
/* add the digest values saved before step 0 back into A..E */
vpaddd AA,A,A
|
||||
vpaddd BB,B,B
|
||||
vpaddd CC,C,C
|
||||
vpaddd DD,D,D
|
||||
vpaddd EE,E,E
|
||||
|
||||
/* one pass done; loop until the counter in arg2 reaches zero */
sub $1, arg2
|
||||
jne lloop
|
||||
|
||||
# write out digests
|
||||
vmovdqu A, 0*32(arg1)
|
||||
vmovdqu B, 1*32(arg1)
|
||||
vmovdqu C, 2*32(arg1)
|
||||
vmovdqu D, 3*32(arg1)
|
||||
vmovdqu E, 4*32(arg1)
|
||||
|
||||
# update input pointers
|
||||
/* IDX now holds the total number of bytes consumed from each stream */
add IDX, inp0
|
||||
add IDX, inp1
|
||||
add IDX, inp2
|
||||
add IDX, inp3
|
||||
add IDX, inp4
|
||||
add IDX, inp5
|
||||
add IDX, inp6
|
||||
add IDX, inp7
|
||||
mov inp0, _data_ptr (arg1)
|
||||
mov inp1, _data_ptr + 1*8(arg1)
|
||||
mov inp2, _data_ptr + 2*8(arg1)
|
||||
mov inp3, _data_ptr + 3*8(arg1)
|
||||
mov inp4, _data_ptr + 4*8(arg1)
|
||||
mov inp5, _data_ptr + 5*8(arg1)
|
||||
mov inp6, _data_ptr + 6*8(arg1)
|
||||
mov inp7, _data_ptr + 7*8(arg1)
|
||||
|
||||
################
|
||||
## Postamble
|
||||
|
||||
/* drop the aligned scratch frame by restoring the %rsp value saved on entry */
mov RSP_SAVE, %rsp
|
||||
|
||||
# restore callee-saved clobbered registers
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
ret
|
||||
ENDPROC(sha1_x8_avx2)
|
||||
|
||||
|
||||
.section .rodata.cst32.K00_19, "aM", @progbits, 32
|
||||
.align 32
|
||||
K00_19:
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
|
||||
.section .rodata.cst32.K20_39, "aM", @progbits, 32
|
||||
.align 32
|
||||
K20_39:
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
|
||||
.section .rodata.cst32.K40_59, "aM", @progbits, 32
|
||||
.align 32
|
||||
K40_59:
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
|
||||
.section .rodata.cst32.K60_79, "aM", @progbits, 32
|
||||
.align 32
|
||||
K60_79:
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
@@ -1,14 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
OBJECT_FILES_NON_STANDARD := y
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
|
||||
sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
|
||||
sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha256_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
|
||||
#ifdef HASH_CTX_DEBUG
|
||||
HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA256_DIGEST_LENGTH 8
|
||||
#define SHA256_LOG2_BLOCK_SIZE 6
|
||||
|
||||
#define SHA256_PADLENGTHFIELD_SIZE 8
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha256_ctx_mgr {
|
||||
struct sha256_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
|
||||
|
||||
struct sha256_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha256 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,108 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA256_DIGEST_WORDS 8
|
||||
|
||||
enum job_sts { STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha256 {
|
||||
u8 *buffer;
|
||||
u32 len;
|
||||
u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
/* SHA256 out-of-order scheduler */
|
||||
|
||||
/* typedef uint32_t sha8_digest_array[8][8]; */
|
||||
|
||||
struct sha256_args_x8 {
|
||||
uint32_t digest[8][8];
|
||||
uint8_t *data_ptr[8];
|
||||
};
|
||||
|
||||
struct sha256_lane_data {
|
||||
struct job_sha256 *job_in_lane;
|
||||
};
|
||||
|
||||
/*
 * State of the SHA256 multi-buffer manager, passed to the
 * sha256_mb_mgr_*_avx2() routines declared below.
 * The assembly MB_MGR layout in sha256_mb_mgr_datastruct.S declares the
 * same four members in the same order (_args, _lens, _unused_lanes, _ldata).
 */
struct sha256_mb_mgr {
|
||||
struct sha256_args_x8 args;
|
||||
|
||||
uint32_t lens[8];
|
||||
|
||||
/* each byte is index (0...7) of unused lanes */
|
||||
uint64_t unused_lanes;
|
||||
/*
 * NOTE(review): the remark below presumably describes unused_lanes above,
 * not ldata; the flush routine tests bit 32+3 of unused_lanes, which lies
 * in byte 4.
 */
/* byte 4 is set to FF as a flag */
|
||||
struct sha256_lane_data ldata[8];
|
||||
};
|
||||
|
||||
|
||||
#define SHA256_MB_MGR_NUM_LANES_AVX2 8
|
||||
|
||||
void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
|
||||
struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
|
||||
struct job_sha256 *job);
|
||||
struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
|
||||
struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
|
||||
|
||||
#endif
|
@@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count is in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. Its arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _DATASTRUCT_ASM_
|
||||
#define _DATASTRUCT_ASM_
|
||||
|
||||
/*
 * Size constants for the SHA256 multi-buffer assembly.
 *
 * SZ8 and ROUNDS are products; they are parenthesized so that they keep
 * their value when expanded inside a larger expression (for example
 * x / SZ8). No whitespace is used inside the parentheses so the expansion
 * stays a single token run when passed as an assembler macro argument.
 */
#define SZ8 (8*SHA256_DIGEST_WORD_SIZE)
#define ROUNDS (64*SZ8)
#define PTR_SZ 8
/* bytes per digest word */
#define SHA256_DIGEST_WORD_SIZE 4
#define MAX_SHA256_LANES 8
#define SHA256_DIGEST_WORDS 8
/* one digest word for every lane */
#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
/* all digest rows */
#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
#define SHA256_BLK_SZ 64
|
||||
|
||||
# START_FIELDS
|
||||
/*
 * START_FIELDS - begin a new layout description.
 * Resets the running offset (_FIELD_OFFSET) and the largest alignment
 * seen so far (_STRUCT_ALIGN) to 0.
 */
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
# FIELD name size align
|
||||
/*
 * FIELD name size align - add one member to the current layout.
 *   name:  symbol that receives the member's offset
 *   size:  number of bytes reserved for the member
 *   align: alignment of the member; the round-up below uses a bit mask,
 *          so this assumes a power of two
 * _STRUCT_ALIGN is raised to align when align is larger.
 */
.macro FIELD name size align
|
||||
/* round the running offset up to the next multiple of align */
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
\name = _FIELD_OFFSET
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# END_FIELDS
|
||||
/*
 * END_FIELDS - finish the current layout.
 * Rounds _FIELD_OFFSET up to a multiple of _STRUCT_ALIGN, so that the
 * final value can be used as the padded size of the layout.
 */
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
/*
 * ENDSTRUCT - close a layout opened with STRUCT.
 * Records the offset before END_FIELDS pads it, then tries to reserve
 * the padding that END_FIELDS added.
 *
 * NOTE(review): nothing in the visible part of this file invokes this
 * macro, and several lines look like leftovers from another assembler's
 * syntax - confirm they assemble before using it:
 *   - "%%tmp" is not a symbol assigned anywhere in this macro; the line
 *     before assigns "tmp".
 *   - ".lcomm tmp" has one operand, while RES_int calls .lcomm with a
 *     name and a size.
 *   - ".endstruc" closes the ".struc" emitted by STRUCT; neither is
 *     defined in this file.
 */
.macro ENDSTRUCT
|
||||
tmp = _FIELD_OFFSET
|
||||
END_FIELDS
|
||||
tmp = (_FIELD_OFFSET - %%tmp)
|
||||
.if (tmp > 0)
|
||||
.lcomm tmp
|
||||
.endif
|
||||
.endstruc
|
||||
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
########################################################################
|
||||
#### Define SHA256 Out Of Order Data Structures
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # SHA256_ARGS_X4
|
||||
### name size align
|
||||
FIELD _digest, 4*8*8, 4 # transposed digest
|
||||
FIELD _data_ptr, 8*8, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA256_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA256_ARGS_X4_align = _STRUCT_ALIGN
|
||||
_SHA256_ARGS_X8_size = _FIELD_OFFSET
|
||||
_SHA256_ARGS_X8_align = _STRUCT_ALIGN
|
||||
|
||||
#######################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
|
||||
FIELD _lens, 4*8, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
#######################################################################
|
||||
|
||||
/*
 * STACK_FRAME layout: four byte-aligned members (_data, _digest, _ytmp,
 * _rsp) followed by the resulting size and alignment symbols.
 *
 * NOTE(review): the FIELD for _digest below re-assigns the symbol _digest
 * that the args layout earlier in this file already set. _args_digest was
 * computed before this point and is not affected, but any later use of
 * _digest sees the stack-frame offset - confirm that is intended.
 *
 * NOTE(review): the trailing comments on _data and _digest match the
 * wording used for _digest and _data_ptr in the args layout and do not
 * match these field names; confirm what the two stack areas hold.
 */
START_FIELDS #STACK_FRAME
|
||||
### name size align
|
||||
FIELD _data, 16*SZ8, 1 # transposed digest
|
||||
FIELD _digest, 8*SZ8, 1 # array of pointers to data
|
||||
FIELD _ytmp, 4*SZ8, 1
|
||||
FIELD _rsp, 8, 1
|
||||
END_FIELDS
|
||||
|
||||
_STACK_FRAME_size = _FIELD_OFFSET
|
||||
_STACK_FRAME_align = _STRUCT_ALIGN
|
||||
|
||||
#######################################################################
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
########################################################################
|
||||
#### Define JOB_SHA256 structure
|
||||
########################################################################
|
||||
|
||||
/*
 * JOB_SHA256 layout: assembly-side offsets for the members of
 * struct job_sha256 (sha256_mb_mgr.h), listed in the same order as the
 * C declaration.
 *
 * NOTE(review): _len is given 8 bytes here while the C struct declares
 * "u32 len". The following offsets still agree because _result_digest is
 * 32-byte aligned on both sides, but an 8-byte load of _len would also
 * read padding - confirm users access it as 32 bits.
 */
START_FIELDS # JOB_SHA256
|
||||
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 8, 8 # length in bytes
|
||||
FIELD _result_digest, 8*4, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA256_size = _FIELD_OFFSET
|
||||
_JOB_SHA256_align = _STRUCT_ALIGN
|
@@ -1,307 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA256 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha256_x8_avx2
|
||||
|
||||
#LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# Common register definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
# idx must be a register not clobbered by the SHA256 multi-buffer routine (sha256_x8_avx2)
|
||||
#define idx %r8
|
||||
#define DWORD_idx %r8d
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
#define tmp2_w %ebx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
#define tmp3 %arg1
|
||||
|
||||
#define extra_blocks %arg2
|
||||
#define p %arg2
|
||||
|
||||
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rdi : state
|
||||
ENTRY(sha256_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+3) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+3, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne four(%rip), idx
|
||||
offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne five(%rip), idx
|
||||
offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne six(%rip), idx
|
||||
offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne seven(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 8
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 4*I)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens+0*16(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _lens+0*16(state)
|
||||
vmovdqu %xmm1, _lens+1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha256_x8_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl $0xFFFFFFFF, _lens(state,idx,4)
|
||||
|
||||
vmovd _args_digest(state , idx, 4) , %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
vmovdqu %xmm1, offset(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha256_mb_mgr_flush_avx2)
|
||||
|
||||
##############################################################################
|
||||
|
||||
.align 16
|
||||
ENTRY(sha256_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
## if bit 32+3 is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+3), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
test $~0xF, idx
|
||||
jnz .return_null
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
vmovdqu %xmm1, offset(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
.quad 2
|
||||
three:
|
||||
.quad 3
|
||||
four:
|
||||
.quad 4
|
||||
five:
|
||||
.quad 5
|
||||
six:
|
||||
.quad 6
|
||||
seven:
|
||||
.quad 7
|
@@ -1,65 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA256 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha256_mb_mgr.h"
|
||||
|
||||
void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
state->unused_lanes = 0xF76543210ULL;
|
||||
for (j = 0; j < 8; j++) {
|
||||
state->lens[j] = 0xFFFFFFFF;
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
||||
}
|
@@ -1,214 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA256 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha256_x8_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
size_offset = %rcx
|
||||
tmp2 = %rcx
|
||||
extra_blocks = %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job %rsi
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
# idx must be a register not clobbered by sha256_x8_avx2
|
||||
idx = %r8
|
||||
DWORD_idx = %r8d
|
||||
last_len = %r8
|
||||
|
||||
p = %r11
|
||||
start_offset = %r11
|
||||
|
||||
unused_lanes = %rbx
|
||||
BYTE_unused_lanes = %bl
|
||||
|
||||
job_rax = %rax
|
||||
len = %rax
|
||||
DWORD_len = %eax
|
||||
|
||||
lane = %r12
|
||||
tmp3 = %r12
|
||||
|
||||
tmp = %r9
|
||||
DWORD_tmp = %r9d
|
||||
|
||||
lane_data = %r10
|
||||
|
||||
# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
|
||||
# arg 1 : rdi : state
|
||||
# arg 2 : rsi : job
|
||||
ENTRY(sha256_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
mov unused_lanes, lane
|
||||
and $0xF, lane
|
||||
shr $4, unused_lanes
|
||||
imul $_LANE_DATA_size, lane, lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
mov job, _job_in_lane(lane_data)
|
||||
shl $4, len
|
||||
or lane, len
|
||||
|
||||
movl DWORD_len, _lens(state , lane, 4)
|
||||
|
||||
# Load digest words from result_digest
|
||||
vmovdqu _result_digest(job), %xmm0
|
||||
vmovdqu _result_digest+1*16(job), %xmm1
|
||||
vmovd %xmm0, _args_digest(state, lane, 4)
|
||||
vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
|
||||
vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
|
||||
vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
|
||||
vmovd %xmm1, _args_digest+4*32(state , lane, 4)
|
||||
|
||||
vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
|
||||
vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
|
||||
vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
cmp $0xF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
# Find min length
|
||||
vmovdqa _lens(state), %xmm0
|
||||
vmovdqa _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqa %xmm0, _lens + 0*16(state)
|
||||
vmovdqa %xmm1, _lens + 1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha256_x8_avx2
|
||||
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state,idx,4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
|
||||
vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
|
||||
ENDPROC(sha256_mb_mgr_submit_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
@@ -1,598 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA256 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
## code to compute oct SHA256 using SSE-256
|
||||
## outer calling routine takes care of save and restore of XMM registers
|
||||
## Logic designed/laid out by JDG
|
||||
|
||||
## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
|
||||
## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
|
||||
## Linux preserves: rdi rbp r8
|
||||
##
|
||||
## clobbers %ymm0-15
|
||||
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
reg3 = %rcx
|
||||
reg4 = %rdx
|
||||
|
||||
# Common definitions
|
||||
STATE = arg1
|
||||
INP_SIZE = arg2
|
||||
|
||||
IDX = %rax
|
||||
ROUND = %rbx
|
||||
TBL = reg3
|
||||
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
inp4 = %r13
|
||||
inp5 = %r14
|
||||
inp6 = %r15
|
||||
inp7 = reg4
|
||||
|
||||
a = %ymm0
|
||||
b = %ymm1
|
||||
c = %ymm2
|
||||
d = %ymm3
|
||||
e = %ymm4
|
||||
f = %ymm5
|
||||
g = %ymm6
|
||||
h = %ymm7
|
||||
|
||||
T1 = %ymm8
|
||||
|
||||
a0 = %ymm12
|
||||
a1 = %ymm13
|
||||
a2 = %ymm14
|
||||
TMP = %ymm15
|
||||
TMP0 = %ymm6
|
||||
TMP1 = %ymm7
|
||||
|
||||
TT0 = %ymm8
|
||||
TT1 = %ymm9
|
||||
TT2 = %ymm10
|
||||
TT3 = %ymm11
|
||||
TT4 = %ymm12
|
||||
TT5 = %ymm13
|
||||
TT6 = %ymm14
|
||||
TT7 = %ymm15
|
||||
|
||||
# Define stack usage
|
||||
|
||||
# Assume stack aligned to 32 bytes before call
|
||||
# Therefore FRAMESZ mod 32 must be 32-8 = 24
|
||||
|
||||
#define FRAMESZ 0x388
|
||||
|
||||
#define VMOVPS vmovups
|
||||
|
||||
# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
# "transpose" data in {r0...r7} using temps {t0...t1}
|
||||
# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
|
||||
# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
|
||||
# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
|
||||
# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
|
||||
#
|
||||
# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
|
||||
# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
|
||||
# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
|
||||
# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
|
||||
# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
|
||||
# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
|
||||
# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
|
||||
# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
|
||||
#
|
||||
|
||||
.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
|
||||
# process top half (r0..r3) {a...d}
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
|
||||
vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
|
||||
vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
|
||||
vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
|
||||
|
||||
# use r2 in place of t0
|
||||
# process bottom half (r4..r7) {e...h}
|
||||
vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
|
||||
vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
|
||||
vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
|
||||
vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
|
||||
vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
|
||||
vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
|
||||
vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
|
||||
vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
|
||||
|
||||
vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
|
||||
vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
|
||||
vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
|
||||
vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
|
||||
vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
|
||||
vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
|
||||
vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
|
||||
vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
|
||||
|
||||
.endm
|
||||
|
||||
# ROTATE_ARGS
# Rebind the assembler symbols a..h by one position: the register that was
# named h becomes a, a becomes b, b becomes c, ... and g becomes h.
# Only symbol assignments are performed; no instructions are emitted.
# TMP_ temporarily holds the old binding of h.
.macro ROTATE_ARGS
|
||||
TMP_ = h
|
||||
h = g
|
||||
g = f
|
||||
f = e
|
||||
e = d
|
||||
d = c
|
||||
c = b
|
||||
b = a
|
||||
a = TMP_
|
||||
.endm
|
||||
|
||||
# _PRORD reg, imm, tmp
# Rotate every 32-bit element of \reg right by \imm bits, in place.
# \tmp receives \reg shifted left by (32 - \imm) and is clobbered; the
# rotate is formed by OR-ing that with \reg shifted right by \imm.
.macro _PRORD reg imm tmp
|
||||
vpslld $(32-\imm),\reg,\tmp
|
||||
vpsrld $\imm,\reg, \reg
|
||||
vpor \tmp,\reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORD_nd reg, imm, tmp, src
|
||||
# _PRORD_nd reg, imm, tmp, src
# Non-destructive form of _PRORD: \reg = each 32-bit element of \src rotated
# right by \imm bits. \src is only read (as long as it is a different
# register from \reg and \tmp); \tmp is clobbered as scratch.
.macro _PRORD_nd reg imm tmp src
|
||||
vpslld $(32-\imm), \src, \tmp
|
||||
vpsrld $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORD dst/src, amt
|
||||
.macro PRORD reg imm
|
||||
_PRORD \reg,\imm,TMP
|
||||
.endm
|
||||
|
||||
# PRORD_nd dst, src, amt
|
||||
.macro PRORD_nd reg tmp imm
|
||||
_PRORD_nd \reg, \imm, TMP, \tmp
|
||||
.endm
|
||||
|
||||
# arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_00_15 _T1 i
|
||||
PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
|
||||
|
||||
vpxor g, f, a2 # ch: a2 = f^g
|
||||
vpand e,a2, a2 # ch: a2 = (f^g)&e
|
||||
vpxor g, a2, a2 # a2 = ch
|
||||
|
||||
PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
|
||||
|
||||
vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
|
||||
vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
|
||||
vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
|
||||
PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
|
||||
vpaddd a2, h, h # h = h + ch
|
||||
PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
|
||||
vpaddd \_T1,h, h # h = h + ch + W + K
|
||||
vpxor a1, a0, a0 # a0 = sigma1
|
||||
PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
|
||||
vpxor c, a, \_T1 # maj: T1 = a^c
|
||||
add $SZ8, ROUND # ROUND++
|
||||
vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
|
||||
vpaddd a0, h, h
|
||||
vpaddd h, d, d
|
||||
vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
|
||||
PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
|
||||
vpxor a1, a2, a2 # a2 = sig0
|
||||
vpand c, a, a1 # maj: a1 = a&c
|
||||
vpor \_T1, a1, a1 # a1 = maj
|
||||
vpaddd a1, h, h # h = h + ch + W + K + maj
|
||||
vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
|
||||
ROTATE_ARGS
|
||||
.endm
|
||||
|
||||
# arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_16_XX _T1 i
|
||||
vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
|
||||
vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
|
||||
vmovdqu \_T1, a0
|
||||
PRORD \_T1,11
|
||||
vmovdqu a1, a2
|
||||
PRORD a1,2
|
||||
vpxor a0, \_T1, \_T1
|
||||
PRORD \_T1, 7
|
||||
vpxor a2, a1, a1
|
||||
PRORD a1, 17
|
||||
vpsrld $3, a0, a0
|
||||
vpxor a0, \_T1, \_T1
|
||||
vpsrld $10, a2, a2
|
||||
vpxor a2, a1, a1
|
||||
vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
|
||||
vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
|
||||
vpaddd a1, \_T1, \_T1
|
||||
|
||||
ROUND_00_15 \_T1,\i
|
||||
.endm
|
||||
|
||||
# SHA256_ARGS:
|
||||
# UINT256 digest[8]; // transposed digests
|
||||
# UINT8 *data_ptr[8];
|
||||
|
||||
# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
|
||||
# arg 1 : STATE : pointer to SHA256_ARGS (transposed digests and input data pointers)
|
||||
# arg 2 : INP_SIZE : size of input in blocks
|
||||
# general registers preserved in outer calling routine
|
||||
# outer calling routine saves all the XMM registers
|
||||
# save rsp, allocate 32-byte aligned for local variables
|
||||
ENTRY(sha256_x8_avx2)
|
||||
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
mov %rsp, IDX
|
||||
sub $FRAMESZ, %rsp
|
||||
and $~0x1F, %rsp
|
||||
mov IDX, _rsp(%rsp)
|
||||
|
||||
# Load the pre-transposed incoming digest.
|
||||
vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
|
||||
vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
|
||||
vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
|
||||
vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
|
||||
vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
|
||||
vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
|
||||
vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
|
||||
vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
|
||||
|
||||
lea K256_8(%rip),TBL
|
||||
|
||||
# load the address of each of the 8 message lanes
|
||||
# getting ready to transpose input onto stack
|
||||
mov _args_data_ptr+0*PTR_SZ(STATE),inp0
|
||||
mov _args_data_ptr+1*PTR_SZ(STATE),inp1
|
||||
mov _args_data_ptr+2*PTR_SZ(STATE),inp2
|
||||
mov _args_data_ptr+3*PTR_SZ(STATE),inp3
|
||||
mov _args_data_ptr+4*PTR_SZ(STATE),inp4
|
||||
mov _args_data_ptr+5*PTR_SZ(STATE),inp5
|
||||
mov _args_data_ptr+6*PTR_SZ(STATE),inp6
|
||||
mov _args_data_ptr+7*PTR_SZ(STATE),inp7
|
||||
|
||||
xor IDX, IDX
|
||||
lloop:
|
||||
xor ROUND, ROUND
|
||||
|
||||
# save old digest
|
||||
vmovdqu a, _digest(%rsp)
|
||||
vmovdqu b, _digest+1*SZ8(%rsp)
|
||||
vmovdqu c, _digest+2*SZ8(%rsp)
|
||||
vmovdqu d, _digest+3*SZ8(%rsp)
|
||||
vmovdqu e, _digest+4*SZ8(%rsp)
|
||||
vmovdqu f, _digest+5*SZ8(%rsp)
|
||||
vmovdqu g, _digest+6*SZ8(%rsp)
|
||||
vmovdqu h, _digest+7*SZ8(%rsp)
|
||||
i = 0
|
||||
.rep 2
|
||||
VMOVPS i*32(inp0, IDX), TT0
|
||||
VMOVPS i*32(inp1, IDX), TT1
|
||||
VMOVPS i*32(inp2, IDX), TT2
|
||||
VMOVPS i*32(inp3, IDX), TT3
|
||||
VMOVPS i*32(inp4, IDX), TT4
|
||||
VMOVPS i*32(inp5, IDX), TT5
|
||||
VMOVPS i*32(inp6, IDX), TT6
|
||||
VMOVPS i*32(inp7, IDX), TT7
|
||||
vmovdqu g, _ytmp(%rsp)
|
||||
vmovdqu h, _ytmp+1*SZ8(%rsp)
|
||||
TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
|
||||
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
|
||||
vmovdqu _ytmp(%rsp), g
|
||||
vpshufb TMP1, TT0, TT0
|
||||
vpshufb TMP1, TT1, TT1
|
||||
vpshufb TMP1, TT2, TT2
|
||||
vpshufb TMP1, TT3, TT3
|
||||
vpshufb TMP1, TT4, TT4
|
||||
vpshufb TMP1, TT5, TT5
|
||||
vpshufb TMP1, TT6, TT6
|
||||
vpshufb TMP1, TT7, TT7
|
||||
vmovdqu _ytmp+1*SZ8(%rsp), h
|
||||
vmovdqu TT4, _ytmp(%rsp)
|
||||
vmovdqu TT5, _ytmp+1*SZ8(%rsp)
|
||||
vmovdqu TT6, _ytmp+2*SZ8(%rsp)
|
||||
vmovdqu TT7, _ytmp+3*SZ8(%rsp)
|
||||
ROUND_00_15 TT0,(i*8+0)
|
||||
vmovdqu _ytmp(%rsp), TT0
|
||||
ROUND_00_15 TT1,(i*8+1)
|
||||
vmovdqu _ytmp+1*SZ8(%rsp), TT1
|
||||
ROUND_00_15 TT2,(i*8+2)
|
||||
vmovdqu _ytmp+2*SZ8(%rsp), TT2
|
||||
ROUND_00_15 TT3,(i*8+3)
|
||||
vmovdqu _ytmp+3*SZ8(%rsp), TT3
|
||||
ROUND_00_15 TT0,(i*8+4)
|
||||
ROUND_00_15 TT1,(i*8+5)
|
||||
ROUND_00_15 TT2,(i*8+6)
|
||||
ROUND_00_15 TT3,(i*8+7)
|
||||
i = (i+1)
|
||||
.endr
|
||||
add $64, IDX
|
||||
i = (i*8)
|
||||
|
||||
jmp Lrounds_16_xx
|
||||
.align 16
|
||||
Lrounds_16_xx:
|
||||
.rep 16
|
||||
ROUND_16_XX T1, i
|
||||
i = (i+1)
|
||||
.endr
|
||||
|
||||
cmp $ROUNDS,ROUND
|
||||
jb Lrounds_16_xx
|
||||
|
||||
# add old digest
|
||||
vpaddd _digest+0*SZ8(%rsp), a, a
|
||||
vpaddd _digest+1*SZ8(%rsp), b, b
|
||||
vpaddd _digest+2*SZ8(%rsp), c, c
|
||||
vpaddd _digest+3*SZ8(%rsp), d, d
|
||||
vpaddd _digest+4*SZ8(%rsp), e, e
|
||||
vpaddd _digest+5*SZ8(%rsp), f, f
|
||||
vpaddd _digest+6*SZ8(%rsp), g, g
|
||||
vpaddd _digest+7*SZ8(%rsp), h, h
|
||||
|
||||
sub $1, INP_SIZE # unit is blocks
|
||||
jne lloop
|
||||
|
||||
# write back to memory (state object) the transposed digest
|
||||
vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
|
||||
# update input pointers
|
||||
add IDX, inp0
|
||||
mov inp0, _args_data_ptr+0*8(STATE)
|
||||
add IDX, inp1
|
||||
mov inp1, _args_data_ptr+1*8(STATE)
|
||||
add IDX, inp2
|
||||
mov inp2, _args_data_ptr+2*8(STATE)
|
||||
add IDX, inp3
|
||||
mov inp3, _args_data_ptr+3*8(STATE)
|
||||
add IDX, inp4
|
||||
mov inp4, _args_data_ptr+4*8(STATE)
|
||||
add IDX, inp5
|
||||
mov inp5, _args_data_ptr+5*8(STATE)
|
||||
add IDX, inp6
|
||||
mov inp6, _args_data_ptr+6*8(STATE)
|
||||
add IDX, inp7
|
||||
mov inp7, _args_data_ptr+7*8(STATE)
|
||||
|
||||
# Postamble
|
||||
mov _rsp(%rsp), %rsp
|
||||
|
||||
# restore callee-saved clobbered registers
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
ret
|
||||
ENDPROC(sha256_x8_avx2)
|
||||
|
||||
.section .rodata.K256_8, "a", @progbits
|
||||
.align 64
|
||||
K256_8:
|
||||
.octa 0x428a2f98428a2f98428a2f98428a2f98
|
||||
.octa 0x428a2f98428a2f98428a2f98428a2f98
|
||||
.octa 0x71374491713744917137449171374491
|
||||
.octa 0x71374491713744917137449171374491
|
||||
.octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
|
||||
.octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
|
||||
.octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
|
||||
.octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
|
||||
.octa 0x3956c25b3956c25b3956c25b3956c25b
|
||||
.octa 0x3956c25b3956c25b3956c25b3956c25b
|
||||
.octa 0x59f111f159f111f159f111f159f111f1
|
||||
.octa 0x59f111f159f111f159f111f159f111f1
|
||||
.octa 0x923f82a4923f82a4923f82a4923f82a4
|
||||
.octa 0x923f82a4923f82a4923f82a4923f82a4
|
||||
.octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
|
||||
.octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
|
||||
.octa 0xd807aa98d807aa98d807aa98d807aa98
|
||||
.octa 0xd807aa98d807aa98d807aa98d807aa98
|
||||
.octa 0x12835b0112835b0112835b0112835b01
|
||||
.octa 0x12835b0112835b0112835b0112835b01
|
||||
.octa 0x243185be243185be243185be243185be
|
||||
.octa 0x243185be243185be243185be243185be
|
||||
.octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
|
||||
.octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
|
||||
.octa 0x72be5d7472be5d7472be5d7472be5d74
|
||||
.octa 0x72be5d7472be5d7472be5d7472be5d74
|
||||
.octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
|
||||
.octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
|
||||
.octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
|
||||
.octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
|
||||
.octa 0xc19bf174c19bf174c19bf174c19bf174
|
||||
.octa 0xc19bf174c19bf174c19bf174c19bf174
|
||||
.octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
|
||||
.octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
|
||||
.octa 0xefbe4786efbe4786efbe4786efbe4786
|
||||
.octa 0xefbe4786efbe4786efbe4786efbe4786
|
||||
.octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
|
||||
.octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
|
||||
.octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
|
||||
.octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
|
||||
.octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
|
||||
.octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
|
||||
.octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
|
||||
.octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
|
||||
.octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
|
||||
.octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
|
||||
.octa 0x76f988da76f988da76f988da76f988da
|
||||
.octa 0x76f988da76f988da76f988da76f988da
|
||||
.octa 0x983e5152983e5152983e5152983e5152
|
||||
.octa 0x983e5152983e5152983e5152983e5152
|
||||
.octa 0xa831c66da831c66da831c66da831c66d
|
||||
.octa 0xa831c66da831c66da831c66da831c66d
|
||||
.octa 0xb00327c8b00327c8b00327c8b00327c8
|
||||
.octa 0xb00327c8b00327c8b00327c8b00327c8
|
||||
.octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
|
||||
.octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
|
||||
.octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
|
||||
.octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
|
||||
.octa 0xd5a79147d5a79147d5a79147d5a79147
|
||||
.octa 0xd5a79147d5a79147d5a79147d5a79147
|
||||
.octa 0x06ca635106ca635106ca635106ca6351
|
||||
.octa 0x06ca635106ca635106ca635106ca6351
|
||||
.octa 0x14292967142929671429296714292967
|
||||
.octa 0x14292967142929671429296714292967
|
||||
.octa 0x27b70a8527b70a8527b70a8527b70a85
|
||||
.octa 0x27b70a8527b70a8527b70a8527b70a85
|
||||
.octa 0x2e1b21382e1b21382e1b21382e1b2138
|
||||
.octa 0x2e1b21382e1b21382e1b21382e1b2138
|
||||
.octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
|
||||
.octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
|
||||
.octa 0x53380d1353380d1353380d1353380d13
|
||||
.octa 0x53380d1353380d1353380d1353380d13
|
||||
.octa 0x650a7354650a7354650a7354650a7354
|
||||
.octa 0x650a7354650a7354650a7354650a7354
|
||||
.octa 0x766a0abb766a0abb766a0abb766a0abb
|
||||
.octa 0x766a0abb766a0abb766a0abb766a0abb
|
||||
.octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
|
||||
.octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
|
||||
.octa 0x92722c8592722c8592722c8592722c85
|
||||
.octa 0x92722c8592722c8592722c8592722c85
|
||||
.octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
|
||||
.octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
|
||||
.octa 0xa81a664ba81a664ba81a664ba81a664b
|
||||
.octa 0xa81a664ba81a664ba81a664ba81a664b
|
||||
.octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
|
||||
.octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
|
||||
.octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
|
||||
.octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
|
||||
.octa 0xd192e819d192e819d192e819d192e819
|
||||
.octa 0xd192e819d192e819d192e819d192e819
|
||||
.octa 0xd6990624d6990624d6990624d6990624
|
||||
.octa 0xd6990624d6990624d6990624d6990624
|
||||
.octa 0xf40e3585f40e3585f40e3585f40e3585
|
||||
.octa 0xf40e3585f40e3585f40e3585f40e3585
|
||||
.octa 0x106aa070106aa070106aa070106aa070
|
||||
.octa 0x106aa070106aa070106aa070106aa070
|
||||
.octa 0x19a4c11619a4c11619a4c11619a4c116
|
||||
.octa 0x19a4c11619a4c11619a4c11619a4c116
|
||||
.octa 0x1e376c081e376c081e376c081e376c08
|
||||
.octa 0x1e376c081e376c081e376c081e376c08
|
||||
.octa 0x2748774c2748774c2748774c2748774c
|
||||
.octa 0x2748774c2748774c2748774c2748774c
|
||||
.octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
|
||||
.octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
|
||||
.octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
|
||||
.octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
|
||||
.octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
|
||||
.octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
|
||||
.octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
|
||||
.octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
|
||||
.octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
|
||||
.octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
|
||||
.octa 0x748f82ee748f82ee748f82ee748f82ee
|
||||
.octa 0x748f82ee748f82ee748f82ee748f82ee
|
||||
.octa 0x78a5636f78a5636f78a5636f78a5636f
|
||||
.octa 0x78a5636f78a5636f78a5636f78a5636f
|
||||
.octa 0x84c8781484c8781484c8781484c87814
|
||||
.octa 0x84c8781484c8781484c8781484c87814
|
||||
.octa 0x8cc702088cc702088cc702088cc70208
|
||||
.octa 0x8cc702088cc702088cc702088cc70208
|
||||
.octa 0x90befffa90befffa90befffa90befffa
|
||||
.octa 0x90befffa90befffa90befffa90befffa
|
||||
.octa 0xa4506ceba4506ceba4506ceba4506ceb
|
||||
.octa 0xa4506ceba4506ceba4506ceba4506ceb
|
||||
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
|
||||
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
.global K256
|
||||
K256:
|
||||
.int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
.int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
.int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
.int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
.int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
.int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
.int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
.int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
.int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
.int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
.int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
.int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
.int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
.int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
.int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
@@ -1,12 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
|
||||
sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
|
||||
sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@@ -1,128 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA512 context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha512_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
};
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
/*
 * hash_ctx_init(ctx) - reset a hash context before use.
 *
 * Clears the error field to HASH_CTX_ERROR_NONE and sets the status field
 * to HASH_CTX_STS_COMPLETE. ctx is evaluated twice, so it must be an
 * expression without side effects. Wrapped in do { } while (0) so the
 * macro behaves as a single statement.
 */
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA512_DIGEST_LENGTH 8
|
||||
#define SHA512_LOG2_BLOCK_SIZE 7
|
||||
|
||||
#define SHA512_PADLENGTHFIELD_SIZE 16
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha512_ctx_mgr {
|
||||
struct sha512_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
|
||||
|
||||
struct sha512_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha512 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,104 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA512 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA512_DIGEST_WORDS 8
|
||||
|
||||
enum job_sts {STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha512 {
|
||||
u8 *buffer;
|
||||
u64 len;
|
||||
u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
struct sha512_args_x4 {
|
||||
uint64_t digest[8][4];
|
||||
uint8_t *data_ptr[4];
|
||||
};
|
||||
|
||||
struct sha512_lane_data {
|
||||
struct job_sha512 *job_in_lane;
|
||||
};
|
||||
|
||||
/*
 * Multi-buffer SHA512 manager state for the 4-lane AVX2 implementation.
 *
 * The field order and sizes are mirrored by the MB_MGR field list in
 * sha512_mb_mgr_datastruct.S (_args, _lens, _unused_lanes, _ldata), so any
 * change here must be repeated there.
 */
struct sha512_mb_mgr {
|
||||
/* transposed digests and per-lane data pointers */
struct sha512_args_x4 args;
|
||||
|
||||
/* one 64-bit length word per lane */
uint64_t lens[4];
|
||||
|
||||
/*
 * each byte is index (0...3) of an unused lane;
 * byte 4 is set to FF as a flag (the flush routine tests bit 32+7
 * and returns NULL when it is set, i.e. all lanes are empty)
 */
|
||||
uint64_t unused_lanes;
|
||||
/* per-lane pointer to the job currently occupying that lane */
|
||||
struct sha512_lane_data ldata[4];
|
||||
};
|
||||
|
||||
#define SHA512_MB_MGR_NUM_LANES_AVX2 4
|
||||
|
||||
void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
|
||||
struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
|
||||
struct job_sha512 *job);
|
||||
struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
|
||||
struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
|
||||
|
||||
#endif
|
@@ -1,281 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA512 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count is in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. Its arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _DATASTRUCT_ASM_
|
||||
#define _DATASTRUCT_ASM_
|
||||
|
||||
#define PTR_SZ 8
|
||||
#define SHA512_DIGEST_WORD_SIZE 8
|
||||
#define SHA512_MB_MGR_NUM_LANES_AVX2 4
|
||||
#define NUM_SHA512_DIGEST_WORDS 8
|
||||
#define SZ4 4*SHA512_DIGEST_WORD_SIZE
|
||||
#define ROUNDS 80*SZ4
|
||||
#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
|
||||
|
||||
# START_FIELDS
|
||||
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
# FIELD name size align
|
||||
# Declares one structure member between START_FIELDS and END_FIELDS.
#   name  - symbol that receives the byte offset of the member
#   size  - size of the member in bytes
#   align - required alignment in bytes (the mask-based rounding below
#           assumes a power of two - TODO confirm for all callers)
.macro FIELD name size align
|
||||
# round the running offset up to the next multiple of align
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
# publish the aligned offset under the member name
\name = _FIELD_OFFSET
|
||||
# advance past this member
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
# track the largest member alignment; END_FIELDS pads the struct to it
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# END_FIELDS
|
||||
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
# Closes a structure opened with STRUCT.
#
# Saves the current _FIELD_OFFSET in tmp, lets END_FIELDS round
# _FIELD_OFFSET up to _STRUCT_ALIGN, then recomputes tmp as the amount of
# tail padding and reserves it when it is non-zero.
#
# Fix: the conditional opened by .if was never closed before .endm, so
# any expansion of this macro left an unterminated .if block. The missing
# .endif is added below.
#
# NOTE(review): two further lines look like leftovers from a NASM/YASM
# port and are left unchanged pending confirmation:
#   - the doubled hash before tmp in the subtraction starts a comment on
#     x86 gas, which would truncate that expression;
#   - .lcomm normally takes a symbol and a length, but only one operand
#     is given here.
# No expansion of ENDSTRUCT is visible in this file, which is presumably
# why these never produced an assembler error.
.macro ENDSTRUCT
	tmp = _FIELD_OFFSET
	END_FIELDS
	tmp = (_FIELD_OFFSET - ##tmp)
	.if (tmp > 0)
		.lcomm	tmp
	.endif
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
###################################################################
|
||||
### Define SHA512 Out Of Order Data Structures
|
||||
###################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
####################################################################
|
||||
|
||||
START_FIELDS # SHA512_ARGS_X4
|
||||
### name size align
|
||||
FIELD _digest, 8*8*4, 4 # transposed digest
|
||||
FIELD _data_ptr, 8*4, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA512_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA512_ARGS_X4_align = _STRUCT_ALIGN
|
||||
|
||||
#####################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
|
||||
FIELD _lens, 8*4, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
#######################################################################
|
||||
|
||||
#######################################################################
|
||||
#### Define constants
|
||||
#######################################################################
|
||||
|
||||
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
#######################################################################
|
||||
#### Define JOB_SHA512 structure
|
||||
#######################################################################
|
||||
|
||||
START_FIELDS # JOB_SHA512
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 8, 8 # length in bytes
|
||||
FIELD _result_digest, 8*8, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA512_size = _FIELD_OFFSET
|
||||
_JOB_SHA512_align = _STRUCT_ALIGN
|
@@ -1,297 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA512 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha512_x4_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# idx needs to be other than arg1, arg2, rbx, r12
|
||||
#define idx %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
#define tmp3 arg1
|
||||
|
||||
#define extra_blocks arg2
|
||||
#define p arg2
|
||||
|
||||
#define tmp4 %r8
|
||||
#define lens0 %r8
|
||||
|
||||
#define lens1 %r9
|
||||
#define lens2 %r10
|
||||
#define lens3 %r11
|
||||
|
||||
# LABEL prefix n: define a label whose name is \prefix immediately
# followed by \n (used below as "LABEL skip_ %I" to emit skip_<I>).
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
# JNE_SKIP i: branch to the label skip_\i when the preceding compare
# left ZF clear (operands not equal).
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
# SET_OFFSET _offset: assign the value of \_offset to the assembler
# symbol "offset". The definition is bracketed by .altmacro/.noaltmacro.
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rdi : state
|
||||
ENTRY(sha512_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+7) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+7, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 4
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 8*I +4)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
mov _lens + 0*8(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens + 1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1,idx
|
||||
mov _lens + 2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens + 3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
mov idx,len2
|
||||
and $0xF,idx
|
||||
and $~0xFF,len2
|
||||
jz len_is_0
|
||||
|
||||
sub len2, lens0
|
||||
sub len2, lens1
|
||||
sub len2, lens2
|
||||
sub len2, lens3
|
||||
shr $32,len2
|
||||
mov lens0, _lens + 0*8(state)
|
||||
mov lens1, _lens + 1*8(state)
|
||||
mov lens2, _lens + 2*8(state)
|
||||
mov lens3, _lens + 3*8(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha512_x4_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens+4(state, idx, 8)
|
||||
|
||||
vmovq _args_digest+0*32(state, idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state, idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state, idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state, idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest+2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest+3*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha512_mb_mgr_flush_avx2)
|
||||
.align 16
|
||||
|
||||
ENTRY(sha512_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+7), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
mov _lens(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens+1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1,idx
|
||||
mov _lens+2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens+3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
test $~0xF,idx
|
||||
jnz .return_null
|
||||
and $0xF,idx
|
||||
|
||||
#process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens+4(state, idx, 8)
|
||||
|
||||
vmovq _args_digest(state, idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state, idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state, idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state, idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest+0*16(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest+2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest+3*16(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.section .rodata.cst8.one, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
|
||||
.section .rodata.cst8.two, "aM", @progbits, 8
|
||||
.align 8
|
||||
two:
|
||||
.quad 2
|
||||
|
||||
.section .rodata.cst8.three, "aM", @progbits, 8
|
||||
.align 8
|
||||
three:
|
||||
.quad 3
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA512 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha512_mb_mgr.h"
|
||||
|
||||
/*
 * sha512_mb_mgr_init_avx2() - put a SHA512 multi-buffer manager into its
 * initial state, in which none of the four lanes holds a job.
 * @state: manager structure to initialise; its fields are only written.
 */
void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
/*
 * Initially all lanes are unused: every lens[] entry gets 0xFFFFFFFF in
 * its upper 32 bits and the lane number (0..3) in its low bits.
 */
|
||||
state->lens[0] = 0xFFFFFFFF00000000;
|
||||
state->lens[1] = 0xFFFFFFFF00000001;
|
||||
state->lens[2] = 0xFFFFFFFF00000002;
|
||||
state->lens[3] = 0xFFFFFFFF00000003;
|
||||
|
||||
/* bytes 0..3 hold the lane numbers 0..3, byte 4 holds 0xFF */
state->unused_lanes = 0xFF03020100;
|
||||
/* no lane has a job attached yet */
for (j = 0; j < 4; j++)
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA512 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha512_x4_avx2
|
||||
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
#define idx %rdx
|
||||
#define last_len %rdx
|
||||
|
||||
#define size_offset %rcx
|
||||
#define tmp2 %rcx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
#define p %r11
|
||||
#define start_offset %r11
|
||||
|
||||
#define unused_lanes %rbx
|
||||
|
||||
#define job_rax %rax
|
||||
#define len %rax
|
||||
|
||||
#define lane %r12
|
||||
#define tmp3 %r12
|
||||
#define lens3 %r12
|
||||
|
||||
#define extra_blocks %r8
|
||||
#define lens0 %r8
|
||||
|
||||
#define tmp %r9
|
||||
#define lens1 %r9
|
||||
|
||||
#define lane_data %r10
|
||||
#define lens2 %r10
|
||||
|
||||
#define DWORD_len %eax
|
||||
|
||||
# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
|
||||
# arg 1 : rdi : state
|
||||
# arg 2 : rsi : job
|
||||
ENTRY(sha512_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movzb %bl,lane
|
||||
shr $8, unused_lanes
|
||||
imul $_LANE_DATA_size, lane,lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
mov job, _job_in_lane(lane_data)
|
||||
movl DWORD_len,_lens+4(state , lane, 8)
|
||||
|
||||
# Load digest words from result_digest
|
||||
vmovdqu _result_digest+0*16(job), %xmm0
|
||||
vmovdqu _result_digest+1*16(job), %xmm1
|
||||
vmovdqu _result_digest+2*16(job), %xmm2
|
||||
vmovdqu _result_digest+3*16(job), %xmm3
|
||||
|
||||
vmovq %xmm0, _args_digest(state, lane, 8)
|
||||
vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
|
||||
vmovq %xmm1, _args_digest+2*32(state , lane, 8)
|
||||
vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
|
||||
vmovq %xmm2, _args_digest+4*32(state , lane, 8)
|
||||
vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
|
||||
vmovq %xmm3, _args_digest+6*32(state , lane, 8)
|
||||
vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
cmp $0xFF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
|
||||
# Find min length
|
||||
mov _lens+0*8(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens+1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1, idx
|
||||
mov _lens+2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens+3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
mov idx,len2
|
||||
and $0xF,idx
|
||||
and $~0xFF,len2
|
||||
jz len_is_0
|
||||
|
||||
sub len2,lens0
|
||||
sub len2,lens1
|
||||
sub len2,lens2
|
||||
sub len2,lens3
|
||||
shr $32,len2
|
||||
mov lens0, _lens + 0*8(state)
|
||||
mov lens1, _lens + 1*8(state)
|
||||
mov lens2, _lens + 2*8(state)
|
||||
mov lens3, _lens + 3*8(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha512_x4_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF,_lens+4(state,idx,8)
|
||||
vmovq _args_digest+0*32(state , idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state , idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state , idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state , idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest + 0*16(job_rax)
|
||||
vmovdqu %xmm1, _result_digest + 1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest + 2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest + 3*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha512_mb_mgr_submit_avx2)
|
||||
|
||||
/* UNUSED?
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
H0: .int 0x6a09e667
|
||||
H1: .int 0xbb67ae85
|
||||
H2: .int 0x3c6ef372
|
||||
H3: .int 0xa54ff53a
|
||||
H4: .int 0x510e527f
|
||||
H5: .int 0x9b05688c
|
||||
H6: .int 0x1f83d9ab
|
||||
H7: .int 0x5be0cd19
|
||||
*/
|
@@ -1,531 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA512 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# code to compute quad SHA512 using AVX2
|
||||
# use YMMs to tackle the larger digest size
|
||||
# outer calling routine takes care of save and restore of XMM registers
|
||||
# Logic designed/laid out by JDG
|
||||
|
||||
# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
|
||||
# Stack must be aligned to 32 bytes before call
|
||||
# Linux clobbers: rax rbx rsi r8 r9 r10 r11
|
||||
# Linux preserves: rcx rdx rdi rbp r12 r13 r14 r15
|
||||
# clobbers ymm0-15
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
|
||||
# Common definitions
|
||||
STATE = arg1
|
||||
INP_SIZE = arg2
|
||||
|
||||
IDX = %rax
|
||||
ROUND = %rbx
|
||||
TBL = %r8
|
||||
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
|
||||
a = %ymm0
|
||||
b = %ymm1
|
||||
c = %ymm2
|
||||
d = %ymm3
|
||||
e = %ymm4
|
||||
f = %ymm5
|
||||
g = %ymm6
|
||||
h = %ymm7
|
||||
|
||||
a0 = %ymm8
|
||||
a1 = %ymm9
|
||||
a2 = %ymm10
|
||||
|
||||
TT0 = %ymm14
|
||||
TT1 = %ymm13
|
||||
TT2 = %ymm12
|
||||
TT3 = %ymm11
|
||||
TT4 = %ymm10
|
||||
TT5 = %ymm9
|
||||
|
||||
T1 = %ymm14
|
||||
TMP = %ymm15
|
||||
|
||||
# Define stack usage
|
||||
STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
|
||||
|
||||
#define VMOVPD vmovupd
|
||||
_digest = SZ4*16
|
||||
|
||||
# TRANSPOSE r0, r1, r2, r3, t0, t1
|
||||
# "transpose" data in {r0..r3} using the two temps {t0, t1}
# Elements below are numbered in 32-bit units, so every 64-bit word is
# an adjacent pair such as "a1 a0".
|
||||
# Input looks like: {r0 r1 r2 r3}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
#
|
||||
# output looks like: {t0 r1 r0 r3}; r2 and t1 are left holding intermediates
|
||||
# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
|
||||
# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
|
||||
# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
|
||||
# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
|
||||
|
||||
.macro TRANSPOSE r0 r1 r2 r3 t0 t1
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
|
||||
vperm2f128 $0x20, \r2, \r0, \r1 # r1 = {d3 d2 c3 c2 b3 b2 a3 a2} (low halves of r0, r2)
|
||||
vperm2f128 $0x31, \r2, \r0, \r3 # r3 = {d7 d6 c7 c6 b7 b6 a7 a6} (high halves of r0, r2)
|
||||
vperm2f128 $0x31, \t1, \t0, \r0 # r0 = {d5 d4 c5 c4 b5 b4 a5 a4} (high halves of t0, t1)
|
||||
vperm2f128 $0x20, \t1, \t0, \t0 # t0 = {d1 d0 c1 c0 b1 b0 a1 a0} (low halves of t0, t1)
|
||||
.endm
|
||||
|
||||
# ROTATE_ARGS: rotate the assembler symbols a..h by one position
# (h takes g's register, g takes f's, ..., b takes a's, and a takes the
# register h had before). Only symbol assignments are made; no
# instructions are emitted.
.macro ROTATE_ARGS
|
||||
TMP_ = h
|
||||
h = g
|
||||
g = f
|
||||
f = e
|
||||
e = d
|
||||
d = c
|
||||
c = b
|
||||
b = a
|
||||
a = TMP_
|
||||
.endm
|
||||
|
||||
# _PRORQ reg, imm, tmp
|
||||
# packed rotate right of every 64-bit element of reg by imm bits, in place
|
||||
# does a rotate by doing two shifts and an or; tmp is overwritten
|
||||
.macro _PRORQ reg imm tmp
|
||||
vpsllq $(64-\imm),\reg,\tmp
|
||||
vpsrlq $\imm,\reg, \reg
|
||||
vpor \tmp,\reg, \reg
|
||||
.endm
|
||||
|
||||
# non-destructive variant: the result goes to reg, src is only read
|
||||
# _PRORQ_nd reg, imm, tmp, src
# reg = every 64-bit element of src rotated right by imm bits; tmp is overwritten
|
||||
.macro _PRORQ_nd reg imm tmp src
|
||||
vpsllq $(64-\imm), \src, \tmp
|
||||
vpsrlq $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORQ dst/src, amt
# in-place rotate right of reg by imm bits, using TMP as the scratch register
|
||||
.macro PRORQ reg imm
|
||||
_PRORQ \reg, \imm, TMP
|
||||
.endm
|
||||
|
||||
# PRORQ_nd dst, src, amt
# reg = src rotated right by imm bits, using TMP as the scratch register.
# Note: the parameter named "tmp" is the source operand, not a scratch.
|
||||
.macro PRORQ_nd reg tmp imm
|
||||
_PRORQ_nd \reg, \imm, TMP, \tmp
|
||||
.endm
|
||||
|
||||
#; ROUND_00_15 _T1 i: one SHA-512 round on four lanes at once.
#; _T1 holds the message word W for this round on entry and is overwritten;
#; i is the round index (only i & 0xf is used, to pick the stack slot).
#; The working variables are passed implicitly in the symbols a...h, which
#; are rotated by ROTATE_ARGS at the end; a0, a1, a2 are scratch.
|
||||
.macro ROUND_00_15 _T1 i
|
||||
PRORQ_nd a0, e, (18-14) # sig1: a0 = (e ror 4)
|
||||
|
||||
vpxor g, f, a2 # ch: a2 = f^g
|
||||
vpand e,a2, a2 # ch: a2 = (f^g)&e
|
||||
vpxor g, a2, a2 # a2 = ch
|
||||
|
||||
PRORQ_nd a1,e,41 # sig1: a1 = (e ror 41)
|
||||
|
||||
offset = SZ4*(\i & 0xf)
|
||||
vmovdqu \_T1,offset(%rsp) # save W in stack slot (i & 0xf)
|
||||
vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
|
||||
vpxor e,a0, a0 # sig1: a0 = e ^ (e ror 4)
|
||||
PRORQ a0, 14 # sig1: a0 = (e ror 14) ^ (e ror 18)
|
||||
vpaddq a2, h, h # h = h + ch
|
||||
PRORQ_nd a2,a,6 # sig0: a2 = (a ror 6)
|
||||
vpaddq \_T1,h, h # h = h + ch + W + K
|
||||
vpxor a1, a0, a0 # a0 = sigma1 = (e ror 14) ^ (e ror 18) ^ (e ror 41)
|
||||
vmovdqu a,\_T1 # maj: T1 = a
|
||||
PRORQ_nd a1,a,39 # sig0: a1 = (a ror 39)
|
||||
vpxor c, \_T1, \_T1 # maj: T1 = a^c
|
||||
add $SZ4, ROUND # ROUND += SZ4 (byte offset into TBL for the next round)
|
||||
vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
|
||||
vpaddq a0, h, h # h = h + ch + W + K + sigma1
|
||||
vpaddq h, d, d # d = d + h
|
||||
vpxor a, a2, a2 # sig0: a2 = a ^ (a ror 6)
|
||||
PRORQ a2,28 # sig0: a2 = (a ror 28) ^ (a ror 34)
|
||||
vpxor a1, a2, a2 # a2 = sigma0 = (a ror 28) ^ (a ror 34) ^ (a ror 39)
|
||||
vpand c, a, a1 # maj: a1 = a&c
|
||||
vpor \_T1, a1, a1 # a1 = maj
|
||||
vpaddq a1, h, h # h = h + ch + W + K + sigma1 + maj
|
||||
vpaddq a2, h, h # h = h + ch + W + K + sigma1 + maj + sigma0
|
||||
ROTATE_ARGS
|
||||
.endm
|
||||
|
||||
|
||||
#; ROUND_16_XX _T1 i: compute the message-schedule word for round i from
#; the 16 previous words kept on the stack (slot index is taken mod 16),
#; leave it in _T1, then run ROUND_00_15 on it.
#; The working variables are passed implicitly in the symbols a...h;
#; a0, a1, a2 are scratch.
|
||||
.macro ROUND_16_XX _T1 i
|
||||
vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 # T1 = W[i-15]
|
||||
vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 # a1 = W[i-2]
|
||||
vmovdqu \_T1, a0 # a0 = W[i-15]
|
||||
PRORQ \_T1,7 # T1 = W[i-15] ror 7
|
||||
vmovdqu a1, a2 # a2 = W[i-2]
|
||||
PRORQ a1,42 # a1 = W[i-2] ror 42
|
||||
vpxor a0, \_T1, \_T1 # T1 = W[i-15] ^ (W[i-15] ror 7)
|
||||
PRORQ \_T1, 1 # T1 = (W[i-15] ror 1) ^ (W[i-15] ror 8)
|
||||
vpxor a2, a1, a1 # a1 = W[i-2] ^ (W[i-2] ror 42)
|
||||
PRORQ a1, 19 # a1 = (W[i-2] ror 19) ^ (W[i-2] ror 61)
|
||||
vpsrlq $7, a0, a0 # a0 = W[i-15] >> 7
|
||||
vpxor a0, \_T1, \_T1 # T1 = (ror 1) ^ (ror 8) ^ (>> 7) of W[i-15]
|
||||
vpsrlq $6, a2, a2 # a2 = W[i-2] >> 6
|
||||
vpxor a2, a1, a1 # a1 = (ror 19) ^ (ror 61) ^ (>> 6) of W[i-2]
|
||||
vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 # T1 += W[i-16]
|
||||
vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 # a1 += W[i-7]
|
||||
vpaddq a1, \_T1, \_T1 # T1 = new W[i]
|
||||
|
||||
ROUND_00_15 \_T1,\i
|
||||
.endm
|
||||
|
||||
|
||||
# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
|
||||
# arg 1 : STATE : pointer to the args structure (transposed digest rows and per-lane data pointers)
|
||||
# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
|
||||
ENTRY(sha512_x4_avx2)
|
||||
# general registers preserved in outer calling routine
|
||||
# outer calling routine saves all the XMM registers
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
sub $STACK_SPACE1, %rsp
|
||||
|
||||
# Load the pre-transposed incoming digest.
|
||||
vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
|
||||
vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
|
||||
vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
|
||||
vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
|
||||
vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
|
||||
vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
|
||||
vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
|
||||
vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
|
||||
|
||||
lea K512_4(%rip),TBL
|
||||
|
||||
# load the address of each of the 4 message lanes
|
||||
# getting ready to transpose input onto stack
|
||||
mov _data_ptr+0*PTR_SZ(STATE),inp0
|
||||
mov _data_ptr+1*PTR_SZ(STATE),inp1
|
||||
mov _data_ptr+2*PTR_SZ(STATE),inp2
|
||||
mov _data_ptr+3*PTR_SZ(STATE),inp3
|
||||
|
||||
xor IDX, IDX
|
||||
lloop:
|
||||
xor ROUND, ROUND
|
||||
|
||||
# save old digest
|
||||
vmovdqu a, _digest(%rsp)
|
||||
vmovdqu b, _digest+1*SZ4(%rsp)
|
||||
vmovdqu c, _digest+2*SZ4(%rsp)
|
||||
vmovdqu d, _digest+3*SZ4(%rsp)
|
||||
vmovdqu e, _digest+4*SZ4(%rsp)
|
||||
vmovdqu f, _digest+5*SZ4(%rsp)
|
||||
vmovdqu g, _digest+6*SZ4(%rsp)
|
||||
vmovdqu h, _digest+7*SZ4(%rsp)
|
||||
i = 0
|
||||
.rep 4
|
||||
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
|
||||
VMOVPD i*32(inp0, IDX), TT2
|
||||
VMOVPD i*32(inp1, IDX), TT1
|
||||
VMOVPD i*32(inp2, IDX), TT4
|
||||
VMOVPD i*32(inp3, IDX), TT3
|
||||
TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
|
||||
vpshufb TMP, TT0, TT0
|
||||
vpshufb TMP, TT1, TT1
|
||||
vpshufb TMP, TT2, TT2
|
||||
vpshufb TMP, TT3, TT3
|
||||
ROUND_00_15 TT0,(i*4+0)
|
||||
ROUND_00_15 TT1,(i*4+1)
|
||||
ROUND_00_15 TT2,(i*4+2)
|
||||
ROUND_00_15 TT3,(i*4+3)
|
||||
i = (i+1)
|
||||
.endr
|
||||
add $128, IDX
|
||||
|
||||
i = (i*4)
|
||||
|
||||
jmp Lrounds_16_xx
|
||||
.align 16
|
||||
Lrounds_16_xx:
|
||||
.rep 16
|
||||
ROUND_16_XX T1, i
|
||||
i = (i+1)
|
||||
.endr
|
||||
cmp $0xa00,ROUND
|
||||
jb Lrounds_16_xx
|
||||
|
||||
# add old digest
|
||||
vpaddq _digest(%rsp), a, a
|
||||
vpaddq _digest+1*SZ4(%rsp), b, b
|
||||
vpaddq _digest+2*SZ4(%rsp), c, c
|
||||
vpaddq _digest+3*SZ4(%rsp), d, d
|
||||
vpaddq _digest+4*SZ4(%rsp), e, e
|
||||
vpaddq _digest+5*SZ4(%rsp), f, f
|
||||
vpaddq _digest+6*SZ4(%rsp), g, g
|
||||
vpaddq _digest+7*SZ4(%rsp), h, h
|
||||
|
||||
sub $1, INP_SIZE # unit is blocks
|
||||
jne lloop
|
||||
|
||||
# write back to memory (state object) the transposed digest
|
||||
vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
|
||||
# update input data pointers
|
||||
add IDX, inp0
|
||||
mov inp0, _data_ptr+0*PTR_SZ(STATE)
|
||||
add IDX, inp1
|
||||
mov inp1, _data_ptr+1*PTR_SZ(STATE)
|
||||
add IDX, inp2
|
||||
mov inp2, _data_ptr+2*PTR_SZ(STATE)
|
||||
add IDX, inp3
|
||||
mov inp3, _data_ptr+3*PTR_SZ(STATE)
|
||||
|
||||
#;;;;;;;;;;;;;;;
|
||||
#; Postamble
|
||||
add $STACK_SPACE1, %rsp
|
||||
# restore callee-saved clobbered registers
|
||||
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
# outer calling routine restores XMM and other GP registers
|
||||
ret
|
||||
ENDPROC(sha512_x4_avx2)
|
||||
|
||||
.section .rodata.K512_4, "a", @progbits
|
||||
.align 64
|
||||
K512_4:
|
||||
.octa 0x428a2f98d728ae22428a2f98d728ae22,\
|
||||
0x428a2f98d728ae22428a2f98d728ae22
|
||||
.octa 0x7137449123ef65cd7137449123ef65cd,\
|
||||
0x7137449123ef65cd7137449123ef65cd
|
||||
.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
|
||||
0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
|
||||
.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
|
||||
0xe9b5dba58189dbbce9b5dba58189dbbc
|
||||
.octa 0x3956c25bf348b5383956c25bf348b538,\
|
||||
0x3956c25bf348b5383956c25bf348b538
|
||||
.octa 0x59f111f1b605d01959f111f1b605d019,\
|
||||
0x59f111f1b605d01959f111f1b605d019
|
||||
.octa 0x923f82a4af194f9b923f82a4af194f9b,\
|
||||
0x923f82a4af194f9b923f82a4af194f9b
|
||||
.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
|
||||
0xab1c5ed5da6d8118ab1c5ed5da6d8118
|
||||
.octa 0xd807aa98a3030242d807aa98a3030242,\
|
||||
0xd807aa98a3030242d807aa98a3030242
|
||||
.octa 0x12835b0145706fbe12835b0145706fbe,\
|
||||
0x12835b0145706fbe12835b0145706fbe
|
||||
.octa 0x243185be4ee4b28c243185be4ee4b28c,\
|
||||
0x243185be4ee4b28c243185be4ee4b28c
|
||||
.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
|
||||
0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
|
||||
.octa 0x72be5d74f27b896f72be5d74f27b896f,\
|
||||
0x72be5d74f27b896f72be5d74f27b896f
|
||||
.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
|
||||
0x80deb1fe3b1696b180deb1fe3b1696b1
|
||||
.octa 0x9bdc06a725c712359bdc06a725c71235,\
|
||||
0x9bdc06a725c712359bdc06a725c71235
|
||||
.octa 0xc19bf174cf692694c19bf174cf692694,\
|
||||
0xc19bf174cf692694c19bf174cf692694
|
||||
.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
|
||||
0xe49b69c19ef14ad2e49b69c19ef14ad2
|
||||
.octa 0xefbe4786384f25e3efbe4786384f25e3,\
|
||||
0xefbe4786384f25e3efbe4786384f25e3
|
||||
.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
|
||||
0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
|
||||
.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
|
||||
0x240ca1cc77ac9c65240ca1cc77ac9c65
|
||||
.octa 0x2de92c6f592b02752de92c6f592b0275,\
|
||||
0x2de92c6f592b02752de92c6f592b0275
|
||||
.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
|
||||
0x4a7484aa6ea6e4834a7484aa6ea6e483
|
||||
.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
|
||||
0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
|
||||
.octa 0x76f988da831153b576f988da831153b5,\
|
||||
0x76f988da831153b576f988da831153b5
|
||||
.octa 0x983e5152ee66dfab983e5152ee66dfab,\
|
||||
0x983e5152ee66dfab983e5152ee66dfab
|
||||
.octa 0xa831c66d2db43210a831c66d2db43210,\
|
||||
0xa831c66d2db43210a831c66d2db43210
|
||||
.octa 0xb00327c898fb213fb00327c898fb213f,\
|
||||
0xb00327c898fb213fb00327c898fb213f
|
||||
.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
|
||||
0xbf597fc7beef0ee4bf597fc7beef0ee4
|
||||
.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
|
||||
0xc6e00bf33da88fc2c6e00bf33da88fc2
|
||||
.octa 0xd5a79147930aa725d5a79147930aa725,\
|
||||
0xd5a79147930aa725d5a79147930aa725
|
||||
.octa 0x06ca6351e003826f06ca6351e003826f,\
|
||||
0x06ca6351e003826f06ca6351e003826f
|
||||
.octa 0x142929670a0e6e70142929670a0e6e70,\
|
||||
0x142929670a0e6e70142929670a0e6e70
|
||||
.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
|
||||
0x27b70a8546d22ffc27b70a8546d22ffc
|
||||
.octa 0x2e1b21385c26c9262e1b21385c26c926,\
|
||||
0x2e1b21385c26c9262e1b21385c26c926
|
||||
.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
|
||||
0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
|
||||
.octa 0x53380d139d95b3df53380d139d95b3df,\
|
||||
0x53380d139d95b3df53380d139d95b3df
|
||||
.octa 0x650a73548baf63de650a73548baf63de,\
|
||||
0x650a73548baf63de650a73548baf63de
|
||||
.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
|
||||
0x766a0abb3c77b2a8766a0abb3c77b2a8
|
||||
.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
|
||||
0x81c2c92e47edaee681c2c92e47edaee6
|
||||
.octa 0x92722c851482353b92722c851482353b,\
|
||||
0x92722c851482353b92722c851482353b
|
||||
.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
|
||||
0xa2bfe8a14cf10364a2bfe8a14cf10364
|
||||
.octa 0xa81a664bbc423001a81a664bbc423001,\
|
||||
0xa81a664bbc423001a81a664bbc423001
|
||||
.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
|
||||
0xc24b8b70d0f89791c24b8b70d0f89791
|
||||
.octa 0xc76c51a30654be30c76c51a30654be30,\
|
||||
0xc76c51a30654be30c76c51a30654be30
|
||||
.octa 0xd192e819d6ef5218d192e819d6ef5218,\
|
||||
0xd192e819d6ef5218d192e819d6ef5218
|
||||
.octa 0xd69906245565a910d69906245565a910,\
|
||||
0xd69906245565a910d69906245565a910
|
||||
.octa 0xf40e35855771202af40e35855771202a,\
|
||||
0xf40e35855771202af40e35855771202a
|
||||
.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
|
||||
0x106aa07032bbd1b8106aa07032bbd1b8
|
||||
.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
|
||||
0x19a4c116b8d2d0c819a4c116b8d2d0c8
|
||||
.octa 0x1e376c085141ab531e376c085141ab53,\
|
||||
0x1e376c085141ab531e376c085141ab53
|
||||
.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
|
||||
0x2748774cdf8eeb992748774cdf8eeb99
|
||||
.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
|
||||
0x34b0bcb5e19b48a834b0bcb5e19b48a8
|
||||
.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
|
||||
0x391c0cb3c5c95a63391c0cb3c5c95a63
|
||||
.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
|
||||
0x4ed8aa4ae3418acb4ed8aa4ae3418acb
|
||||
.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
|
||||
0x5b9cca4f7763e3735b9cca4f7763e373
|
||||
.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
|
||||
0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
|
||||
.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
|
||||
0x748f82ee5defb2fc748f82ee5defb2fc
|
||||
.octa 0x78a5636f43172f6078a5636f43172f60,\
|
||||
0x78a5636f43172f6078a5636f43172f60
|
||||
.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
|
||||
0x84c87814a1f0ab7284c87814a1f0ab72
|
||||
.octa 0x8cc702081a6439ec8cc702081a6439ec,\
|
||||
0x8cc702081a6439ec8cc702081a6439ec
|
||||
.octa 0x90befffa23631e2890befffa23631e28,\
|
||||
0x90befffa23631e2890befffa23631e28
|
||||
.octa 0xa4506cebde82bde9a4506cebde82bde9,\
|
||||
0xa4506cebde82bde9a4506cebde82bde9
|
||||
.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
|
||||
0xbef9a3f7b2c67915bef9a3f7b2c67915
|
||||
.octa 0xc67178f2e372532bc67178f2e372532b,\
|
||||
0xc67178f2e372532bc67178f2e372532b
|
||||
.octa 0xca273eceea26619cca273eceea26619c,\
|
||||
0xca273eceea26619cca273eceea26619c
|
||||
.octa 0xd186b8c721c0c207d186b8c721c0c207,\
|
||||
0xd186b8c721c0c207d186b8c721c0c207
|
||||
.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
|
||||
0xeada7dd6cde0eb1eeada7dd6cde0eb1e
|
||||
.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
|
||||
0xf57d4f7fee6ed178f57d4f7fee6ed178
|
||||
.octa 0x06f067aa72176fba06f067aa72176fba,\
|
||||
0x06f067aa72176fba06f067aa72176fba
|
||||
.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
|
||||
0x0a637dc5a2c898a60a637dc5a2c898a6
|
||||
.octa 0x113f9804bef90dae113f9804bef90dae,\
|
||||
0x113f9804bef90dae113f9804bef90dae
|
||||
.octa 0x1b710b35131c471b1b710b35131c471b,\
|
||||
0x1b710b35131c471b1b710b35131c471b
|
||||
.octa 0x28db77f523047d8428db77f523047d84,\
|
||||
0x28db77f523047d8428db77f523047d84
|
||||
.octa 0x32caab7b40c7249332caab7b40c72493,\
|
||||
0x32caab7b40c7249332caab7b40c72493
|
||||
.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
|
||||
0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
|
||||
.octa 0x431d67c49c100d4c431d67c49c100d4c,\
|
||||
0x431d67c49c100d4c431d67c49c100d4c
|
||||
.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
|
||||
0x4cc5d4becb3e42b64cc5d4becb3e42b6
|
||||
.octa 0x597f299cfc657e2a597f299cfc657e2a,\
|
||||
0x597f299cfc657e2a597f299cfc657e2a
|
||||
.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
|
||||
0x5fcb6fab3ad6faec5fcb6fab3ad6faec
|
||||
.octa 0x6c44198c4a4758176c44198c4a475817,\
|
||||
0x6c44198c4a4758176c44198c4a475817
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
.octa 0x18191a1b1c1d1e1f1011121314151617
|
Reference in New Issue
Block a user