diff options
Diffstat (limited to 'src/crypto/sha/sha512.c')
-rw-r--r-- | src/crypto/sha/sha512.c | 114 |
1 files changed, 52 insertions, 62 deletions
diff --git a/src/crypto/sha/sha512.c b/src/crypto/sha/sha512.c index 57c96ab..beb0f8c 100644 --- a/src/crypto/sha/sha512.c +++ b/src/crypto/sha/sha512.c @@ -65,35 +65,27 @@ /* IMPLEMENTATION NOTES. * - * As you might have noticed 32-bit hash algorithms: - * - * - permit SHA_LONG to be wider than 32-bit (case on CRAY); - * - optimized versions implement two transform functions: one operating - * on [aligned] data in host byte order and one - on data in input - * stream byte order; - * - share common byte-order neutral collector and padding function - * implementations, ../md32_common.h; - * - * Neither of the above applies to this SHA-512 implementations. Reasons + * The 32-bit hash algorithms share a common byte-order neutral collector and + * padding function implementations that operate on unaligned data, + * ../md32_common.h. This SHA-512 implementation does not. Reasons * [in reverse order] are: * - * - it's the only 64-bit hash algorithm for the moment of this writing, + * - It's the only 64-bit hash algorithm for the moment of this writing, * there is no need for common collector/padding implementation [yet]; - * - by supporting only one transform function [which operates on - * *aligned* data in input stream byte order, big-endian in this case] - * we minimize burden of maintenance in two ways: a) collector/padding - * function is simpler; b) only one transform function to stare at; - * - SHA_LONG64 is required to be exactly 64-bit in order to be able to - * apply a number of optimizations to mitigate potential performance - * penalties caused by previous design decision; */ + * - By supporting only a transform function that operates on *aligned* data + * the collector/padding function is simpler and easier to optimize. */ #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) -#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA #define SHA512_ASM #endif +#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ + defined(__ARM_FEATURE_UNALIGNED) +#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA +#endif + int SHA384_Init(SHA512_CTX *sha) { sha->h[0] = OPENSSL_U64(0xcbbb9d5dc1059ed8); sha->h[1] = OPENSSL_U64(0x629a292a367cd507); @@ -163,7 +155,7 @@ uint8_t *SHA512(const uint8_t *data, size_t len, uint8_t *out) { #if !defined(SHA512_ASM) static #endif -void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num); +void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num); int SHA384_Final(uint8_t *md, SHA512_CTX *sha) { @@ -181,7 +173,7 @@ void SHA512_Transform(SHA512_CTX *c, const uint8_t *data) { data = c->u.p; } #endif - sha512_block_data_order(c, data, 1); + sha512_block_data_order(c->h, (uint64_t *)data, 1); } int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) { @@ -213,7 +205,7 @@ int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) { memcpy(p + c->num, data, n), c->num = 0; len -= n; data += n; - sha512_block_data_order(c, p, 1); + sha512_block_data_order(c->h, (uint64_t *)p, 1); } } @@ -222,14 +214,14 @@ int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) { if ((size_t)data % sizeof(c->u.d[0]) != 0) { while (len >= sizeof(c->u)) { memcpy(p, data, sizeof(c->u)); - sha512_block_data_order(c, p, 1); + sha512_block_data_order(c->h, (uint64_t *)p, 1); len -= sizeof(c->u); data += sizeof(c->u); } } else #endif { - sha512_block_data_order(c, data, len / sizeof(c->u)); + sha512_block_data_order(c->h, (uint64_t *)data, len / sizeof(c->u)); data += len; len %= sizeof(c->u); data -= len; @@ -253,7 +245,7 @@ int SHA512_Final(uint8_t *md, SHA512_CTX *sha) { if (n > (sizeof(sha->u) - 16)) { memset(p + n, 0, sizeof(sha->u) - n); n = 0; - sha512_block_data_order(sha, p, 1); + sha512_block_data_order(sha->h, (uint64_t *)p, 1); } memset(p + n, 0, sizeof(sha->u) - 16 - n); @@ -274,7 +266,7 @@ int SHA512_Final(uint8_t *md, SHA512_CTX *sha) { p[sizeof(sha->u) - 15] = (uint8_t)(sha->Nh >> 48); p[sizeof(sha->u) - 16] = (uint8_t)(sha->Nh >> 56); - sha512_block_data_order(sha, p, 1); + sha512_block_data_order(sha->h, (uint64_t *)p, 1); if (md == NULL) { /* TODO(davidben): This NULL check is absent in other low-level hash 'final' @@ -443,23 +435,22 @@ static uint64_t __fastcall __pull64be(const void *x) { * This code should give better results on 32-bit CPU with less than * ~24 registers, both size and performance wise... */ -static void sha512_block_data_order(SHA512_CTX *ctx, const void *in, +static void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num) { - const uint64_t *W = in; uint64_t A, E, T; uint64_t X[9 + 80], *F; int i; while (num--) { F = X + 80; - A = ctx->h[0]; - F[1] = ctx->h[1]; - F[2] = ctx->h[2]; - F[3] = ctx->h[3]; - E = ctx->h[4]; - F[5] = ctx->h[5]; - F[6] = ctx->h[6]; - F[7] = ctx->h[7]; + A = state[0]; + F[1] = state[1]; + F[2] = state[2]; + F[3] = state[3]; + E = state[4]; + F[5] = state[5]; + F[6] = state[6]; + F[7] = state[7]; for (i = 0; i < 16; i++, F--) { T = PULL64(W[i]); @@ -484,14 +475,14 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in, A = T + Sigma0(A) + Maj(A, F[1], F[2]); } - ctx->h[0] += A; - ctx->h[1] += F[1]; - ctx->h[2] += F[2]; - ctx->h[3] += F[3]; - ctx->h[4] += E; - ctx->h[5] += F[5]; - ctx->h[6] += F[6]; - ctx->h[7] += F[7]; + state[0] += A; + state[1] += F[1]; + state[2] += F[2]; + state[3] += F[3]; + state[4] += E; + state[5] += F[5]; + state[6] += F[6]; + state[7] += F[7]; W += 16; } @@ -517,23 +508,22 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in, ROUND_00_15(i + j, a, b, c, d, e, f, g, h); \ } while (0) -static void sha512_block_data_order(SHA512_CTX *ctx, const void *in, +static void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num) { - const uint64_t *W = in; uint64_t a, b, c, d, e, f, g, h, s0, s1, T1; uint64_t X[16]; int i; while (num--) { - a = ctx->h[0]; - b = ctx->h[1]; - c = ctx->h[2]; - d = ctx->h[3]; - e = ctx->h[4]; - f = ctx->h[5]; - g = ctx->h[6]; - h = ctx->h[7]; + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; T1 = X[0] = PULL64(W[0]); ROUND_00_15(0, a, b, c, d, e, f, g, h); @@ -587,14 +577,14 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in, ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X); } - ctx->h[0] += a; - ctx->h[1] += b; - ctx->h[2] += c; - ctx->h[3] += d; - ctx->h[4] += e; - ctx->h[5] += f; - ctx->h[6] += g; - ctx->h[7] += h; + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; W += 16; } |