summaryrefslogtreecommitdiffstats
path: root/src/crypto/sha/sha512.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/sha/sha512.c')
-rw-r--r--src/crypto/sha/sha512.c114
1 files changed, 52 insertions, 62 deletions
diff --git a/src/crypto/sha/sha512.c b/src/crypto/sha/sha512.c
index 57c96ab..beb0f8c 100644
--- a/src/crypto/sha/sha512.c
+++ b/src/crypto/sha/sha512.c
@@ -65,35 +65,27 @@
/* IMPLEMENTATION NOTES.
*
- * As you might have noticed 32-bit hash algorithms:
- *
- * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
- * - optimized versions implement two transform functions: one operating
- * on [aligned] data in host byte order and one - on data in input
- * stream byte order;
- * - share common byte-order neutral collector and padding function
- * implementations, ../md32_common.h;
- *
- * Neither of the above applies to this SHA-512 implementations. Reasons
+ * The 32-bit hash algorithms share common byte-order neutral collector and
+ * padding function implementations that operate on unaligned data; see
+ * ../md32_common.h. This SHA-512 implementation does not. Reasons
* [in reverse order] are:
*
- * - it's the only 64-bit hash algorithm for the moment of this writing,
+ * - It's the only 64-bit hash algorithm at the time of this writing,
* there is no need for common collector/padding implementation [yet];
- * - by supporting only one transform function [which operates on
- * *aligned* data in input stream byte order, big-endian in this case]
- * we minimize burden of maintenance in two ways: a) collector/padding
- * function is simpler; b) only one transform function to stare at;
- * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
- * apply a number of optimizations to mitigate potential performance
- * penalties caused by previous design decision; */
+ * - By supporting only a transform function that operates on *aligned* data,
+ * the collector/padding function is simpler and easier to optimize. */
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
-#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#define SHA512_ASM
#endif
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+ defined(__ARM_FEATURE_UNALIGNED)
+#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+#endif
+
int SHA384_Init(SHA512_CTX *sha) {
sha->h[0] = OPENSSL_U64(0xcbbb9d5dc1059ed8);
sha->h[1] = OPENSSL_U64(0x629a292a367cd507);
@@ -163,7 +155,7 @@ uint8_t *SHA512(const uint8_t *data, size_t len, uint8_t *out) {
#if !defined(SHA512_ASM)
static
#endif
-void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
+void sha512_block_data_order(uint64_t *state, const uint64_t *W, size_t num);
int SHA384_Final(uint8_t *md, SHA512_CTX *sha) {
@@ -181,7 +173,7 @@ void SHA512_Transform(SHA512_CTX *c, const uint8_t *data) {
data = c->u.p;
}
#endif
- sha512_block_data_order(c, data, 1);
+ sha512_block_data_order(c->h, (uint64_t *)data, 1);
}
int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) {
@@ -213,7 +205,7 @@ int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) {
memcpy(p + c->num, data, n), c->num = 0;
len -= n;
data += n;
- sha512_block_data_order(c, p, 1);
+ sha512_block_data_order(c->h, (uint64_t *)p, 1);
}
}
@@ -222,14 +214,14 @@ int SHA512_Update(SHA512_CTX *c, const void *in_data, size_t len) {
if ((size_t)data % sizeof(c->u.d[0]) != 0) {
while (len >= sizeof(c->u)) {
memcpy(p, data, sizeof(c->u));
- sha512_block_data_order(c, p, 1);
+ sha512_block_data_order(c->h, (uint64_t *)p, 1);
len -= sizeof(c->u);
data += sizeof(c->u);
}
} else
#endif
{
- sha512_block_data_order(c, data, len / sizeof(c->u));
+ sha512_block_data_order(c->h, (uint64_t *)data, len / sizeof(c->u));
data += len;
len %= sizeof(c->u);
data -= len;
@@ -253,7 +245,7 @@ int SHA512_Final(uint8_t *md, SHA512_CTX *sha) {
if (n > (sizeof(sha->u) - 16)) {
memset(p + n, 0, sizeof(sha->u) - n);
n = 0;
- sha512_block_data_order(sha, p, 1);
+ sha512_block_data_order(sha->h, (uint64_t *)p, 1);
}
memset(p + n, 0, sizeof(sha->u) - 16 - n);
@@ -274,7 +266,7 @@ int SHA512_Final(uint8_t *md, SHA512_CTX *sha) {
p[sizeof(sha->u) - 15] = (uint8_t)(sha->Nh >> 48);
p[sizeof(sha->u) - 16] = (uint8_t)(sha->Nh >> 56);
- sha512_block_data_order(sha, p, 1);
+ sha512_block_data_order(sha->h, (uint64_t *)p, 1);
if (md == NULL) {
/* TODO(davidben): This NULL check is absent in other low-level hash 'final'
@@ -443,23 +435,22 @@ static uint64_t __fastcall __pull64be(const void *x) {
* This code should give better results on 32-bit CPU with less than
* ~24 registers, both size and performance wise...
*/
-static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
+static void sha512_block_data_order(uint64_t *state, const uint64_t *W,
size_t num) {
- const uint64_t *W = in;
uint64_t A, E, T;
uint64_t X[9 + 80], *F;
int i;
while (num--) {
F = X + 80;
- A = ctx->h[0];
- F[1] = ctx->h[1];
- F[2] = ctx->h[2];
- F[3] = ctx->h[3];
- E = ctx->h[4];
- F[5] = ctx->h[5];
- F[6] = ctx->h[6];
- F[7] = ctx->h[7];
+ A = state[0];
+ F[1] = state[1];
+ F[2] = state[2];
+ F[3] = state[3];
+ E = state[4];
+ F[5] = state[5];
+ F[6] = state[6];
+ F[7] = state[7];
for (i = 0; i < 16; i++, F--) {
T = PULL64(W[i]);
@@ -484,14 +475,14 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
A = T + Sigma0(A) + Maj(A, F[1], F[2]);
}
- ctx->h[0] += A;
- ctx->h[1] += F[1];
- ctx->h[2] += F[2];
- ctx->h[3] += F[3];
- ctx->h[4] += E;
- ctx->h[5] += F[5];
- ctx->h[6] += F[6];
- ctx->h[7] += F[7];
+ state[0] += A;
+ state[1] += F[1];
+ state[2] += F[2];
+ state[3] += F[3];
+ state[4] += E;
+ state[5] += F[5];
+ state[6] += F[6];
+ state[7] += F[7];
W += 16;
}
@@ -517,23 +508,22 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
ROUND_00_15(i + j, a, b, c, d, e, f, g, h); \
} while (0)
-static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
+static void sha512_block_data_order(uint64_t *state, const uint64_t *W,
size_t num) {
- const uint64_t *W = in;
uint64_t a, b, c, d, e, f, g, h, s0, s1, T1;
uint64_t X[16];
int i;
while (num--) {
- a = ctx->h[0];
- b = ctx->h[1];
- c = ctx->h[2];
- d = ctx->h[3];
- e = ctx->h[4];
- f = ctx->h[5];
- g = ctx->h[6];
- h = ctx->h[7];
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ f = state[5];
+ g = state[6];
+ h = state[7];
T1 = X[0] = PULL64(W[0]);
ROUND_00_15(0, a, b, c, d, e, f, g, h);
@@ -587,14 +577,14 @@ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
}
- ctx->h[0] += a;
- ctx->h[1] += b;
- ctx->h[2] += c;
- ctx->h[3] += d;
- ctx->h[4] += e;
- ctx->h[5] += f;
- ctx->h[6] += g;
- ctx->h[7] += h;
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
W += 16;
}