diff options
Diffstat (limited to 'src/crypto/chacha/chacha_vec_arm.S')
-rw-r--r-- | src/crypto/chacha/chacha_vec_arm.S | 597 |
1 files changed, 308 insertions, 289 deletions
diff --git a/src/crypto/chacha/chacha_vec_arm.S b/src/crypto/chacha/chacha_vec_arm.S index 0f82627..f18c867 100644 --- a/src/crypto/chacha/chacha_vec_arm.S +++ b/src/crypto/chacha/chacha_vec_arm.S @@ -23,7 +23,7 @@ # /opt/gcc-linaro-4.9-2014.11-x86_64_arm-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -fpic -DASM_GEN -I ../../include -S chacha_vec.c -o - #if !defined(OPENSSL_NO_ASM) -#if defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) .syntax unified .cpu cortex-a8 @@ -60,137 +60,138 @@ .thumb_func .type CRYPTO_chacha_20_neon, %function CRYPTO_chacha_20_neon: - @ args = 8, pretend = 0, frame = 152 + @ args = 8, pretend = 0, frame = 160 @ frame_needed = 1, uses_anonymous_args = 0 push {r4, r5, r6, r7, r8, r9, r10, fp, lr} - mov r8, r3 + mov r9, r3 vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15} - mov r9, r2 + mov r10, r2 ldr r4, .L91+16 - mov fp, r0 - mov r10, r1 - mov lr, r8 + mov fp, r1 + mov r8, r9 .LPIC16: add r4, pc - sub sp, sp, #156 + sub sp, sp, #164 add r7, sp, #0 sub sp, sp, #112 - add r6, r7, #144 - str r0, [r7, #88] + add lr, r7, #148 + str r0, [r7, #80] str r1, [r7, #12] str r2, [r7, #8] ldmia r4, {r0, r1, r2, r3} add r4, sp, #15 bic r4, r4, #15 - ldr ip, [r7, #256] - str r4, [r7, #84] + ldr r6, [r7, #264] + str r4, [r7, #88] mov r5, r4 adds r4, r4, #64 - adds r5, r5, #80 - str r8, [r7, #68] + add ip, r5, #80 + str r9, [r7, #56] stmia r4, {r0, r1, r2, r3} movw r4, #43691 - ldr r0, [ip] @ unaligned + ldr r0, [r6] @ unaligned movt r4, 43690 - ldr r1, [ip, #4] @ unaligned - ldr r3, [r7, #84] - ldr r2, [r8, #8] @ unaligned - mov r8, #0 - stmia r6!, {r0, r1} - mov r6, r5 - ldr r1, [lr, #4] @ unaligned - ldr r0, [lr] @ unaligned - vldr d24, [r3, #64] - vldr d25, [r3, #72] - ldr r3, [lr, #12] @ unaligned - str r5, [r7, #80] - stmia r5!, {r0, r1, r2, r3} - ldr r0, [lr, #16]! @ unaligned - ldr r2, [r7, #84] - umull r4, r5, r9, r4 + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r9, #12] @ unaligned + str ip, [r7, #84] + stmia lr!, {r0, r1, r2} + mov lr, ip + ldr r1, [r9, #4] @ unaligned + ldr r2, [r9, #8] @ unaligned + ldr r0, [r9] @ unaligned + vldr d24, [r5, #64] + vldr d25, [r5, #72] + umull r4, r5, r10, r4 + stmia ip!, {r0, r1, r2, r3} + ldr r0, [r8, #16]! @ unaligned + ldr r2, [r7, #88] + ldr r4, [r7, #268] + ldr r1, [r8, #4] @ unaligned vldr d26, [r2, #80] vldr d27, [r2, #88] - ldr r1, [lr, #4] @ unaligned - ldr r2, [lr, #8] @ unaligned - ldr r3, [lr, #12] @ unaligned - ldr r4, [r7, #260] - stmia r6!, {r0, r1, r2, r3} - ldr r3, [ip] - ldr r1, [r7, #84] - ldr r2, [ip, #4] - str r3, [r7, #64] - vldr d28, [r1, #80] - vldr d29, [r1, #88] - str r3, [r7, #136] + ldr r3, [r8, #12] @ unaligned + ldr r2, [r8, #8] @ unaligned + stmia lr!, {r0, r1, r2, r3} + ldr r3, [r6] + ldr r1, [r6, #4] + ldr r6, [r6, #8] + str r3, [r7, #68] + str r3, [r7, #132] lsrs r3, r5, #7 + str r6, [r7, #140] + str r6, [r7, #60] + ldr r6, [r7, #88] str r4, [r7, #128] - str r2, [r7, #140] - str r8, [r7, #132] - str r2, [r7, #60] + str r1, [r7, #136] + str r1, [r7, #64] + vldr d28, [r6, #80] + vldr d29, [r6, #88] vldr d22, [r7, #128] vldr d23, [r7, #136] beq .L26 + mov r5, r6 lsls r2, r3, #8 - ldr r5, [r1, #64] sub r3, r2, r3, lsl #6 - ldr r2, [r1, #68] + ldr r2, [r5, #68] + ldr r6, [r6, #64] vldr d0, .L91 vldr d1, .L91+8 - adds r4, r4, #2 - str r5, [r7, #56] - str r2, [r7, #52] - ldr r5, [r1, #72] - ldr r2, [r1, #76] + str r2, [r7, #48] + ldr r2, [r5, #72] str r3, [r7, #4] - str r5, [r7, #48] + str r6, [r7, #52] str r2, [r7, #44] - mov r2, fp - str r4, [r7, #72] + adds r2, r4, #2 + str r2, [r7, #72] + ldr r2, [r5, #76] + str fp, [r7, #76] + str r2, [r7, #40] + ldr r2, [r7, #80] adds r3, r2, r3 - str r10, [r7, #76] str r3, [r7, #16] .L4: - ldr r5, [r7, #68] - add r8, r7, #44 - ldr r4, [r7, #72] + ldr r5, [r7, #56] + add r8, r7, #40 + ldr r4, [r7, #68] vadd.i32 q3, q11, q0 ldmia r8, {r8, r9, r10, fp} - vmov q8, q14 @ v4si + mov r1, r5 ldr r2, [r5, #4] - vmov q1, q13 @ v4si + vmov q8, q14 @ v4si ldr r3, [r5] + vmov q1, q13 @ v4si + ldr r6, [r1, #28] vmov q9, q12 @ v4si - ldr lr, [r5, #20] - vmov q2, q11 @ v4si mov r0, r2 ldr r2, [r5, #8] - str r3, [r7, #108] - mov r3, r5 - ldr ip, [r5, #16] + str r4, [r7, #112] + movs r1, #10 + ldr r4, [r7, #72] + vmov q2, q11 @ v4si + ldr lr, [r5, #20] vmov q15, q14 @ v4si - mov r1, r2 - ldr r2, [r5, #12] - ldr r5, [r5, #24] + str r3, [r7, #108] vmov q5, q13 @ v4si - ldr r6, [r3, #28] + str r2, [r7, #116] vmov q10, q12 @ v4si + ldr r2, [r5, #12] + ldr ip, [r5, #16] ldr r3, [r7, #64] - str r5, [r7, #116] - movs r5, #10 + ldr r5, [r5, #24] str r6, [r7, #120] - str r4, [r7, #112] + str r1, [r7, #92] ldr r6, [r7, #60] + str r4, [r7, #100] + ldr r1, [r7, #116] + ldr r4, [r7, #108] str r8, [r7, #96] mov r8, r10 - ldr r4, [r7, #108] - mov r10, r9 - ldr r9, [r7, #116] str lr, [r7, #104] + mov r10, r9 mov lr, r3 - str r5, [r7, #92] - movs r5, #0 + mov r9, r5 str r6, [r7, #124] - str r5, [r7, #100] b .L92 .L93: .align 3 @@ -213,25 +214,24 @@ CRYPTO_chacha_20_neon: str r5, [r7, #116] add r10, r10, r1 vrev32.16 q3, q3 - eor lr, lr, r10 + str r6, [r7, #108] vadd.i32 q8, q8, q3 vrev32.16 q2, q2 vadd.i32 q15, q15, q2 mov fp, r3 - ldr r3, [r7, #112] + ldr r3, [r7, #100] veor q4, q8, q1 - str r6, [r7, #112] veor q6, q15, q5 + add fp, fp, r2 eors r3, r3, r5 mov r5, r6 - ldr r6, [r7, #100] + ldr r6, [r7, #112] vshl.i32 q1, q4, #12 vshl.i32 q5, q6, #12 - add fp, fp, r2 - eors r6, r6, r5 ror r3, r3, #16 + eors r6, r6, r5 + eor lr, lr, r10 vsri.32 q1, q4, #20 - ror lr, lr, #16 mov r5, r6 ldr r6, [r7, #124] vsri.32 q5, q6, #20 @@ -239,25 +239,26 @@ CRYPTO_chacha_20_neon: eor r6, r6, fp ror r5, r5, #16 vadd.i32 q9, q9, q1 - add r9, r9, lr + ror lr, lr, #16 ror r3, r6, #16 ldr r6, [r7, #124] vadd.i32 q10, q10, q5 - str r3, [r7, #108] + add r9, r9, lr veor q4, q9, q3 add ip, ip, r6 ldr r6, [r7, #104] veor q6, q10, q2 eor r4, ip, r4 - eor r1, r9, r1 + str r3, [r7, #104] vshl.i32 q3, q4, #8 + eor r1, r9, r1 mov r8, r6 ldr r6, [r7, #120] vshl.i32 q2, q6, #8 ror r4, r4, #20 add r6, r6, r3 vsri.32 q3, q4, #24 - str r6, [r7, #104] + str r6, [r7, #100] eors r2, r2, r6 ldr r6, [r7, #116] vsri.32 q2, q6, #24 @@ -268,7 +269,7 @@ CRYPTO_chacha_20_neon: eor r0, r8, r0 vadd.i32 q15, q15, q2 mov r3, r6 - ldr r6, [r7, #112] + ldr r6, [r7, #108] veor q6, q4, q1 ror r0, r0, #20 str r3, [r7, #112] @@ -285,7 +286,7 @@ CRYPTO_chacha_20_neon: ror r1, r1, #20 eors r5, r5, r6 vsri.32 q8, q6, #25 - ldr r6, [r7, #108] + ldr r6, [r7, #104] ror r3, r3, #24 ror r5, r5, #24 vsri.32 q1, q5, #25 @@ -297,7 +298,7 @@ CRYPTO_chacha_20_neon: vext.32 q8, q8, q8, #1 str ip, [r7, #124] add ip, r5, r8 - ldr r5, [r7, #104] + ldr r5, [r7, #100] eor lr, r10, lr ror r6, r6, #24 vext.32 q1, q1, q1, #1 @@ -410,7 +411,7 @@ CRYPTO_chacha_20_neon: veor q6, q15, q1 ldr r3, [r7, #116] vshl.i32 q1, q4, #7 - str r2, [r7, #112] + str r2, [r7, #100] add r3, r3, r2 str r3, [r7, #120] vshl.i32 q5, q6, #7 @@ -423,7 +424,7 @@ CRYPTO_chacha_20_neon: vsri.32 q5, q6, #25 ldr r3, [r7, #92] ror r4, r4, #25 - str r6, [r7, #100] + str r6, [r7, #112] ror r0, r0, #25 subs r3, r3, #1 str r5, [r7, #104] @@ -437,308 +438,325 @@ CRYPTO_chacha_20_neon: vext.32 q5, q5, q5, #3 vext.32 q1, q1, q1, #3 bne .L3 - ldr r3, [r7, #80] + ldr r3, [r7, #84] vadd.i32 q4, q12, q10 - str r9, [r7, #116] + str r9, [r7, #92] mov r9, r10 mov r10, r8 ldr r8, [r7, #96] str lr, [r7, #96] mov lr, r5 - ldr r5, [r7, #56] + ldr r5, [r7, #52] vadd.i32 q5, q13, q5 ldr r6, [r7, #76] vadd.i32 q15, q14, q15 add fp, fp, r5 - ldr r5, [r7, #52] - str r4, [r7, #108] + ldr r5, [r7, #48] + str r3, [r7, #104] vadd.i32 q7, q14, q8 - ldr r4, [r7, #112] - add r5, r10, r5 - str r3, [r7, #112] - vadd.i32 q2, q11, q2 ldr r3, [r6, #12] @ unaligned + add r10, r10, r5 + str r0, [r7, #36] + vadd.i32 q2, q11, q2 + ldr r0, [r6] @ unaligned vadd.i32 q6, q12, q9 - str r0, [r7, #92] + ldr r5, [r7, #104] vadd.i32 q1, q13, q1 - ldr r0, [r6] @ unaligned + str r1, [r7, #116] vadd.i32 q11, q11, q0 - str r1, [r7, #40] - str r2, [r7, #36] - vadd.i32 q3, q11, q3 ldr r1, [r6, #4] @ unaligned - vadd.i32 q11, q11, q0 + str r2, [r7, #32] + vadd.i32 q3, q11, q3 ldr r2, [r6, #8] @ unaligned - str r5, [r7, #104] vadd.i32 q11, q11, q0 - ldr r5, [r7, #112] - ldr r10, [r7, #80] + str r4, [r7, #108] + ldr r4, [r7, #100] + vadd.i32 q11, q11, q0 stmia r5!, {r0, r1, r2, r3} - mov r5, r10 - ldr r0, [r7, #84] - ldr r2, [r7, #48] - ldr r3, [r7, #72] - vldr d20, [r0, #80] - vldr d21, [r0, #88] - add r9, r9, r2 + ldr r2, [r7, #88] + ldr r3, [r7, #44] + ldr r5, [r7, #84] + vldr d20, [r2, #80] + vldr d21, [r2, #88] + add r3, r9, r3 + str r3, [r7, #104] veor q10, q10, q4 - ldr r2, [r7, #44] + ldr r3, [r7, #40] + add r3, r8, r3 + str r3, [r7, #100] + ldr r3, [r7, #72] + vstr d20, [r2, #80] + vstr d21, [r2, #88] adds r1, r4, r3 str r1, [r7, #28] - add r2, r8, r2 - str r2, [r7, #32] - vstr d20, [r0, #80] - vstr d21, [r0, #88] ldmia r5!, {r0, r1, r2, r3} + ldr r4, [r7, #68] + ldr r5, [r7, #112] + ldr r8, [r7, #84] + add r5, r5, r4 ldr r4, [r7, #96] + str r5, [r7, #24] ldr r5, [r7, #64] add r4, r4, r5 - ldr r5, [r7, #124] + ldr r5, [r7, #60] str r4, [r7, #96] - ldr r4, [r7, #60] - add r5, r5, r4 - ldr r4, [r7, #88] - str r5, [r7, #24] - mov r5, r10 + ldr r4, [r7, #124] + add r4, r4, r5 + str r4, [r7, #20] + ldr r4, [r7, #80] + mov r5, r8 str r0, [r4] @ unaligned mov r0, r4 str r1, [r4, #4] @ unaligned - mov r8, r0 + mov r4, r8 str r2, [r0, #8] @ unaligned - mov r4, r10 + mov r8, r0 str r3, [r0, #12] @ unaligned + mov r9, r4 ldr r0, [r6, #16]! @ unaligned + ldr r3, [r6, #12] @ unaligned ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned - ldr r3, [r6, #12] @ unaligned ldr r6, [r7, #76] stmia r5!, {r0, r1, r2, r3} - mov r5, r10 - ldr r3, [r7, #84] + mov r5, r8 + ldr r3, [r7, #88] vldr d20, [r3, #80] vldr d21, [r3, #88] veor q10, q10, q5 vstr d20, [r3, #80] vstr d21, [r3, #88] ldmia r4!, {r0, r1, r2, r3} - mov r4, r8 + mov r4, r9 str r0, [r8, #16] @ unaligned str r1, [r8, #20] @ unaligned str r2, [r8, #24] @ unaligned str r3, [r8, #28] @ unaligned - mov r8, r4 + mov r8, r5 ldr r0, [r6, #32]! @ unaligned - str r10, [r7, #124] + mov r5, r9 ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned ldr r3, [r6, #12] @ unaligned ldr r6, [r7, #76] stmia r5!, {r0, r1, r2, r3} - mov r5, r10 - ldr r2, [r7, #84] - vldr d16, [r2, #80] - vldr d17, [r2, #88] + mov r5, r8 + ldr r1, [r7, #88] + vldr d16, [r1, #80] + vldr d17, [r1, #88] veor q15, q8, q15 - vstr d30, [r2, #80] - vstr d31, [r2, #88] - ldmia r10!, {r0, r1, r2, r3} - str r0, [r4, #32] @ unaligned - str r1, [r4, #36] @ unaligned - str r2, [r4, #40] @ unaligned - str r3, [r4, #44] @ unaligned + vstr d30, [r1, #80] + vstr d31, [r1, #88] + ldmia r4!, {r0, r1, r2, r3} + mov r4, r9 + str r0, [r8, #32] @ unaligned + str r1, [r8, #36] @ unaligned + str r2, [r8, #40] @ unaligned + str r3, [r8, #44] @ unaligned + mov r8, r5 ldr r0, [r6, #48]! @ unaligned ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned ldr r3, [r6, #12] @ unaligned ldr r6, [r7, #76] - stmia r5!, {r0, r1, r2, r3} - ldr r1, [r7, #84] + stmia r4!, {r0, r1, r2, r3} + mov r4, r9 + ldr r1, [r7, #88] + str r9, [r7, #112] vldr d18, [r1, #80] vldr d19, [r1, #88] veor q9, q9, q2 vstr d18, [r1, #80] vstr d19, [r1, #88] - ldr r3, [r7, #112] - ldr r5, [r7, #80] - mov r10, r3 - ldmia r10!, {r0, r1, r2, r3} - str r0, [r4, #48] @ unaligned - str r1, [r4, #52] @ unaligned - str r2, [r4, #56] @ unaligned - str r3, [r4, #60] @ unaligned + ldmia r9!, {r0, r1, r2, r3} + str r0, [r5, #48] @ unaligned + str r1, [r5, #52] @ unaligned + str r2, [r5, #56] @ unaligned + str r3, [r5, #60] @ unaligned ldr r0, [r6, #64]! @ unaligned ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned ldr r3, [r6, #12] @ unaligned ldr r6, [r7, #76] - stmia r5!, {r0, r1, r2, r3} - ldr r1, [r7, #84] - ldr r3, [r7, #112] - ldr r5, [r7, #80] + mov r9, r6 + mov r6, r4 + stmia r6!, {r0, r1, r2, r3} + mov r6, r4 + ldr r1, [r7, #88] vldr d18, [r1, #80] vldr d19, [r1, #88] veor q9, q9, q6 - mov r10, r3 - str r5, [r7, #20] vstr d18, [r1, #80] vstr d19, [r1, #88] - ldmia r10!, {r0, r1, r2, r3} - str r1, [r4, #68] @ unaligned - str r2, [r4, #72] @ unaligned - str r3, [r4, #76] @ unaligned - str r0, [r4, #64] @ unaligned - ldr r0, [r6, #80]! @ unaligned - ldr r1, [r6, #4] @ unaligned - ldr r2, [r6, #8] @ unaligned - ldr r3, [r6, #12] @ unaligned + ldmia r4!, {r0, r1, r2, r3} + mov r4, r6 + str r3, [r5, #76] @ unaligned + mov r3, r9 + str r2, [r5, #72] @ unaligned + str r0, [r5, #64] @ unaligned + str r1, [r5, #68] @ unaligned + mov r5, r4 + ldr r0, [r3, #80]! @ unaligned + mov r9, r3 + ldr r1, [r9, #4] @ unaligned + ldr r2, [r9, #8] @ unaligned + ldr r3, [r9, #12] @ unaligned + mov r9, r4 ldr r6, [r7, #76] + str r9, [r7, #124] stmia r5!, {r0, r1, r2, r3} - ldr r1, [r7, #84] - ldr r3, [r7, #20] - ldr r5, [r7, #80] + mov r5, r8 + ldr r1, [r7, #88] vldr d18, [r1, #80] vldr d19, [r1, #88] veor q1, q9, q1 - mov r10, r3 vstr d2, [r1, #80] vstr d3, [r1, #88] - ldmia r10!, {r0, r1, r2, r3} - mov r10, r5 - str r0, [r4, #80] @ unaligned - str r1, [r4, #84] @ unaligned - str r2, [r4, #88] @ unaligned - str r3, [r4, #92] @ unaligned + ldmia r4!, {r0, r1, r2, r3} + mov r4, r9 + str r0, [r8, #80] @ unaligned + str r1, [r8, #84] @ unaligned + str r2, [r8, #88] @ unaligned + str r3, [r8, #92] @ unaligned ldr r0, [r6, #96]! @ unaligned + ldr r3, [r6, #12] @ unaligned ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned - ldr r3, [r6, #12] @ unaligned ldr r6, [r7, #76] - stmia r5!, {r0, r1, r2, r3} - mov r5, r10 - ldr r3, [r7, #84] + stmia r4!, {r0, r1, r2, r3} + mov r4, r9 + ldr r3, [r7, #88] vldr d16, [r3, #80] vldr d17, [r3, #88] veor q8, q8, q7 vstr d16, [r3, #80] vstr d17, [r3, #88] - ldmia r10!, {r0, r1, r2, r3} - str r0, [r4, #96] @ unaligned - str r1, [r4, #100] @ unaligned - str r2, [r4, #104] @ unaligned - str r3, [r4, #108] @ unaligned + ldmia r9!, {r0, r1, r2, r3} + str r0, [r5, #96] @ unaligned + str r1, [r5, #100] @ unaligned + str r2, [r5, #104] @ unaligned + str r3, [r5, #108] @ unaligned ldr r0, [r6, #112]! @ unaligned ldr r1, [r6, #4] @ unaligned ldr r2, [r6, #8] @ unaligned ldr r3, [r6, #12] @ unaligned - mov r6, r5 + mov r6, r4 stmia r6!, {r0, r1, r2, r3} - ldr r3, [r7, #84] + mov r6, r5 + ldr r3, [r7, #88] vldr d16, [r3, #80] vldr d17, [r3, #88] veor q8, q8, q3 vstr d16, [r3, #80] vstr d17, [r3, #88] - ldmia r5!, {r0, r1, r2, r3} - str r1, [r4, #116] @ unaligned - ldr r1, [r7, #76] - str r0, [r4, #112] @ unaligned - str r2, [r4, #120] @ unaligned - str r3, [r4, #124] @ unaligned - ldr r3, [r1, #128] - ldr r2, [r7, #104] + ldmia r4!, {r0, r1, r2, r3} + mov r4, r5 + mov r8, r4 + str r2, [r5, #120] @ unaligned + ldr r2, [r7, #76] + str r0, [r5, #112] @ unaligned + str r1, [r5, #116] @ unaligned + str r3, [r5, #124] @ unaligned + ldr r3, [r2, #128] + ldr r1, [r7, #104] eor r3, fp, r3 - str r3, [r4, #128] - ldr r3, [r1, #132] - eors r2, r2, r3 - str r2, [r8, #132] - ldr r3, [r1, #136] - ldr r5, [r7, #68] - ldr r6, [r7, #32] - eor r3, r9, r3 - str r3, [r4, #136] - ldr r3, [r1, #140] - ldr r0, [r7, #92] - eors r3, r3, r6 - ldr r6, [r7, #108] + str r3, [r5, #128] + ldr r3, [r2, #132] + mov r5, r2 + eor r3, r10, r3 + str r3, [r6, #132] + ldr r3, [r2, #136] + mov r6, r5 + eors r1, r1, r3 + str r1, [r8, #136] + ldr r1, [r7, #56] + ldr r3, [r2, #140] + ldr r2, [r7, #100] + ldr r0, [r7, #108] + eors r3, r3, r2 str r3, [r4, #140] - ldr r3, [r5] - ldr r2, [r1, #144] - add r6, r6, r3 - eors r2, r2, r6 + ldr r3, [r1] + ldr r2, [r5, #144] + mov r8, r0 + add r8, r8, r3 + mov r5, r6 + mov r3, r8 + eors r2, r2, r3 str r2, [r4, #144] - ldr r2, [r5, #4] - ldr r3, [r1, #148] - add r0, r0, r2 + ldr r3, [r6, #148] + ldr r2, [r1, #4] ldr r6, [r7, #36] - eors r3, r3, r0 - ldr r0, [r7, #40] - str r3, [r4, #148] - ldr r2, [r5, #8] - ldr r3, [r1, #152] - add r0, r0, r2 - eors r3, r3, r0 - str r3, [r4, #152] - ldr r2, [r5, #12] - mov r0, r4 - ldr r3, [r1, #156] - mov r4, r1 add r6, r6, r2 - mov r1, r0 eors r3, r3, r6 - str r3, [r0, #156] - ldr r2, [r5, #16] - ldr r3, [r4, #160] + mov r6, r1 + str r3, [r4, #148] + ldr r2, [r1, #8] + ldr r1, [r7, #116] + ldr r3, [r5, #152] + mov r8, r1 + add r8, r8, r2 + ldr r1, [r7, #32] + mov r2, r8 + eors r3, r3, r2 + str r3, [r4, #152] + mov r8, r4 + ldr r2, [r6, #12] + ldr r3, [r5, #156] + add r1, r1, r2 + eors r3, r3, r1 + str r3, [r4, #156] + ldr r2, [r6, #16] + mov r1, r4 + ldr r3, [r5, #160] + mov r4, r5 add ip, ip, r2 + mov r5, r6 eor r3, ip, r3 str r3, [r1, #160] - ldr r2, [r5, #20] + ldr r2, [r6, #20] ldr r3, [r4, #164] add lr, lr, r2 - ldr r2, [r7, #116] + ldr r2, [r7, #92] eor r3, lr, r3 str r3, [r1, #164] ldr r6, [r5, #24] mov lr, r4 ldr r3, [r4, #168] add r2, r2, r6 - mov r6, r4 + ldr r6, [r7, #120] eors r3, r3, r2 str r3, [r1, #168] ldr r5, [r5, #28] - mov r2, r1 ldr r3, [r4, #172] - ldr r0, [r7, #120] - add r0, r0, r5 - ldr r5, [r7, #24] - eors r3, r3, r0 + add r6, r6, r5 + eors r3, r3, r6 str r3, [r1, #172] - ldr r3, [r7, #72] ldr r4, [r4, #176] - ldr r1, [r7, #28] - eors r4, r4, r1 - adds r1, r3, #3 - str r4, [r2, #176] - ldr r3, [r7, #100] + ldr r0, [r7, #28] + ldr r5, [r7, #24] + eors r4, r4, r0 + str r4, [r8, #176] ldr r0, [lr, #180] - str r1, [r7, #72] - eors r3, r3, r0 - mov r0, r3 - mov r3, r2 - str r0, [r2, #180] - adds r3, r3, #192 - ldr r1, [lr, #184] ldr r2, [r7, #96] + eors r0, r0, r5 + str r0, [r8, #180] + ldr r1, [lr, #184] + ldr r4, [r7, #20] eors r1, r1, r2 - str r1, [r3, #-8] + str r1, [r8, #184] ldr r2, [lr, #188] - mov r1, r6 - adds r1, r1, #192 - str r1, [r7, #76] - eors r2, r2, r5 - str r2, [r3, #-4] + add r1, lr, #192 + ldr r3, [r7, #72] + eors r2, r2, r4 + str r2, [r8, #188] ldr r2, [r7, #16] - str r3, [r7, #88] + adds r3, r3, #3 + str r3, [r7, #72] + mov r3, r8 + adds r3, r3, #192 + str r1, [r7, #76] cmp r2, r3 + str r3, [r7, #80] bne .L4 ldr r3, [r7, #12] ldr r2, [r7, #4] @@ -757,8 +775,8 @@ CRYPTO_chacha_20_neon: beq .L6 ldr r5, [r7, #12] ldr r4, [r7, #16] - ldr r6, [r7, #84] - ldr lr, [r7, #80] + ldr r6, [r7, #88] + ldr lr, [r7, #84] vldr d30, .L94 vldr d31, .L94+8 str fp, [r7, #120] @@ -964,7 +982,7 @@ CRYPTO_chacha_20_neon: mov r9, r5 bhi .L88 vadd.i32 q12, q12, q10 - ldr r3, [r7, #84] + ldr r3, [r7, #88] vst1.64 {d24-d25}, [r3:128] .L14: ldr r3, [r7, #8] @@ -1001,7 +1019,7 @@ CRYPTO_chacha_20_neon: movcs r1, ip cmp r1, #0 beq .L17 - ldr r5, [r7, #84] + ldr r5, [r7, #88] cmp r1, #1 ldrb r0, [r0] @ zero_extendqisi2 add r3, r2, #1 @@ -1136,7 +1154,7 @@ CRYPTO_chacha_20_neon: ldr r5, [r7, #16] cmp r6, #1 add r0, r1, r2 - ldr r1, [r7, #84] + ldr r1, [r7, #88] add r1, r1, r2 vld1.64 {d18-d19}, [r0:64] add r2, r2, r5 @@ -1174,7 +1192,7 @@ CRYPTO_chacha_20_neon: add r3, r3, lr beq .L1 .L19: - ldr r4, [r7, #84] + ldr r4, [r7, #88] adds r2, r3, #1 ldr r1, [r7, #12] cmp r2, r9 @@ -1289,7 +1307,7 @@ CRYPTO_chacha_20_neon: eor r1, r1, r0 strb r1, [r5, r2] bls .L1 - ldr r2, [r7, #84] + ldr r2, [r7, #88] ldrb r1, [r2, r3] @ zero_extendqisi2 ldr r2, [r7, #12] ldrb r2, [r2, r3] @ zero_extendqisi2 @@ -1297,7 +1315,7 @@ CRYPTO_chacha_20_neon: ldr r1, [r7, #16] strb r2, [r1, r3] .L1: - adds r7, r7, #156 + adds r7, r7, #164 mov sp, r7 @ sp needed vldm sp!, {d8-d15} @@ -1305,7 +1323,7 @@ CRYPTO_chacha_20_neon: .L88: ldr r5, [r7, #12] vadd.i32 q12, q12, q10 - ldr r4, [r7, #80] + ldr r4, [r7, #84] cmp r9, #31 ldr r0, [r5] @ unaligned ldr r1, [r5, #4] @ unaligned @@ -1313,7 +1331,7 @@ CRYPTO_chacha_20_neon: ldr r2, [r5, #8] @ unaligned ldr r3, [r5, #12] @ unaligned stmia r6!, {r0, r1, r2, r3} - ldr r2, [r7, #84] + ldr r2, [r7, #88] ldr r6, [r7, #16] vldr d18, [r2, #80] vldr d19, [r2, #88] @@ -1328,7 +1346,7 @@ CRYPTO_chacha_20_neon: str r3, [r6, #12] @ unaligned bhi .L89 vadd.i32 q13, q13, q15 - ldr r3, [r7, #84] + ldr r3, [r7, #88] vstr d26, [r3, #16] vstr d27, [r3, #24] b .L14 @@ -1337,7 +1355,7 @@ CRYPTO_chacha_20_neon: ldr r2, [r7, #12] add r2, r2, r9 mov r5, r2 - ldr r2, [r7, #84] + ldr r2, [r7, #88] add r2, r2, r3 mov r3, r2 .L24: @@ -1347,17 +1365,18 @@ CRYPTO_chacha_20_neon: eor r2, r2, r1 strb r2, [r4], #1 bne .L24 - adds r7, r7, #156 + adds r7, r7, #164 mov sp, r7 @ sp needed vldm sp!, {d8-d15} pop {r4, r5, r6, r7, r8, r9, r10, fp, pc} .L26: - str fp, [r7, #16] + ldr r3, [r7, #80] + str r3, [r7, #16] b .L2 .L89: mov r3, r5 - ldr r4, [r7, #80] + ldr r4, [r7, #84] ldr r0, [r3, #16]! @ unaligned add lr, r1, #16 mov r5, r1 @@ -1368,7 +1387,7 @@ CRYPTO_chacha_20_neon: ldr r2, [r3, #8] @ unaligned ldr r3, [r3, #12] @ unaligned stmia r6!, {r0, r1, r2, r3} - ldr r2, [r7, #84] + ldr r2, [r7, #88] vldr d18, [r2, #80] vldr d19, [r2, #88] veor q13, q9, q13 @@ -1381,16 +1400,16 @@ CRYPTO_chacha_20_neon: str r3, [lr, #12] @ unaligned bhi .L90 vadd.i32 q8, q14, q8 - ldr r3, [r7, #84] + ldr r3, [r7, #88] vstr d16, [r3, #32] vstr d17, [r3, #40] b .L14 .L90: ldr r3, [r7, #12] add lr, r5, #32 - ldr r4, [r7, #80] + ldr r4, [r7, #84] vadd.i32 q8, q14, q8 - ldr r5, [r7, #84] + ldr r5, [r7, #88] vadd.i32 q11, q11, q3 ldr r0, [r3, #32]! @ unaligned mov r6, r4 @@ -1424,5 +1443,5 @@ CRYPTO_chacha_20_neon: .ident "GCC: (Linaro GCC 2014.11) 4.9.3 20141031 (prerelease)" .section .note.GNU-stack,"",%progbits -#endif /* __arm__ || __aarch64__ */ +#endif /* __arm__ */ #endif /* !OPENSSL_NO_ASM */ |