diff options
Diffstat (limited to 'src/crypto/chacha/chacha_vec_arm.S')
-rw-r--r-- | src/crypto/chacha/chacha_vec_arm.S | 1426 |
1 files changed, 1426 insertions, 0 deletions
diff --git a/src/crypto/chacha/chacha_vec_arm.S b/src/crypto/chacha/chacha_vec_arm.S new file mode 100644 index 0000000..15d4556 --- /dev/null +++ b/src/crypto/chacha/chacha_vec_arm.S @@ -0,0 +1,1426 @@ +# Copyright (c) 2014, Google Inc. +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# This file contains a pre-compiled version of chacha_vec.c for ARM. This is +# needed to support switching on NEON code at runtime. If the whole of OpenSSL +# were to be compiled with the needed flags to build chacha_vec.c, then it +# wouldn't be possible to run on non-NEON systems. +# +# This file was generated by chacha_vec_arm_generate.go using the following +# compiler command: +# +# /opt/gcc-linaro-4.9-2014.11-x86_64_arm-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -fpic -DASM_GEN -I ../../include -S chacha_vec.c -o - + +#if !defined(OPENSSL_NO_ASM) + + .syntax unified + .cpu cortex-a8 + .eabi_attribute 27, 3 + +# EABI attribute 28 sets whether VFP register arguments were used to build this +# file. If object files are inconsistent on this point, the linker will refuse +# to link them. Thus we report whatever the compiler expects since we don't use +# VFP arguments. + +#if defined(__ARM_PCS_VFP) + .eabi_attribute 28, 1 +#else + .eabi_attribute 28, 0 +#endif + + .fpu neon + .eabi_attribute 20, 1 + .eabi_attribute 21, 1 + .eabi_attribute 23, 3 + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + .eabi_attribute 26, 2 + .eabi_attribute 30, 2 + .eabi_attribute 34, 1 + .eabi_attribute 18, 4 + .thumb + .file "chacha_vec.c" + .text + .align 2 + .global CRYPTO_chacha_20_neon + .hidden CRYPTO_chacha_20_neon + .thumb + .thumb_func + .type CRYPTO_chacha_20_neon, %function +CRYPTO_chacha_20_neon: + @ args = 8, pretend = 0, frame = 128 + @ frame_needed = 1, uses_anonymous_args = 0 + push {r4, r5, r6, r7, r8, r9, r10, fp, lr} + mov r4, r2 + vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15} + movw r8, #43691 + movt r8, 43690 + mov ip, r3 + umull r8, r9, r4, r8 + sub sp, sp, #132 + add r7, sp, #0 + sub sp, sp, #112 + mov fp, r0 + mov r10, r1 + str r2, [r7, #8] + add r4, sp, #15 + ldr r2, .L92+16 + bic r4, r4, #15 + ldr r5, [r7, #232] + add lr, r4, #64 +.LPIC16: + add r2, pc + str r0, [r7, #60] + str r1, [r7, #12] + str r3, [r7, #44] + ldmia r2, {r0, r1, r2, r3} + ldr r6, [r5] + str r4, [r7, #72] + ldr r5, [r5, #4] + ldr r4, [r7, #236] + str r6, [r7, #120] + str r5, [r7, #124] + str r4, [r7, #112] + stmia lr, {r0, r1, r2, r3} + movs r3, #0 + ldr r0, [r7, #72] + str r3, [r7, #116] + lsrs r3, r9, #7 + vldr d22, [r7, #112] + vldr d23, [r7, #120] + vldr d24, [r0, #64] + vldr d25, [r0, #72] + vld1.64 {d26-d27}, [ip:64] + vldr d28, [ip, #16] + vldr d29, [ip, #24] + beq .L26 + ldr r1, [r0, #64] + lsls r2, r3, #8 + sub r3, r2, r3, lsl #6 + str r3, [r7, #4] + ldr r2, [r0, #72] + str r1, [r7, #40] + mov r1, r3 + ldr r3, [r0, #68] + vldr d0, .L92 + vldr d1, .L92+8 + str r2, [r7, #32] + adds r2, r4, #2 + str r3, [r7, #36] + ldr r3, [r0, #76] + str r2, [r7, #48] + mov r2, r0 + mov r0, fp + str r10, [r7, #64] + str r3, [r7, #28] + adds r3, r0, r1 + mov r1, r6 + str r3, [r7, #16] + add r3, r2, #80 + mov r2, r5 + str r3, [r7, #68] +.L4: + ldr r0, [r7, #44] + add r8, r7, #28 + str r2, [r7, #108] + vadd.i32 q3, q11, q0 + ldmia r8, {r8, r9, r10, fp} + vmov q8, q14 @ v4si + ldr r3, [r0] + vmov q1, q13 @ v4si + vmov q9, q12 @ v4si + vmov q2, q11 @ v4si + str r3, [r7, #52] + mov r3, r0 + ldr r5, [r3, #8] + vmov q15, q14 @ v4si + ldr lr, [r3, #20] + vmov q5, q13 @ v4si + ldr r6, [r3, #12] + vmov q10, q12 @ v4si + str r5, [r7, #92] + mov r5, r3 + ldr r4, [r5, #28] + movs r5, #10 + ldr ip, [r3, #16] + ldr r3, [r3, #24] + str r4, [r7, #104] + ldr r4, [r7, #48] + str r3, [r7, #100] + mov r3, r1 + str r6, [r7, #56] + str r4, [r7, #96] + str r8, [r7, #80] + mov r8, r10 + ldr r0, [r0, #4] + mov r10, r9 + ldr r1, [r7, #92] + ldr r2, [r7, #56] + ldr r9, [r7, #100] + ldr r4, [r7, #52] + str lr, [r7, #88] + mov lr, r3 + str r5, [r7, #76] + movs r5, #0 + str r5, [r7, #84] + b .L93 +.L94: + .align 3 +.L92: + .word 1 + .word 0 + .word 0 + .word 0 + .word .LANCHOR0-(.LPIC16+4) +.L93: +.L3: + vadd.i32 q9, q9, q1 + add r3, r8, r0 + vadd.i32 q10, q10, q5 + add r5, fp, r4 + veor q3, q3, q9 + mov r6, r3 + veor q2, q2, q10 + ldr r3, [r7, #80] + str r5, [r7, #100] + add r10, r10, r1 + vrev32.16 q3, q3 + eor lr, lr, r10 + vadd.i32 q8, q8, q3 + vrev32.16 q2, q2 + vadd.i32 q15, q15, q2 + mov fp, r3 + ldr r3, [r7, #96] + veor q4, q8, q1 + str r6, [r7, #96] + veor q6, q15, q5 + eors r3, r3, r5 + mov r5, r6 + ldr r6, [r7, #84] + vshl.i32 q1, q4, #12 + vshl.i32 q5, q6, #12 + add fp, fp, r2 + eors r6, r6, r5 + ror r3, r3, #16 + vsri.32 q1, q4, #20 + ror lr, lr, #16 + mov r5, r6 + ldr r6, [r7, #108] + vsri.32 q5, q6, #20 + str r3, [r7, #108] + eor r6, r6, fp + ror r5, r5, #16 + vadd.i32 q9, q9, q1 + add r9, r9, lr + ror r3, r6, #16 + ldr r6, [r7, #108] + vadd.i32 q10, q10, q5 + str r3, [r7, #92] + veor q4, q9, q3 + add ip, ip, r6 + ldr r6, [r7, #88] + veor q6, q10, q2 + eor r4, ip, r4 + eor r1, r9, r1 + vshl.i32 q3, q4, #8 + mov r8, r6 + ldr r6, [r7, #104] + vshl.i32 q2, q6, #8 + ror r4, r4, #20 + add r6, r6, r3 + vsri.32 q3, q4, #24 + str r6, [r7, #88] + eors r2, r2, r6 + ldr r6, [r7, #100] + vsri.32 q2, q6, #24 + add r8, r8, r5 + ror r2, r2, #20 + adds r6, r4, r6 + vadd.i32 q4, q8, q3 + eor r0, r8, r0 + vadd.i32 q15, q15, q2 + mov r3, r6 + ldr r6, [r7, #96] + veor q6, q4, q1 + ror r0, r0, #20 + str r3, [r7, #96] + veor q5, q15, q5 + adds r6, r0, r6 + str r6, [r7, #104] + mov r6, r3 + ldr r3, [r7, #108] + vshl.i32 q8, q6, #7 + add fp, fp, r2 + eors r3, r3, r6 + ldr r6, [r7, #104] + vshl.i32 q1, q5, #7 + ror r1, r1, #20 + eors r5, r5, r6 + vsri.32 q8, q6, #25 + ldr r6, [r7, #92] + ror r3, r3, #24 + ror r5, r5, #24 + vsri.32 q1, q5, #25 + str r5, [r7, #100] + eor r6, fp, r6 + ldr r5, [r7, #100] + add r10, r10, r1 + add ip, r3, ip + vext.32 q8, q8, q8, #1 + str ip, [r7, #108] + add ip, r5, r8 + ldr r5, [r7, #88] + eor lr, r10, lr + ror r6, r6, #24 + vext.32 q1, q1, q1, #1 + add r8, r6, r5 + vadd.i32 q9, q9, q8 + ldr r5, [r7, #108] + vext.32 q3, q3, q3, #3 + vadd.i32 q10, q10, q1 + ror lr, lr, #24 + eor r0, ip, r0 + vext.32 q2, q2, q2, #3 + add r9, r9, lr + eors r4, r4, r5 + veor q3, q9, q3 + ldr r5, [r7, #96] + eor r1, r9, r1 + ror r0, r0, #25 + veor q2, q10, q2 + adds r5, r0, r5 + vext.32 q4, q4, q4, #2 + str r5, [r7, #96] + ldr r5, [r7, #104] + ror r1, r1, #25 + vrev32.16 q3, q3 + eor r2, r8, r2 + vext.32 q15, q15, q15, #2 + adds r5, r1, r5 + vadd.i32 q4, q4, q3 + ror r4, r4, #25 + vrev32.16 q2, q2 + str r5, [r7, #84] + vadd.i32 q15, q15, q2 + eors r3, r3, r5 + ldr r5, [r7, #96] + add fp, fp, r4 + veor q8, q4, q8 + ror r2, r2, #25 + veor q1, q15, q1 + eor lr, fp, lr + eors r6, r6, r5 + ror r3, r3, #16 + ldr r5, [r7, #100] + add r10, r10, r2 + str r3, [r7, #104] + ror lr, lr, #16 + ldr r3, [r7, #104] + eor r5, r10, r5 + vshl.i32 q5, q8, #12 + add ip, lr, ip + vshl.i32 q6, q1, #12 + str ip, [r7, #88] + add ip, r3, r8 + str ip, [r7, #100] + ldr r3, [r7, #108] + ror r5, r5, #16 + vsri.32 q5, q8, #20 + ror r6, r6, #16 + add ip, r5, r3 + ldr r3, [r7, #88] + vsri.32 q6, q1, #20 + add r9, r9, r6 + eor r2, ip, r2 + eors r4, r4, r3 + ldr r3, [r7, #100] + eor r0, r9, r0 + vadd.i32 q9, q9, q5 + ror r4, r4, #20 + eors r1, r1, r3 + vadd.i32 q10, q10, q6 + ror r3, r2, #20 + str r3, [r7, #92] + ldr r3, [r7, #96] + veor q3, q9, q3 + ror r0, r0, #20 + add r8, r4, fp + veor q2, q10, q2 + add fp, r0, r3 + ldr r3, [r7, #84] + ror r1, r1, #20 + mov r2, r8 + vshl.i32 q8, q3, #8 + str r8, [r7, #80] + add r8, r1, r3 + ldr r3, [r7, #92] + vmov q1, q6 @ v4si + vshl.i32 q6, q2, #8 + eor r6, fp, r6 + add r10, r10, r3 + ldr r3, [r7, #104] + vsri.32 q8, q3, #24 + eor lr, r2, lr + eor r3, r8, r3 + ror r2, r6, #24 + vsri.32 q6, q2, #24 + eor r5, r10, r5 + str r2, [r7, #108] + ror r2, r3, #24 + ldr r3, [r7, #88] + vmov q3, q8 @ v4si + vadd.i32 q15, q15, q6 + ror lr, lr, #24 + vadd.i32 q8, q4, q8 + ror r6, r5, #24 + add r5, lr, r3 + ldr r3, [r7, #108] + veor q4, q8, q5 + add ip, ip, r6 + vmov q2, q6 @ v4si + add r9, r9, r3 + veor q6, q15, q1 + ldr r3, [r7, #100] + vshl.i32 q1, q4, #7 + str r2, [r7, #96] + add r3, r3, r2 + str r3, [r7, #104] + vshl.i32 q5, q6, #7 + eors r1, r1, r3 + ldr r3, [r7, #92] + vsri.32 q1, q4, #25 + eors r4, r4, r5 + eor r0, r9, r0 + eor r2, ip, r3 + vsri.32 q5, q6, #25 + ldr r3, [r7, #76] + ror r4, r4, #25 + str r6, [r7, #84] + ror r0, r0, #25 + subs r3, r3, #1 + str r5, [r7, #88] + ror r1, r1, #25 + ror r2, r2, #25 + vext.32 q15, q15, q15, #2 + str r3, [r7, #76] + vext.32 q2, q2, q2, #1 + vext.32 q8, q8, q8, #2 + vext.32 q3, q3, q3, #1 + vext.32 q5, q5, q5, #3 + vext.32 q1, q1, q1, #3 + bne .L3 + ldr r3, [r7, #68] + vadd.i32 q4, q12, q10 + str r9, [r7, #100] + mov r9, r10 + mov r10, r8 + ldr r8, [r7, #80] + str lr, [r7, #80] + mov lr, r5 + ldr r5, [r7, #40] + vadd.i32 q5, q13, q5 + ldr r6, [r7, #64] + vadd.i32 q15, q14, q15 + add fp, fp, r5 + ldr r5, [r7, #36] + str r4, [r7, #52] + vadd.i32 q7, q14, q8 + ldr r4, [r7, #96] + add r5, r10, r5 + str r3, [r7, #96] + vadd.i32 q2, q11, q2 + ldr r3, [r6, #12] @ unaligned + vadd.i32 q6, q12, q9 + str r0, [r7, #76] + vadd.i32 q1, q13, q1 + ldr r0, [r6] @ unaligned + vadd.i32 q11, q11, q0 + str r1, [r7, #92] + str r2, [r7, #56] + vadd.i32 q3, q11, q3 + ldr r1, [r6, #4] @ unaligned + vadd.i32 q11, q11, q0 + ldr r2, [r6, #8] @ unaligned + str r5, [r7, #88] + vadd.i32 q11, q11, q0 + ldr r5, [r7, #96] + ldr r10, [r7, #68] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r2, [r7, #72] + ldr r1, [r7, #32] + ldr r3, [r7, #48] + vldr d20, [r2, #80] + vldr d21, [r2, #88] + add r9, r9, r1 + veor q10, q10, q4 + ldr r1, [r7, #28] + add r0, r8, r1 + str r0, [r7, #24] + vstr d20, [r2, #80] + vstr d21, [r2, #88] + adds r0, r4, r3 + str r0, [r7, #20] + ldmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r4, [r7, #60] + str r0, [r4] @ unaligned + mov r4, r10 + ldr r0, [r7, #60] + str r1, [r0, #4] @ unaligned + mov r8, r0 + str r2, [r0, #8] @ unaligned + str r3, [r0, #12] @ unaligned + ldr r0, [r6, #16]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r3, [r7, #72] + vldr d20, [r3, #80] + vldr d21, [r3, #88] + veor q10, q10, q5 + vstr d20, [r3, #80] + vstr d21, [r3, #88] + ldmia r4!, {r0, r1, r2, r3} + mov r4, r8 + str r0, [r8, #16] @ unaligned + str r1, [r8, #20] @ unaligned + str r2, [r8, #24] @ unaligned + str r3, [r8, #28] @ unaligned + ldr r0, [r6, #32]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r0, [r7, #72] + vldr d16, [r0, #80] + vldr d17, [r0, #88] + veor q15, q8, q15 + vstr d30, [r0, #80] + vstr d31, [r0, #88] + ldmia r10!, {r0, r1, r2, r3} + mov r10, r5 + str r0, [r4, #32] @ unaligned + str r1, [r4, #36] @ unaligned + str r2, [r4, #40] @ unaligned + str r3, [r4, #44] @ unaligned + ldr r0, [r6, #48]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r2, [r7, #72] + vldr d18, [r2, #80] + vldr d19, [r2, #88] + veor q9, q9, q2 + vstr d18, [r2, #80] + vstr d19, [r2, #88] + ldmia r10!, {r0, r1, r2, r3} + mov r10, r5 + str r0, [r4, #48] @ unaligned + str r1, [r4, #52] @ unaligned + str r2, [r4, #56] @ unaligned + str r3, [r4, #60] @ unaligned + ldr r0, [r6, #64]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r2, [r7, #72] + vldr d18, [r2, #80] + vldr d19, [r2, #88] + veor q9, q9, q6 + vstr d18, [r2, #80] + vstr d19, [r2, #88] + ldmia r10!, {r0, r1, r2, r3} + mov r10, r5 + str r0, [r4, #64] @ unaligned + str r1, [r4, #68] @ unaligned + str r2, [r4, #72] @ unaligned + str r3, [r4, #76] @ unaligned + ldr r0, [r6, #80]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r2, [r7, #72] + vldr d18, [r2, #80] + vldr d19, [r2, #88] + veor q1, q9, q1 + vstr d2, [r2, #80] + vstr d3, [r2, #88] + ldmia r10!, {r0, r1, r2, r3} + mov r10, r5 + str r0, [r4, #80] @ unaligned + str r1, [r4, #84] @ unaligned + str r2, [r4, #88] @ unaligned + str r3, [r4, #92] @ unaligned + ldr r0, [r6, #96]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + ldr r6, [r7, #64] + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r3, [r7, #72] + vldr d16, [r3, #80] + vldr d17, [r3, #88] + veor q8, q8, q7 + vstr d16, [r3, #80] + vstr d17, [r3, #88] + ldmia r10!, {r0, r1, r2, r3} + mov r10, r5 + str r0, [r4, #96] @ unaligned + str r1, [r4, #100] @ unaligned + str r2, [r4, #104] @ unaligned + str r3, [r4, #108] @ unaligned + ldr r0, [r6, #112]! @ unaligned + ldr r1, [r6, #4] @ unaligned + ldr r2, [r6, #8] @ unaligned + ldr r3, [r6, #12] @ unaligned + stmia r5!, {r0, r1, r2, r3} + mov r5, r10 + ldr r0, [r7, #72] + ldr r6, [r7, #44] + vldr d16, [r0, #80] + vldr d17, [r0, #88] + veor q8, q8, q3 + vstr d16, [r0, #80] + vstr d17, [r0, #88] + ldmia r5!, {r0, r1, r2, r3} + mov r5, r4 + mov r8, r5 + str r1, [r4, #116] @ unaligned + ldr r1, [r7, #64] + str r0, [r4, #112] @ unaligned + mov r0, r5 + str r2, [r4, #120] @ unaligned + str r3, [r4, #124] @ unaligned + ldr r3, [r1, #128] + ldr r2, [r7, #88] + eor r3, fp, r3 + str r3, [r4, #128] + ldr r3, [r1, #132] + mov r4, r1 + mov r1, r5 + eors r2, r2, r3 + str r2, [r8, #132] + ldr r3, [r4, #136] + ldr r2, [r7, #24] + eor r3, r9, r3 + str r3, [r5, #136] + ldr r3, [r4, #140] + eors r3, r3, r2 + str r3, [r5, #140] + mov r5, r4 + ldr r3, [r6] + ldr r2, [r4, #144] + ldr r4, [r7, #52] + add r4, r4, r3 + eors r2, r2, r4 + mov r4, r1 + str r2, [r1, #144] + ldr r1, [r7, #76] + ldr r2, [r6, #4] + ldr r3, [r5, #148] + mov r8, r1 + add r8, r8, r2 + mov r2, r8 + eors r3, r3, r2 + str r3, [r0, #148] + mov r0, r4 + ldr r2, [r6, #8] + ldr r1, [r7, #92] + ldr r3, [r5, #152] + mov r8, r1 + add r8, r8, r2 + ldr r1, [r7, #56] + mov r2, r8 + eors r3, r3, r2 + str r3, [r4, #152] + mov r8, r6 + ldr r2, [r6, #12] + mov r4, r5 + ldr r3, [r5, #156] + add r1, r1, r2 + eors r3, r3, r1 + str r3, [r0, #156] + ldr r2, [r6, #16] + mov r1, r0 + ldr r3, [r5, #160] + add ip, ip, r2 + eor r3, ip, r3 + str r3, [r0, #160] + ldr r2, [r6, #20] + mov ip, r0 + ldr r3, [r5, #164] + add lr, lr, r2 + ldr r2, [r7, #100] + eor r3, lr, r3 + str r3, [r1, #164] + ldr r6, [r6, #24] + ldr r3, [r4, #168] + add r2, r2, r6 + eors r3, r3, r2 + ldr r2, [r7, #104] + str r3, [r0, #168] + ldr r5, [r8, #28] + ldr r3, [r4, #172] + add r2, r2, r5 + mov r5, r4 + eors r3, r3, r2 + mov r2, r0 + str r3, [r0, #172] + ldr r3, [r7, #48] + ldr r4, [r4, #176] + ldr r0, [r7, #20] + adds r1, r3, #3 + ldr r3, [r7, #84] + eors r4, r4, r0 + str r4, [r2, #176] + ldr r0, [r5, #180] + mov r4, r2 + str r1, [r7, #48] + eors r3, r3, r0 + mov r0, r3 + ldr r3, [r7, #232] + str r0, [r2, #180] + ldr r1, [r3] + ldr r3, [r5, #184] + ldr r2, [r7, #80] + add r2, r2, r1 + mov r1, r5 + eors r3, r3, r2 + str r3, [ip, #184] + ldr r3, [r7, #232] + adds r1, r1, #192 + str r1, [r7, #64] + ldr r1, [r7, #108] + ldr r2, [r3, #4] + ldr r3, [r5, #188] + add r1, r1, r2 + mov r2, r1 + eors r2, r2, r3 + str r2, [ip, #188] + mov r3, r4 + ldr r2, [r7, #16] + adds r3, r3, #192 + str r3, [r7, #60] + cmp r2, r3 + beq .L85 + ldr r3, [r7, #232] + ldmia r3, {r1, r2} + b .L4 +.L85: + ldr r3, [r7, #12] + ldr r2, [r7, #4] + add r3, r3, r2 + str r3, [r7, #12] +.L2: + ldr r1, [r7, #8] + movw r2, #43691 + movt r2, 43690 + umull r2, r3, r1, r2 + lsr fp, r3, #7 + lsl r3, fp, #8 + sub fp, r3, fp, lsl #6 + rsb fp, fp, r1 + lsrs fp, fp, #6 + beq .L6 + ldr r6, [r7, #72] + ldr r5, [r7, #12] + ldr r4, [r7, #16] + mov r3, r6 + adds r3, r3, #80 + vldr d30, .L95 + vldr d31, .L95+8 + mov lr, r3 + str fp, [r7, #104] + str fp, [r7, #108] +.L8: + vmov q2, q11 @ v4si + movs r3, #10 + vmov q8, q14 @ v4si + vmov q9, q13 @ v4si + vmov q10, q12 @ v4si +.L7: + vadd.i32 q10, q10, q9 + subs r3, r3, #1 + veor q3, q2, q10 + vrev32.16 q3, q3 + vadd.i32 q8, q8, q3 + veor q9, q8, q9 + vshl.i32 q2, q9, #12 + vsri.32 q2, q9, #20 + vadd.i32 q10, q10, q2 + veor q3, q10, q3 + vshl.i32 q9, q3, #8 + vsri.32 q9, q3, #24 + vadd.i32 q8, q8, q9 + vext.32 q9, q9, q9, #3 + veor q2, q8, q2 + vext.32 q8, q8, q8, #2 + vshl.i32 q3, q2, #7 + vsri.32 q3, q2, #25 + vext.32 q3, q3, q3, #1 + vadd.i32 q10, q10, q3 + veor q9, q10, q9 + vrev32.16 q9, q9 + vadd.i32 q8, q8, q9 + veor q3, q8, q3 + vshl.i32 q2, q3, #12 + vsri.32 q2, q3, #20 + vadd.i32 q10, q10, q2 + vmov q3, q2 @ v4si + veor q9, q10, q9 + vshl.i32 q2, q9, #8 + vsri.32 q2, q9, #24 + vadd.i32 q8, q8, q2 + vext.32 q2, q2, q2, #1 + veor q3, q8, q3 + vext.32 q8, q8, q8, #2 + vshl.i32 q9, q3, #7 + vsri.32 q9, q3, #25 + vext.32 q9, q9, q9, #3 + bne .L7 + ldr r0, [r5] @ unaligned + vadd.i32 q1, q12, q10 + ldr r1, [r5, #4] @ unaligned + mov ip, lr + ldr r2, [r5, #8] @ unaligned + mov r9, lr + ldr r3, [r5, #12] @ unaligned + mov r10, r5 + vadd.i32 q9, q13, q9 + mov r8, lr + vadd.i32 q8, q14, q8 + stmia ip!, {r0, r1, r2, r3} + mov ip, lr + vldr d20, [r6, #80] + vldr d21, [r6, #88] + vadd.i32 q3, q11, q2 + veor q10, q10, q1 + vadd.i32 q11, q11, q15 + vstr d20, [r6, #80] + vstr d21, [r6, #88] + ldmia r9!, {r0, r1, r2, r3} + mov r9, r5 + str r0, [r4] @ unaligned + str r1, [r4, #4] @ unaligned + str r2, [r4, #8] @ unaligned + str r3, [r4, #12] @ unaligned + ldr r0, [r10, #16]! @ unaligned + ldr r1, [r10, #4] @ unaligned + ldr r2, [r10, #8] @ unaligned + ldr r3, [r10, #12] @ unaligned + add r10, r4, #48 + adds r4, r4, #64 + stmia r8!, {r0, r1, r2, r3} + mov r8, lr + vldr d20, [r6, #80] + vldr d21, [r6, #88] + veor q10, q10, q9 + vstr d20, [r6, #80] + vstr d21, [r6, #88] + ldmia ip!, {r0, r1, r2, r3} + mov ip, lr + str r0, [r4, #-48] @ unaligned + str r1, [r4, #-44] @ unaligned + str r2, [r4, #-40] @ unaligned + str r3, [r4, #-36] @ unaligned + ldr r0, [r9, #32]! @ unaligned + ldr r1, [r9, #4] @ unaligned + ldr r2, [r9, #8] @ unaligned + ldr r3, [r9, #12] @ unaligned + mov r9, r5 + adds r5, r5, #64 + stmia r8!, {r0, r1, r2, r3} + mov r8, lr + vldr d18, [r6, #80] + vldr d19, [r6, #88] + veor q9, q9, q8 + vstr d18, [r6, #80] + vstr d19, [r6, #88] + ldmia ip!, {r0, r1, r2, r3} + mov ip, lr + str r0, [r4, #-32] @ unaligned + str r1, [r4, #-28] @ unaligned + str r2, [r4, #-24] @ unaligned + str r3, [r4, #-20] @ unaligned + ldr r0, [r9, #48]! @ unaligned + ldr r1, [r9, #4] @ unaligned + ldr r2, [r9, #8] @ unaligned + ldr r3, [r9, #12] @ unaligned + stmia r8!, {r0, r1, r2, r3} + vldr d16, [r6, #80] + vldr d17, [r6, #88] + veor q8, q8, q3 + vstr d16, [r6, #80] + vstr d17, [r6, #88] + ldmia ip!, {r0, r1, r2, r3} + str r0, [r4, #-16] @ unaligned + str r1, [r4, #-12] @ unaligned + str r3, [r10, #12] @ unaligned + ldr r3, [r7, #108] + str r2, [r10, #8] @ unaligned + cmp r3, #1 + beq .L88 + movs r3, #1 + str r3, [r7, #108] + b .L8 +.L96: + .align 3 +.L95: + .word 1 + .word 0 + .word 0 + .word 0 +.L88: + ldr fp, [r7, #104] + ldr r3, [r7, #12] + lsl fp, fp, #6 + add r3, r3, fp + str r3, [r7, #12] + ldr r3, [r7, #16] + add r3, r3, fp + str r3, [r7, #16] +.L6: + ldr r3, [r7, #8] + ands r9, r3, #63 + beq .L1 + vmov q3, q11 @ v4si + movs r3, #10 + vmov q8, q14 @ v4si + mov r5, r9 + vmov q15, q13 @ v4si + vmov q10, q12 @ v4si +.L10: + vadd.i32 q10, q10, q15 + subs r3, r3, #1 + veor q9, q3, q10 + vrev32.16 q9, q9 + vadd.i32 q8, q8, q9 + veor q15, q8, q15 + vshl.i32 q3, q15, #12 + vsri.32 q3, q15, #20 + vadd.i32 q10, q10, q3 + veor q15, q10, q9 + vshl.i32 q9, q15, #8 + vsri.32 q9, q15, #24 + vadd.i32 q8, q8, q9 + vext.32 q9, q9, q9, #3 + veor q3, q8, q3 + vext.32 q8, q8, q8, #2 + vshl.i32 q15, q3, #7 + vsri.32 q15, q3, #25 + vext.32 q15, q15, q15, #1 + vadd.i32 q10, q10, q15 + veor q9, q10, q9 + vrev32.16 q9, q9 + vadd.i32 q8, q8, q9 + veor q15, q8, q15 + vshl.i32 q3, q15, #12 + vsri.32 q3, q15, #20 + vadd.i32 q10, q10, q3 + vmov q15, q3 @ v4si + veor q9, q10, q9 + vshl.i32 q3, q9, #8 + vsri.32 q3, q9, #24 + vadd.i32 q8, q8, q3 + vext.32 q3, q3, q3, #1 + veor q9, q8, q15 + vext.32 q8, q8, q8, #2 + vshl.i32 q15, q9, #7 + vsri.32 q15, q9, #25 + vext.32 q15, q15, q15, #3 + bne .L10 + cmp r5, #15 + mov r9, r5 + bhi .L89 + vadd.i32 q12, q12, q10 + ldr r3, [r7, #72] + vst1.64 {d24-d25}, [r3:128] +.L14: + ldr r3, [r7, #8] + and r2, r3, #48 + cmp r9, r2 + bls .L1 + ldr r6, [r7, #16] + add r3, r2, #16 + ldr r1, [r7, #12] + rsb ip, r2, r9 + adds r0, r1, r2 + mov r4, r6 + add r1, r1, r3 + add r4, r4, r2 + add r3, r3, r6 + cmp r0, r3 + it cc + cmpcc r4, r1 + ite cs + movcs r3, #1 + movcc r3, #0 + cmp ip, #18 + ite ls + movls r3, #0 + andhi r3, r3, #1 + cmp r3, #0 + beq .L16 + and r1, r0, #7 + mov r3, r2 + negs r1, r1 + and r1, r1, #15 + cmp r1, ip + it cs + movcs r1, ip + cmp r1, #0 + beq .L17 + ldr r5, [r7, #72] + cmp r1, #1 + ldrb r0, [r0] @ zero_extendqisi2 + add r3, r2, #1 + ldrb lr, [r5, r2] @ zero_extendqisi2 + mov r6, r5 + eor r0, lr, r0 + strb r0, [r4] + beq .L17 + ldr r0, [r7, #12] + cmp r1, #2 + ldrb r4, [r5, r3] @ zero_extendqisi2 + ldr r5, [r7, #16] + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #2 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #3 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #3 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #4 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #4 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #5 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #5 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #6 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #6 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #7 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #7 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #8 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #8 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #9 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #9 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #10 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #10 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #11 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #11 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #12 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #12 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #13 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #13 + beq .L17 + ldr r0, [r7, #12] + cmp r1, #15 + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #14 + bne .L17 + ldr r0, [r7, #12] + ldrb r4, [r6, r3] @ zero_extendqisi2 + ldrb r0, [r0, r3] @ zero_extendqisi2 + eors r0, r0, r4 + strb r0, [r5, r3] + add r3, r2, #15 +.L17: + rsb r4, r1, ip + add r0, ip, #-1 + sub r6, r4, #16 + subs r0, r0, r1 + cmp r0, #14 + lsr r6, r6, #4 + add r6, r6, #1 + lsl lr, r6, #4 + bls .L19 + add r2, r2, r1 + ldr r1, [r7, #12] + ldr r5, [r7, #16] + cmp r6, #1 + add r0, r1, r2 + ldr r1, [r7, #72] + add r1, r1, r2 + vld1.64 {d18-d19}, [r0:64] + add r2, r2, r5 + vld1.8 {q8}, [r1] + veor q8, q8, q9 + vst1.8 {q8}, [r2] + beq .L20 + add r8, r1, #16 + add ip, r2, #16 + vldr d18, [r0, #16] + vldr d19, [r0, #24] + cmp r6, #2 + vld1.8 {q8}, [r8] + veor q8, q8, q9 + vst1.8 {q8}, [ip] + beq .L20 + add r8, r1, #32 + add ip, r2, #32 + vldr d18, [r0, #32] + vldr d19, [r0, #40] + cmp r6, #3 + vld1.8 {q8}, [r8] + veor q8, q8, q9 + vst1.8 {q8}, [ip] + beq .L20 + adds r1, r1, #48 + adds r2, r2, #48 + vldr d18, [r0, #48] + vldr d19, [r0, #56] + vld1.8 {q8}, [r1] + veor q8, q8, q9 + vst1.8 {q8}, [r2] +.L20: + cmp lr, r4 + add r3, r3, lr + beq .L1 +.L19: + ldr r4, [r7, #72] + adds r2, r3, #1 + ldr r1, [r7, #12] + cmp r2, r9 + ldr r5, [r7, #16] + ldrb r0, [r4, r3] @ zero_extendqisi2 + ldrb r1, [r1, r3] @ zero_extendqisi2 + eor r1, r1, r0 + strb r1, [r5, r3] + bcs .L1 + ldr r0, [r7, #12] + adds r1, r3, #2 + mov r6, r4 + cmp r9, r1 + ldrb r4, [r4, r2] @ zero_extendqisi2 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + adds r2, r3, #3 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r0, [r7, #12] + adds r1, r3, #4 + ldrb r4, [r6, r2] @ zero_extendqisi2 + cmp r9, r1 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + adds r2, r3, #5 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r0, [r7, #12] + adds r1, r3, #6 + ldrb r4, [r6, r2] @ zero_extendqisi2 + cmp r9, r1 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + adds r2, r3, #7 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r0, [r7, #12] + add r1, r3, #8 + ldrb r4, [r6, r2] @ zero_extendqisi2 + cmp r9, r1 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + add r2, r3, #9 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r0, [r7, #12] + add r1, r3, #10 + ldrb r4, [r6, r2] @ zero_extendqisi2 + cmp r9, r1 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + add r2, r3, #11 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r0, [r7, #12] + add r1, r3, #12 + ldrb r4, [r6, r2] @ zero_extendqisi2 + cmp r9, r1 + ldrb r0, [r0, r2] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r2] + bls .L1 + ldr r0, [r7, #12] + add r2, r3, #13 + ldrb r4, [r6, r1] @ zero_extendqisi2 + cmp r9, r2 + ldrb r0, [r0, r1] @ zero_extendqisi2 + eor r0, r0, r4 + strb r0, [r5, r1] + bls .L1 + ldr r1, [r7, #12] + adds r3, r3, #14 + ldrb r0, [r6, r2] @ zero_extendqisi2 + cmp r9, r3 + ldrb r1, [r1, r2] @ zero_extendqisi2 + eor r1, r1, r0 + strb r1, [r5, r2] + bls .L1 + ldr r2, [r7, #72] + ldrb r1, [r2, r3] @ zero_extendqisi2 + ldr r2, [r7, #12] + ldrb r2, [r2, r3] @ zero_extendqisi2 + eors r2, r2, r1 + ldr r1, [r7, #16] + strb r2, [r1, r3] +.L1: + adds r7, r7, #132 + mov sp, r7 + @ sp needed + vldm sp!, {d8-d15} + pop {r4, r5, r6, r7, r8, r9, r10, fp, pc} +.L89: + ldr r4, [r7, #12] + vadd.i32 q12, q12, q10 + ldr r5, [r7, #72] + cmp r9, #31 + ldr r0, [r4] @ unaligned + add r6, r5, #80 + ldr r1, [r4, #4] @ unaligned + ldr r2, [r4, #8] @ unaligned + mov r5, r6 + ldr r3, [r4, #12] @ unaligned + mov r4, r6 + str r6, [r7, #68] + stmia r6!, {r0, r1, r2, r3} + ldr r2, [r7, #72] + ldr r6, [r7, #16] + vldr d18, [r2, #80] + vldr d19, [r2, #88] + veor q9, q9, q12 + vstr d18, [r2, #80] + vstr d19, [r2, #88] + ldmia r4!, {r0, r1, r2, r3} + str r1, [r6, #4] @ unaligned + mov r1, r6 + str r0, [r6] @ unaligned + str r2, [r6, #8] @ unaligned + str r3, [r6, #12] @ unaligned + bhi .L90 + vadd.i32 q13, q13, q15 + ldr r3, [r7, #72] + vstr d26, [r3, #16] + vstr d27, [r3, #24] + b .L14 +.L16: + subs r3, r2, #1 + ldr r2, [r7, #12] + add r2, r2, r9 + mov r5, r2 + ldr r2, [r7, #72] + add r2, r2, r3 + mov r3, r2 +.L24: + ldrb r1, [r0], #1 @ zero_extendqisi2 + ldrb r2, [r3, #1]! @ zero_extendqisi2 + cmp r0, r5 + eor r2, r2, r1 + strb r2, [r4], #1 + bne .L24 + adds r7, r7, #132 + mov sp, r7 + @ sp needed + vldm sp!, {d8-d15} + pop {r4, r5, r6, r7, r8, r9, r10, fp, pc} +.L26: + str fp, [r7, #16] + b .L2 +.L90: + ldr r3, [r7, #12] + add lr, r1, #16 + mov r4, r5 + mov r6, r5 + mov r5, r1 + vadd.i32 q13, q13, q15 + ldr r0, [r3, #16]! @ unaligned + cmp r9, #47 + ldr r1, [r3, #4] @ unaligned + ldr r2, [r3, #8] @ unaligned + ldr r3, [r3, #12] @ unaligned + stmia r6!, {r0, r1, r2, r3} + ldr r2, [r7, #72] + vldr d18, [r2, #80] + vldr d19, [r2, #88] + veor q13, q9, q13 + vstr d26, [r2, #80] + vstr d27, [r2, #88] + ldmia r4!, {r0, r1, r2, r3} + str r0, [r5, #16] @ unaligned + str r1, [lr, #4] @ unaligned + str r2, [lr, #8] @ unaligned + str r3, [lr, #12] @ unaligned + bhi .L91 + vadd.i32 q8, q14, q8 + ldr r3, [r7, #72] + vstr d16, [r3, #32] + vstr d17, [r3, #40] + b .L14 +.L91: + ldr r3, [r7, #12] + add lr, r5, #32 + ldr r4, [r7, #68] + vadd.i32 q8, q14, q8 + ldr r5, [r7, #72] + vadd.i32 q11, q11, q3 + ldr r0, [r3, #32]! @ unaligned + mov r6, r4 + vstr d22, [r5, #48] + vstr d23, [r5, #56] + ldr r1, [r3, #4] @ unaligned + ldr r2, [r3, #8] @ unaligned + ldr r3, [r3, #12] @ unaligned + stmia r4!, {r0, r1, r2, r3} + vldr d18, [r5, #80] + vldr d19, [r5, #88] + veor q9, q9, q8 + ldr r4, [r7, #16] + vstr d18, [r5, #80] + vstr d19, [r5, #88] + ldmia r6!, {r0, r1, r2, r3} + str r0, [r4, #32] @ unaligned + str r1, [lr, #4] @ unaligned + str r2, [lr, #8] @ unaligned + str r3, [lr, #12] @ unaligned + b .L14 + .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon + .section .rodata + .align 2 +.LANCHOR0 = . + 0 +.LC0: + .word 1634760805 + .word 857760878 + .word 2036477234 + .word 1797285236 + .ident "GCC: (Linaro GCC 2014.11) 4.9.3 20141031 (prerelease)" + .section .note.GNU-stack,"",%progbits + +#endif /* !OPENSSL_NO_ASM */ |