summaryrefslogtreecommitdiffstats
path: root/win-x86_64/crypto
diff options
context:
space:
mode:
authorKenny Root <kroot@google.com>2015-09-25 00:26:37 +0000
committerKenny Root <kroot@google.com>2015-09-25 00:26:37 +0000
commita04d78d392463df4e69a64360c952ffa5abd22f7 (patch)
treedc62c249d595198e0d99e43890019d21e901fbec /win-x86_64/crypto
parent1e4884f615b20946411a74e41eb9c6aa65e2d5f3 (diff)
downloadexternal_boringssl-a04d78d392463df4e69a64360c952ffa5abd22f7.zip
external_boringssl-a04d78d392463df4e69a64360c952ffa5abd22f7.tar.gz
external_boringssl-a04d78d392463df4e69a64360c952ffa5abd22f7.tar.bz2
Revert "external/boringssl: sync with upstream."
This reverts commit 1e4884f615b20946411a74e41eb9c6aa65e2d5f3. This breaks some x86 builds. Change-Id: I4d4310663ce52bc0a130e6b9dbc22b868ff4fb25
Diffstat (limited to 'win-x86_64/crypto')
-rw-r--r--win-x86_64/crypto/bn/modexp512-x86_64.asm1887
-rw-r--r--win-x86_64/crypto/cpu-x86_64-asm.asm154
2 files changed, 2041 insertions, 0 deletions
diff --git a/win-x86_64/crypto/bn/modexp512-x86_64.asm b/win-x86_64/crypto/bn/modexp512-x86_64.asm
new file mode 100644
index 0000000..d3e4a61
--- /dev/null
+++ b/win-x86_64/crypto/bn/modexp512-x86_64.asm
@@ -0,0 +1,1887 @@
+OPTION DOTNAME
+.text$ SEGMENT ALIGN(256) 'CODE'
+
+
+ALIGN 16
+MULADD_128x512 PROC PRIVATE
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[rcx],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov rbp,QWORD PTR[8+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ mov QWORD PTR[8+rcx],r9
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov r9,rdx
+ DB 0F3h,0C3h ;repret
+MULADD_128x512 ENDP
+
+ALIGN 16
+mont_reduce PROC PRIVATE
+ lea rdi,QWORD PTR[192+rsp]
+ mov rsi,QWORD PTR[32+rsp]
+ add rsi,576
+ lea rcx,QWORD PTR[520+rsp]
+
+ mov rbp,QWORD PTR[96+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ mov r8,QWORD PTR[rcx]
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[rdi],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ mov r9,QWORD PTR[8+rcx]
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ mov r10,QWORD PTR[16+rcx]
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ mov r11,QWORD PTR[24+rcx]
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ mov r12,QWORD PTR[32+rcx]
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ mov r13,QWORD PTR[40+rcx]
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ mov r14,QWORD PTR[48+rcx]
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ mov r15,QWORD PTR[56+rcx]
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov rbp,QWORD PTR[104+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ mov QWORD PTR[8+rdi],r9
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov r9,rdx
+ mov rbp,QWORD PTR[112+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov QWORD PTR[16+rdi],r10
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov r10,rdx
+ mov rbp,QWORD PTR[120+rcx]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[24+rdi],r11
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov r11,rdx
+ xor rax,rax
+
+ add r8,QWORD PTR[64+rcx]
+ adc r9,QWORD PTR[72+rcx]
+ adc r10,QWORD PTR[80+rcx]
+ adc r11,QWORD PTR[88+rcx]
+ adc rax,0
+
+
+
+
+ mov QWORD PTR[64+rdi],r8
+ mov QWORD PTR[72+rdi],r9
+ mov rbp,r10
+ mov QWORD PTR[88+rdi],r11
+
+ mov QWORD PTR[384+rsp],rax
+
+ mov r8,QWORD PTR[rdi]
+ mov r9,QWORD PTR[8+rdi]
+ mov r10,QWORD PTR[16+rdi]
+ mov r11,QWORD PTR[24+rdi]
+
+
+
+
+
+
+
+
+ add rdi,8*10
+
+ add rsi,64
+ lea rcx,QWORD PTR[296+rsp]
+
+ call MULADD_128x512
+
+ mov rax,QWORD PTR[384+rsp]
+
+
+ add r8,QWORD PTR[((-16))+rdi]
+ adc r9,QWORD PTR[((-8))+rdi]
+ mov QWORD PTR[64+rcx],r8
+ mov QWORD PTR[72+rcx],r9
+
+ adc rax,rax
+ mov QWORD PTR[384+rsp],rax
+
+ lea rdi,QWORD PTR[192+rsp]
+ add rsi,64
+
+
+
+
+
+ mov r8,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+
+ mov rax,QWORD PTR[rcx]
+ mul r8
+ mov rbp,rax
+ mov r9,rdx
+
+ mov rax,QWORD PTR[8+rcx]
+ mul r8
+ add r9,rax
+
+ mov rax,QWORD PTR[rcx]
+ mul rbx
+ add r9,rax
+
+ mov QWORD PTR[8+rdi],r9
+
+
+ sub rsi,192
+
+ mov r8,QWORD PTR[rcx]
+ mov r9,QWORD PTR[8+rcx]
+
+ call MULADD_128x512
+
+
+
+
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdi,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+
+
+ mov rbp,QWORD PTR[384+rsp]
+
+ add r8,QWORD PTR[64+rcx]
+ adc r9,QWORD PTR[72+rcx]
+
+
+ adc rbp,rbp
+
+
+
+ shl rbp,3
+ mov rcx,QWORD PTR[32+rsp]
+ add rbp,rcx
+
+
+ xor rsi,rsi
+
+ add r10,QWORD PTR[rbp]
+ adc r11,QWORD PTR[64+rbp]
+ adc r12,QWORD PTR[128+rbp]
+ adc r13,QWORD PTR[192+rbp]
+ adc r14,QWORD PTR[256+rbp]
+ adc r15,QWORD PTR[320+rbp]
+ adc r8,QWORD PTR[384+rbp]
+ adc r9,QWORD PTR[448+rbp]
+
+
+
+ sbb rsi,0
+
+
+ and rax,rsi
+ and rbx,rsi
+ and rdi,rsi
+ and rdx,rsi
+
+ mov rbp,1
+ sub r10,rax
+ sbb r11,rbx
+ sbb r12,rdi
+ sbb r13,rdx
+
+
+
+
+ sbb rbp,0
+
+
+
+ add rcx,512
+ mov rax,QWORD PTR[32+rcx]
+ mov rbx,QWORD PTR[40+rcx]
+ mov rdi,QWORD PTR[48+rcx]
+ mov rdx,QWORD PTR[56+rcx]
+
+
+
+ and rax,rsi
+ and rbx,rsi
+ and rdi,rsi
+ and rdx,rsi
+
+
+
+ sub rbp,1
+
+ sbb r14,rax
+ sbb r15,rbx
+ sbb r8,rdi
+ sbb r9,rdx
+
+
+
+ mov rsi,QWORD PTR[144+rsp]
+ mov QWORD PTR[rsi],r10
+ mov QWORD PTR[8+rsi],r11
+ mov QWORD PTR[16+rsi],r12
+ mov QWORD PTR[24+rsi],r13
+ mov QWORD PTR[32+rsi],r14
+ mov QWORD PTR[40+rsi],r15
+ mov QWORD PTR[48+rsi],r8
+ mov QWORD PTR[56+rsi],r9
+
+ DB 0F3h,0C3h ;repret
+mont_reduce ENDP
+
+ALIGN 16
+mont_mul_a3b PROC PRIVATE
+
+
+
+
+ mov rbp,QWORD PTR[rdi]
+
+ mov rax,r10
+ mul rbp
+ mov QWORD PTR[520+rsp],rax
+ mov r10,rdx
+ mov rax,r11
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov r11,rdx
+ mov rax,r12
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov r12,rdx
+ mov rax,r13
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ mov r13,rdx
+ mov rax,r14
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ mov r14,rdx
+ mov rax,r15
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ mov r15,rdx
+ mov rax,r8
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ mov r8,rdx
+ mov rax,r9
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov r9,rdx
+ mov rbp,QWORD PTR[8+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ mov QWORD PTR[528+rsp],r10
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov r10,rdx
+ mov rbp,QWORD PTR[16+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[536+rsp],r11
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov r11,rdx
+ mov rbp,QWORD PTR[24+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ mov QWORD PTR[544+rsp],r12
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov r12,rdx
+ mov rbp,QWORD PTR[32+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ mov QWORD PTR[552+rsp],r13
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov r13,rdx
+ mov rbp,QWORD PTR[40+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ mov QWORD PTR[560+rsp],r14
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov r14,rdx
+ mov rbp,QWORD PTR[48+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[568+rsp],r15
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ add r8,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov r15,rdx
+ mov rbp,QWORD PTR[56+rdi]
+ mov rax,QWORD PTR[rsi]
+ mul rbp
+ add r8,rax
+ adc rdx,0
+ mov QWORD PTR[576+rsp],r8
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rsi]
+ mul rbp
+ add r9,rax
+ adc rdx,0
+ add r9,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[16+rsi]
+ mul rbp
+ add r10,rax
+ adc rdx,0
+ add r10,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[24+rsi]
+ mul rbp
+ add r11,rax
+ adc rdx,0
+ add r11,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[32+rsi]
+ mul rbp
+ add r12,rax
+ adc rdx,0
+ add r12,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[40+rsi]
+ mul rbp
+ add r13,rax
+ adc rdx,0
+ add r13,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[48+rsi]
+ mul rbp
+ add r14,rax
+ adc rdx,0
+ add r14,rbx
+ adc rdx,0
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[56+rsi]
+ mul rbp
+ add r15,rax
+ adc rdx,0
+ add r15,rbx
+ adc rdx,0
+ mov r8,rdx
+ mov QWORD PTR[584+rsp],r9
+ mov QWORD PTR[592+rsp],r10
+ mov QWORD PTR[600+rsp],r11
+ mov QWORD PTR[608+rsp],r12
+ mov QWORD PTR[616+rsp],r13
+ mov QWORD PTR[624+rsp],r14
+ mov QWORD PTR[632+rsp],r15
+ mov QWORD PTR[640+rsp],r8
+
+
+
+
+
+ jmp mont_reduce
+
+
+mont_mul_a3b ENDP
+
+ALIGN 16
+sqr_reduce PROC PRIVATE
+ mov rcx,QWORD PTR[16+rsp]
+
+
+
+ mov rbx,r10
+
+ mov rax,r11
+ mul rbx
+ mov QWORD PTR[528+rsp],rax
+ mov r10,rdx
+ mov rax,r12
+ mul rbx
+ add r10,rax
+ adc rdx,0
+ mov r11,rdx
+ mov rax,r13
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov r12,rdx
+ mov rax,r14
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ mov r13,rdx
+ mov rax,r15
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ mov r14,rdx
+ mov rax,r8
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ mov r15,rdx
+ mov rax,r9
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov rsi,rdx
+
+ mov QWORD PTR[536+rsp],r10
+
+
+
+
+
+ mov rbx,QWORD PTR[8+rcx]
+
+ mov rax,QWORD PTR[16+rcx]
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[544+rsp],r11
+
+ mov r10,rdx
+ mov rax,QWORD PTR[24+rcx]
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+ mov QWORD PTR[552+rsp],r12
+
+ mov r10,rdx
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ add r13,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ add r14,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+
+ mov r11,rdx
+
+
+
+
+ mov rbx,QWORD PTR[16+rcx]
+
+ mov rax,QWORD PTR[24+rcx]
+ mul rbx
+ add r13,rax
+ adc rdx,0
+ mov QWORD PTR[560+rsp],r13
+
+ mov r10,rdx
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r14,rax
+ adc rdx,0
+ add r14,r10
+ adc rdx,0
+ mov QWORD PTR[568+rsp],r14
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+
+ mov r12,rdx
+
+
+
+
+
+ mov rbx,QWORD PTR[24+rcx]
+
+ mov rax,QWORD PTR[32+rcx]
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[576+rsp],r15
+
+ mov r10,rdx
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add rsi,rax
+ adc rdx,0
+ add rsi,r10
+ adc rdx,0
+ mov QWORD PTR[584+rsp],rsi
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+
+ mov r15,rdx
+
+
+
+
+ mov rbx,QWORD PTR[32+rcx]
+
+ mov rax,QWORD PTR[40+rcx]
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ mov QWORD PTR[592+rsp],r11
+
+ mov r10,rdx
+ mov rax,r8
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ add r12,r10
+ adc rdx,0
+ mov QWORD PTR[600+rsp],r12
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ add r15,r10
+ adc rdx,0
+
+ mov r11,rdx
+
+
+
+
+ mov rbx,QWORD PTR[40+rcx]
+
+ mov rax,r8
+ mul rbx
+ add r15,rax
+ adc rdx,0
+ mov QWORD PTR[608+rsp],r15
+
+ mov r10,rdx
+ mov rax,r9
+ mul rbx
+ add r11,rax
+ adc rdx,0
+ add r11,r10
+ adc rdx,0
+ mov QWORD PTR[616+rsp],r11
+
+ mov r12,rdx
+
+
+
+
+ mov rbx,r8
+
+ mov rax,r9
+ mul rbx
+ add r12,rax
+ adc rdx,0
+ mov QWORD PTR[624+rsp],r12
+
+ mov QWORD PTR[632+rsp],rdx
+
+
+ mov r10,QWORD PTR[528+rsp]
+ mov r11,QWORD PTR[536+rsp]
+ mov r12,QWORD PTR[544+rsp]
+ mov r13,QWORD PTR[552+rsp]
+ mov r14,QWORD PTR[560+rsp]
+ mov r15,QWORD PTR[568+rsp]
+
+ mov rax,QWORD PTR[24+rcx]
+ mul rax
+ mov rdi,rax
+ mov r8,rdx
+
+ add r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,r15
+ adc r8,0
+
+ mov rax,QWORD PTR[rcx]
+ mul rax
+ mov QWORD PTR[520+rsp],rax
+ mov rbx,rdx
+
+ mov rax,QWORD PTR[8+rcx]
+ mul rax
+
+ add r10,rbx
+ adc r11,rax
+ adc rdx,0
+
+ mov rbx,rdx
+ mov QWORD PTR[528+rsp],r10
+ mov QWORD PTR[536+rsp],r11
+
+ mov rax,QWORD PTR[16+rcx]
+ mul rax
+
+ add r12,rbx
+ adc r13,rax
+ adc rdx,0
+
+ mov rbx,rdx
+
+ mov QWORD PTR[544+rsp],r12
+ mov QWORD PTR[552+rsp],r13
+
+ xor rbp,rbp
+ add r14,rbx
+ adc r15,rdi
+ adc rbp,0
+
+ mov QWORD PTR[560+rsp],r14
+ mov QWORD PTR[568+rsp],r15
+
+
+
+
+ mov r10,QWORD PTR[576+rsp]
+ mov r11,QWORD PTR[584+rsp]
+ mov r12,QWORD PTR[592+rsp]
+ mov r13,QWORD PTR[600+rsp]
+ mov r14,QWORD PTR[608+rsp]
+ mov r15,QWORD PTR[616+rsp]
+ mov rdi,QWORD PTR[624+rsp]
+ mov rsi,QWORD PTR[632+rsp]
+
+ mov rax,r9
+ mul rax
+ mov r9,rax
+ mov rbx,rdx
+
+ add r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,r15
+ adc rdi,rdi
+ adc rsi,rsi
+ adc rbx,0
+
+ add r10,rbp
+
+ mov rax,QWORD PTR[32+rcx]
+ mul rax
+
+ add r10,r8
+ adc r11,rax
+ adc rdx,0
+
+ mov rbp,rdx
+
+ mov QWORD PTR[576+rsp],r10
+ mov QWORD PTR[584+rsp],r11
+
+ mov rax,QWORD PTR[40+rcx]
+ mul rax
+
+ add r12,rbp
+ adc r13,rax
+ adc rdx,0
+
+ mov rbp,rdx
+
+ mov QWORD PTR[592+rsp],r12
+ mov QWORD PTR[600+rsp],r13
+
+ mov rax,QWORD PTR[48+rcx]
+ mul rax
+
+ add r14,rbp
+ adc r15,rax
+ adc rdx,0
+
+ mov QWORD PTR[608+rsp],r14
+ mov QWORD PTR[616+rsp],r15
+
+ add rdi,rdx
+ adc rsi,r9
+ adc rbx,0
+
+ mov QWORD PTR[624+rsp],rdi
+ mov QWORD PTR[632+rsp],rsi
+ mov QWORD PTR[640+rsp],rbx
+
+ jmp mont_reduce
+
+
+sqr_reduce ENDP
+PUBLIC mod_exp_512
+
+mod_exp_512 PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_mod_exp_512::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r8,rsp
+ sub rsp,2688
+ and rsp,-64
+
+
+ mov QWORD PTR[rsp],r8
+ mov QWORD PTR[8+rsp],rdi
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],rcx
+$L$body::
+
+
+
+ pxor xmm4,xmm4
+ movdqu xmm0,XMMWORD PTR[rsi]
+ movdqu xmm1,XMMWORD PTR[16+rsi]
+ movdqu xmm2,XMMWORD PTR[32+rsi]
+ movdqu xmm3,XMMWORD PTR[48+rsi]
+ movdqa XMMWORD PTR[512+rsp],xmm4
+ movdqa XMMWORD PTR[528+rsp],xmm4
+ movdqa XMMWORD PTR[608+rsp],xmm4
+ movdqa XMMWORD PTR[624+rsp],xmm4
+ movdqa XMMWORD PTR[544+rsp],xmm0
+ movdqa XMMWORD PTR[560+rsp],xmm1
+ movdqa XMMWORD PTR[576+rsp],xmm2
+ movdqa XMMWORD PTR[592+rsp],xmm3
+
+
+ movdqu xmm0,XMMWORD PTR[rdx]
+ movdqu xmm1,XMMWORD PTR[16+rdx]
+ movdqu xmm2,XMMWORD PTR[32+rdx]
+ movdqu xmm3,XMMWORD PTR[48+rdx]
+
+ lea rbx,QWORD PTR[384+rsp]
+ mov QWORD PTR[136+rsp],rbx
+ call mont_reduce
+
+
+ lea rcx,QWORD PTR[448+rsp]
+ xor rax,rax
+ mov QWORD PTR[rcx],rax
+ mov QWORD PTR[8+rcx],rax
+ mov QWORD PTR[24+rcx],rax
+ mov QWORD PTR[32+rcx],rax
+ mov QWORD PTR[40+rcx],rax
+ mov QWORD PTR[48+rcx],rax
+ mov QWORD PTR[56+rcx],rax
+ mov QWORD PTR[128+rsp],rax
+ mov QWORD PTR[16+rcx],1
+
+ lea rbp,QWORD PTR[640+rsp]
+ mov rsi,rcx
+ mov rdi,rbp
+ mov rax,8
+loop_0::
+ mov rbx,QWORD PTR[rcx]
+ mov WORD PTR[rdi],bx
+ shr rbx,16
+ mov WORD PTR[64+rdi],bx
+ shr rbx,16
+ mov WORD PTR[128+rdi],bx
+ shr rbx,16
+ mov WORD PTR[192+rdi],bx
+ lea rcx,QWORD PTR[8+rcx]
+ lea rdi,QWORD PTR[256+rdi]
+ dec rax
+ jnz loop_0
+ mov rax,31
+ mov QWORD PTR[32+rsp],rax
+ mov QWORD PTR[40+rsp],rbp
+
+ mov QWORD PTR[136+rsp],rsi
+ mov r10,QWORD PTR[rsi]
+ mov r11,QWORD PTR[8+rsi]
+ mov r12,QWORD PTR[16+rsi]
+ mov r13,QWORD PTR[24+rsi]
+ mov r14,QWORD PTR[32+rsi]
+ mov r15,QWORD PTR[40+rsi]
+ mov r8,QWORD PTR[48+rsi]
+ mov r9,QWORD PTR[56+rsi]
+init_loop::
+ lea rdi,QWORD PTR[384+rsp]
+ call mont_mul_a3b
+ lea rsi,QWORD PTR[448+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ add rbp,2
+ mov QWORD PTR[40+rsp],rbp
+ mov rcx,rsi
+ mov rax,8
+loop_1::
+ mov rbx,QWORD PTR[rcx]
+ mov WORD PTR[rbp],bx
+ shr rbx,16
+ mov WORD PTR[64+rbp],bx
+ shr rbx,16
+ mov WORD PTR[128+rbp],bx
+ shr rbx,16
+ mov WORD PTR[192+rbp],bx
+ lea rcx,QWORD PTR[8+rcx]
+ lea rbp,QWORD PTR[256+rbp]
+ dec rax
+ jnz loop_1
+ mov rax,QWORD PTR[32+rsp]
+ sub rax,1
+ mov QWORD PTR[32+rsp],rax
+ jne init_loop
+
+
+
+ movdqa XMMWORD PTR[64+rsp],xmm0
+ movdqa XMMWORD PTR[80+rsp],xmm1
+ movdqa XMMWORD PTR[96+rsp],xmm2
+ movdqa XMMWORD PTR[112+rsp],xmm3
+
+
+
+
+
+ mov eax,DWORD PTR[126+rsp]
+ mov rdx,rax
+ shr rax,11
+ and edx,007FFh
+ mov DWORD PTR[126+rsp],edx
+ lea rsi,QWORD PTR[640+rax*2+rsp]
+ mov rdx,QWORD PTR[8+rsp]
+ mov rbp,4
+loop_2::
+ movzx rbx,WORD PTR[192+rsi]
+ movzx rax,WORD PTR[448+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[128+rsi]
+ mov ax,WORD PTR[384+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[64+rsi]
+ mov ax,WORD PTR[320+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[rsi]
+ mov ax,WORD PTR[256+rsi]
+ mov QWORD PTR[rdx],rbx
+ mov QWORD PTR[8+rdx],rax
+ lea rsi,QWORD PTR[512+rsi]
+ lea rdx,QWORD PTR[16+rdx]
+ sub rbp,1
+ jnz loop_2
+ mov QWORD PTR[48+rsp],505
+
+ mov rcx,QWORD PTR[8+rsp]
+ mov QWORD PTR[136+rsp],rcx
+ mov r10,QWORD PTR[rcx]
+ mov r11,QWORD PTR[8+rcx]
+ mov r12,QWORD PTR[16+rcx]
+ mov r13,QWORD PTR[24+rcx]
+ mov r14,QWORD PTR[32+rcx]
+ mov r15,QWORD PTR[40+rcx]
+ mov r8,QWORD PTR[48+rcx]
+ mov r9,QWORD PTR[56+rcx]
+ jmp sqr_2
+
+main_loop_a3b::
+ call sqr_reduce
+ call sqr_reduce
+ call sqr_reduce
+sqr_2::
+ call sqr_reduce
+ call sqr_reduce
+
+
+
+ mov rcx,QWORD PTR[48+rsp]
+ mov rax,rcx
+ shr rax,4
+ mov edx,DWORD PTR[64+rax*2+rsp]
+ and rcx,15
+ shr rdx,cl
+ and rdx,01Fh
+
+ lea rsi,QWORD PTR[640+rdx*2+rsp]
+ lea rdx,QWORD PTR[448+rsp]
+ mov rdi,rdx
+ mov rbp,4
+loop_3::
+ movzx rbx,WORD PTR[192+rsi]
+ movzx rax,WORD PTR[448+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[128+rsi]
+ mov ax,WORD PTR[384+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[64+rsi]
+ mov ax,WORD PTR[320+rsi]
+ shl rbx,16
+ shl rax,16
+ mov bx,WORD PTR[rsi]
+ mov ax,WORD PTR[256+rsi]
+ mov QWORD PTR[rdx],rbx
+ mov QWORD PTR[8+rdx],rax
+ lea rsi,QWORD PTR[512+rsi]
+ lea rdx,QWORD PTR[16+rdx]
+ sub rbp,1
+ jnz loop_3
+ mov rsi,QWORD PTR[8+rsp]
+ call mont_mul_a3b
+
+
+
+ mov rcx,QWORD PTR[48+rsp]
+ sub rcx,5
+ mov QWORD PTR[48+rsp],rcx
+ jge main_loop_a3b
+
+
+
+end_main_loop_a3b::
+
+
+ mov rdx,QWORD PTR[8+rsp]
+ pxor xmm4,xmm4
+ movdqu xmm0,XMMWORD PTR[rdx]
+ movdqu xmm1,XMMWORD PTR[16+rdx]
+ movdqu xmm2,XMMWORD PTR[32+rdx]
+ movdqu xmm3,XMMWORD PTR[48+rdx]
+ movdqa XMMWORD PTR[576+rsp],xmm4
+ movdqa XMMWORD PTR[592+rsp],xmm4
+ movdqa XMMWORD PTR[608+rsp],xmm4
+ movdqa XMMWORD PTR[624+rsp],xmm4
+ movdqa XMMWORD PTR[512+rsp],xmm0
+ movdqa XMMWORD PTR[528+rsp],xmm1
+ movdqa XMMWORD PTR[544+rsp],xmm2
+ movdqa XMMWORD PTR[560+rsp],xmm3
+ call mont_reduce
+
+
+
+ mov rax,QWORD PTR[8+rsp]
+ mov r8,QWORD PTR[rax]
+ mov r9,QWORD PTR[8+rax]
+ mov r10,QWORD PTR[16+rax]
+ mov r11,QWORD PTR[24+rax]
+ mov r12,QWORD PTR[32+rax]
+ mov r13,QWORD PTR[40+rax]
+ mov r14,QWORD PTR[48+rax]
+ mov r15,QWORD PTR[56+rax]
+
+
+ mov rbx,QWORD PTR[24+rsp]
+ add rbx,512
+
+ sub r8,QWORD PTR[rbx]
+ sbb r9,QWORD PTR[8+rbx]
+ sbb r10,QWORD PTR[16+rbx]
+ sbb r11,QWORD PTR[24+rbx]
+ sbb r12,QWORD PTR[32+rbx]
+ sbb r13,QWORD PTR[40+rbx]
+ sbb r14,QWORD PTR[48+rbx]
+ sbb r15,QWORD PTR[56+rbx]
+
+
+ mov rsi,QWORD PTR[rax]
+ mov rdi,QWORD PTR[8+rax]
+ mov rcx,QWORD PTR[16+rax]
+ mov rdx,QWORD PTR[24+rax]
+ cmovnc rsi,r8
+ cmovnc rdi,r9
+ cmovnc rcx,r10
+ cmovnc rdx,r11
+ mov QWORD PTR[rax],rsi
+ mov QWORD PTR[8+rax],rdi
+ mov QWORD PTR[16+rax],rcx
+ mov QWORD PTR[24+rax],rdx
+
+ mov rsi,QWORD PTR[32+rax]
+ mov rdi,QWORD PTR[40+rax]
+ mov rcx,QWORD PTR[48+rax]
+ mov rdx,QWORD PTR[56+rax]
+ cmovnc rsi,r12
+ cmovnc rdi,r13
+ cmovnc rcx,r14
+ cmovnc rdx,r15
+ mov QWORD PTR[32+rax],rsi
+ mov QWORD PTR[40+rax],rdi
+ mov QWORD PTR[48+rax],rcx
+ mov QWORD PTR[56+rax],rdx
+
+ mov rsi,QWORD PTR[rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbx,QWORD PTR[32+rsi]
+ mov rbp,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_mod_exp_512::
+mod_exp_512 ENDP
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+mod_exp_512_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$body]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ mov rax,QWORD PTR[rax]
+
+ mov rbx,QWORD PTR[32+rax]
+ mov rbp,QWORD PTR[40+rax]
+ mov r12,QWORD PTR[24+rax]
+ mov r13,QWORD PTR[16+rax]
+ mov r14,QWORD PTR[8+rax]
+ mov r15,QWORD PTR[rax]
+ lea rax,QWORD PTR[48+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+mod_exp_512_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_mod_exp_512
+ DD imagerel $L$SEH_end_mod_exp_512
+ DD imagerel $L$SEH_info_mod_exp_512
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_mod_exp_512::
+DB 9,0,0,0
+ DD imagerel mod_exp_512_se_handler
+
+.xdata ENDS
+END
diff --git a/win-x86_64/crypto/cpu-x86_64-asm.asm b/win-x86_64/crypto/cpu-x86_64-asm.asm
new file mode 100644
index 0000000..c92d7bb
--- /dev/null
+++ b/win-x86_64/crypto/cpu-x86_64-asm.asm
@@ -0,0 +1,154 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+global OPENSSL_ia32_cpuid
+
+ALIGN 16
+OPENSSL_ia32_cpuid:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_OPENSSL_ia32_cpuid:
+ mov rdi,rcx
+
+
+
+
+ mov rdi,rcx
+ mov r8,rbx
+
+ xor eax,eax
+ mov DWORD[8+rdi],eax
+ cpuid
+ mov r11d,eax
+
+ xor eax,eax
+ cmp ebx,0x756e6547
+ setne al
+ mov r9d,eax
+ cmp edx,0x49656e69
+ setne al
+ or r9d,eax
+ cmp ecx,0x6c65746e
+ setne al
+ or r9d,eax
+ jz NEAR $L$intel
+
+ cmp ebx,0x68747541
+ setne al
+ mov r10d,eax
+ cmp edx,0x69746E65
+ setne al
+ or r10d,eax
+ cmp ecx,0x444D4163
+ setne al
+ or r10d,eax
+ jnz NEAR $L$intel
+
+
+
+
+ mov eax,0x80000000
+ cpuid
+
+
+ cmp eax,0x80000001
+ jb NEAR $L$intel
+ mov r10d,eax
+ mov eax,0x80000001
+ cpuid
+
+
+ or r9d,ecx
+ and r9d,0x00000801
+
+ cmp r10d,0x80000008
+ jb NEAR $L$intel
+
+ mov eax,0x80000008
+ cpuid
+
+ movzx r10,cl
+ inc r10
+
+ mov eax,1
+ cpuid
+
+ bt edx,28
+ jnc NEAR $L$generic
+ shr ebx,16
+ cmp bl,r10b
+ ja NEAR $L$generic
+ and edx,0xefffffff
+ jmp NEAR $L$generic
+
+$L$intel:
+ cmp r11d,4
+ mov r10d,-1
+ jb NEAR $L$nocacheinfo
+
+ mov eax,4
+ mov ecx,0
+ cpuid
+ mov r10d,eax
+ shr r10d,14
+ and r10d,0xfff
+
+ cmp r11d,7
+ jb NEAR $L$nocacheinfo
+
+ mov eax,7
+ xor ecx,ecx
+ cpuid
+ mov DWORD[8+rdi],ebx
+
+$L$nocacheinfo:
+ mov eax,1
+ cpuid
+
+ and edx,0xbfefffff
+ cmp r9d,0
+ jne NEAR $L$notintel
+ or edx,0x40000000
+$L$notintel:
+ bt edx,28
+ jnc NEAR $L$generic
+ and edx,0xefffffff
+ cmp r10d,0
+ je NEAR $L$generic
+
+ or edx,0x10000000
+ shr ebx,16
+ cmp bl,1
+ ja NEAR $L$generic
+ and edx,0xefffffff
+$L$generic:
+ and r9d,0x00000800
+ and ecx,0xfffff7ff
+ or r9d,ecx
+
+ mov r10d,edx
+ bt r9d,27
+ jnc NEAR $L$clear_avx
+ xor ecx,ecx
+DB 0x0f,0x01,0xd0
+ and eax,6
+ cmp eax,6
+ je NEAR $L$done
+$L$clear_avx:
+ mov eax,0xefffe7ff
+ and r9d,eax
+ and DWORD[8+rdi],0xffffffdf
+$L$done:
+ mov DWORD[4+rdi],r9d
+ mov DWORD[rdi],r10d
+ mov rbx,r8
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_OPENSSL_ia32_cpuid:
+