summaryrefslogtreecommitdiffstats
path: root/mac-x86/crypto/sha/sha256-586.S
diff options
context:
space:
mode:
Diffstat (limited to 'mac-x86/crypto/sha/sha256-586.S')
-rw-r--r--mac-x86/crypto/sha/sha256-586.S236
1 files changed, 218 insertions, 18 deletions
diff --git a/mac-x86/crypto/sha/sha256-586.S b/mac-x86/crypto/sha/sha256-586.S
index 4615588..f0ba612 100644
--- a/mac-x86/crypto/sha/sha256-586.S
+++ b/mac-x86/crypto/sha/sha256-586.S
@@ -36,15 +36,17 @@ L000pic_point:
jz L003no_xmm
andl $1073741824,%ecx
andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz L004shaext
orl %ebx,%ecx
andl $1342177280,%ecx
cmpl $1342177280,%ecx
testl $512,%ebx
- jnz L004SSSE3
+ jnz L005SSSE3
L003no_xmm:
subl %edi,%eax
cmpl $256,%eax
- jae L005unrolled
+ jae L006unrolled
jmp L002loop
.align 4,0x90
L002loop:
@@ -116,7 +118,7 @@ L002loop:
movl %ecx,28(%esp)
movl %edi,32(%esp)
.align 4,0x90
-L00600_15:
+L00700_15:
movl %edx,%ecx
movl 24(%esp),%esi
rorl $14,%ecx
@@ -154,11 +156,11 @@ L00600_15:
addl $4,%ebp
addl %ebx,%eax
cmpl $3248222580,%esi
- jne L00600_15
+ jne L00700_15
movl 156(%esp),%ecx
- jmp L00716_63
+ jmp L00816_63
.align 4,0x90
-L00716_63:
+L00816_63:
movl %ecx,%ebx
movl 104(%esp),%esi
rorl $11,%ecx
@@ -213,7 +215,7 @@ L00716_63:
addl $4,%ebp
addl %ebx,%eax
cmpl $3329325298,%esi
- jne L00716_63
+ jne L00816_63
movl 356(%esp),%esi
movl 8(%esp),%ebx
movl 16(%esp),%ecx
@@ -257,7 +259,7 @@ L001K256:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.align 4,0x90
-L005unrolled:
+L006unrolled:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebp
@@ -274,9 +276,9 @@ L005unrolled:
movl %ebx,20(%esp)
movl %ecx,24(%esp)
movl %esi,28(%esp)
- jmp L008grand_loop
+ jmp L009grand_loop
.align 4,0x90
-L008grand_loop:
+L009grand_loop:
movl (%edi),%ebx
movl 4(%edi),%ecx
bswap %ebx
@@ -3156,7 +3158,7 @@ L008grand_loop:
movl %ebx,24(%esp)
movl %ecx,28(%esp)
cmpl 104(%esp),%edi
- jb L008grand_loop
+ jb L009grand_loop
movl 108(%esp),%esp
popl %edi
popl %esi
@@ -3164,7 +3166,205 @@ L008grand_loop:
popl %ebp
ret
.align 5,0x90
-L004SSSE3:
+L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp L010loop_shaext
+.align 4,0x90
+L010loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz L010loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 5,0x90
+L005SSSE3:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebx
@@ -3183,9 +3383,9 @@ L004SSSE3:
movl %ecx,24(%esp)
movl %esi,28(%esp)
movdqa 256(%ebp),%xmm7
- jmp L009grand_ssse3
+ jmp L011grand_ssse3
.align 4,0x90
-L009grand_ssse3:
+L011grand_ssse3:
movdqu (%edi),%xmm0
movdqu 16(%edi),%xmm1
movdqu 32(%edi),%xmm2
@@ -3208,9 +3408,9 @@ L009grand_ssse3:
paddd %xmm3,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
- jmp L010ssse3_00_47
+ jmp L012ssse3_00_47
.align 4,0x90
-L010ssse3_00_47:
+L012ssse3_00_47:
addl $64,%ebp
movl %edx,%ecx
movdqa %xmm1,%xmm4
@@ -3853,7 +4053,7 @@ L010ssse3_00_47:
addl %ecx,%eax
movdqa %xmm6,80(%esp)
cmpl $66051,64(%ebp)
- jne L010ssse3_00_47
+ jne L012ssse3_00_47
movl %edx,%ecx
rorl $14,%edx
movl 20(%esp),%esi
@@ -4367,7 +4567,7 @@ L010ssse3_00_47:
movdqa 64(%ebp),%xmm7
subl $192,%ebp
cmpl 104(%esp),%edi
- jb L009grand_ssse3
+ jb L011grand_ssse3
movl 108(%esp),%esp
popl %edi
popl %esi