summaryrefslogtreecommitdiffstats
path: root/linux-x86
diff options
context:
space:
mode:
Diffstat (limited to 'linux-x86')
-rw-r--r--linux-x86/crypto/sha/sha1-586.S189
-rw-r--r--linux-x86/crypto/sha/sha256-586.S236
2 files changed, 399 insertions, 26 deletions
diff --git a/linux-x86/crypto/sha/sha1-586.S b/linux-x86/crypto/sha/sha1-586.S
index dd86f31..808ccac 100644
--- a/linux-x86/crypto/sha/sha1-586.S
+++ b/linux-x86/crypto/sha/sha1-586.S
@@ -23,6 +23,8 @@ sha1_block_data_order:
movl 8(%esi),%ecx
testl $16777216,%eax
jz .L001x86
+ testl $536870912,%ecx
+ jnz .Lshaext_shortcut
jmp .Lssse3_shortcut
.align 16
.L001x86:
@@ -1391,10 +1393,10 @@ sha1_block_data_order:
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.hidden _sha1_block_data_order_ssse3
-.type _sha1_block_data_order_ssse3,@function
+.hidden _sha1_block_data_order_shaext
+.type _sha1_block_data_order_shaext,@function
.align 16
-_sha1_block_data_order_ssse3:
+_sha1_block_data_order_shaext:
pushl %ebp
pushl %ebx
pushl %esi
@@ -1403,6 +1405,177 @@ _sha1_block_data_order_ssse3:
.L003pic_point:
popl %ebp
leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lshaext_shortcut:
+ movl 20(%esp),%edi
+ movl %esp,%ebx
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ subl $32,%esp
+ movdqu (%edi),%xmm0
+ movd 16(%edi),%xmm1
+ andl $-32,%esp
+ movdqa 80(%ebp),%xmm3
+ movdqu (%esi),%xmm4
+ pshufd $27,%xmm0,%xmm0
+ movdqu 16(%esi),%xmm5
+ pshufd $27,%xmm1,%xmm1
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,227
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,235
+.byte 102,15,56,0,243
+.byte 102,15,56,0,251
+ jmp .L004loop_shaext
+.align 16
+.L004loop_shaext:
+ decl %ecx
+ leal 64(%esi),%eax
+ movdqa %xmm1,(%esp)
+ paddd %xmm4,%xmm1
+ cmovnel %eax,%esi
+ movdqa %xmm0,16(%esp)
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+ movdqu (%esi),%xmm4
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+ movdqu 16(%esi),%xmm5
+.byte 102,15,56,0,227
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,235
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,243
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+ movdqa (%esp),%xmm2
+.byte 102,15,56,0,251
+.byte 15,56,200,202
+ paddd 16(%esp),%xmm0
+ jnz .L004loop_shaext
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm1,%xmm1
+ movdqu %xmm0,(%edi)
+ movd %xmm1,16(%edi)
+ movl %ebx,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
+.hidden _sha1_block_data_order_ssse3
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L005pic_point
+.L005pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
.Lssse3_shortcut:
movdqa (%ebp),%xmm7
movdqa 16(%ebp),%xmm0
@@ -1455,9 +1628,9 @@ _sha1_block_data_order_ssse3:
xorl %edx,%ebp
pshufd $238,%xmm0,%xmm4
andl %ebp,%esi
- jmp .L004loop
+ jmp .L006loop
.align 16
-.L004loop:
+.L006loop:
rorl $2,%ebx
xorl %edx,%esi
movl %eax,%ebp
@@ -2360,7 +2533,7 @@ _sha1_block_data_order_ssse3:
addl %edx,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L005done
+ je .L007done
movdqa 160(%esp),%xmm7
movdqa 176(%esp),%xmm6
movdqu (%ebp),%xmm0
@@ -2495,9 +2668,9 @@ _sha1_block_data_order_ssse3:
pshufd $238,%xmm0,%xmm4
andl %ebx,%esi
movl %ebp,%ebx
- jmp .L004loop
+ jmp .L006loop
.align 16
-.L005done:
+.L007done:
addl 16(%esp),%ebx
xorl %edi,%esi
movl %ecx,%ebp
diff --git a/linux-x86/crypto/sha/sha256-586.S b/linux-x86/crypto/sha/sha256-586.S
index 196f7f9..08d9484 100644
--- a/linux-x86/crypto/sha/sha256-586.S
+++ b/linux-x86/crypto/sha/sha256-586.S
@@ -37,15 +37,17 @@ sha256_block_data_order:
jz .L003no_xmm
andl $1073741824,%ecx
andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz .L004shaext
orl %ebx,%ecx
andl $1342177280,%ecx
cmpl $1342177280,%ecx
testl $512,%ebx
- jnz .L004SSSE3
+ jnz .L005SSSE3
.L003no_xmm:
subl %edi,%eax
cmpl $256,%eax
- jae .L005unrolled
+ jae .L006unrolled
jmp .L002loop
.align 16
.L002loop:
@@ -117,7 +119,7 @@ sha256_block_data_order:
movl %ecx,28(%esp)
movl %edi,32(%esp)
.align 16
-.L00600_15:
+.L00700_15:
movl %edx,%ecx
movl 24(%esp),%esi
rorl $14,%ecx
@@ -155,11 +157,11 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3248222580,%esi
- jne .L00600_15
+ jne .L00700_15
movl 156(%esp),%ecx
- jmp .L00716_63
+ jmp .L00816_63
.align 16
-.L00716_63:
+.L00816_63:
movl %ecx,%ebx
movl 104(%esp),%esi
rorl $11,%ecx
@@ -214,7 +216,7 @@ sha256_block_data_order:
addl $4,%ebp
addl %ebx,%eax
cmpl $3329325298,%esi
- jne .L00716_63
+ jne .L00816_63
movl 356(%esp),%esi
movl 8(%esp),%ebx
movl 16(%esp),%ecx
@@ -258,7 +260,7 @@ sha256_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.align 16
-.L005unrolled:
+.L006unrolled:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebp
@@ -275,9 +277,9 @@ sha256_block_data_order:
movl %ebx,20(%esp)
movl %ecx,24(%esp)
movl %esi,28(%esp)
- jmp .L008grand_loop
+ jmp .L009grand_loop
.align 16
-.L008grand_loop:
+.L009grand_loop:
movl (%edi),%ebx
movl 4(%edi),%ecx
bswap %ebx
@@ -3157,7 +3159,7 @@ sha256_block_data_order:
movl %ebx,24(%esp)
movl %ecx,28(%esp)
cmpl 104(%esp),%edi
- jb .L008grand_loop
+ jb .L009grand_loop
movl 108(%esp),%esp
popl %edi
popl %esi
@@ -3165,7 +3167,205 @@ sha256_block_data_order:
popl %ebp
ret
.align 32
-.L004SSSE3:
+.L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .L010loop_shaext
+.align 16
+.L010loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz .L010loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L005SSSE3:
leal -96(%esp),%esp
movl (%esi),%eax
movl 4(%esi),%ebx
@@ -3184,9 +3384,9 @@ sha256_block_data_order:
movl %ecx,24(%esp)
movl %esi,28(%esp)
movdqa 256(%ebp),%xmm7
- jmp .L009grand_ssse3
+ jmp .L011grand_ssse3
.align 16
-.L009grand_ssse3:
+.L011grand_ssse3:
movdqu (%edi),%xmm0
movdqu 16(%edi),%xmm1
movdqu 32(%edi),%xmm2
@@ -3209,9 +3409,9 @@ sha256_block_data_order:
paddd %xmm3,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
- jmp .L010ssse3_00_47
+ jmp .L012ssse3_00_47
.align 16
-.L010ssse3_00_47:
+.L012ssse3_00_47:
addl $64,%ebp
movl %edx,%ecx
movdqa %xmm1,%xmm4
@@ -3854,7 +4054,7 @@ sha256_block_data_order:
addl %ecx,%eax
movdqa %xmm6,80(%esp)
cmpl $66051,64(%ebp)
- jne .L010ssse3_00_47
+ jne .L012ssse3_00_47
movl %edx,%ecx
rorl $14,%edx
movl 20(%esp),%esi
@@ -4368,7 +4568,7 @@ sha256_block_data_order:
movdqa 64(%ebp),%xmm7
subl $192,%ebp
cmpl 104(%esp),%edi
- jb .L009grand_ssse3
+ jb .L011grand_ssse3
movl 108(%esp),%esp
popl %edi
popl %esi