diff options
author | Catalin Ionita <catalin.ionita@intel.com> | 2012-09-13 01:20:41 +0300 |
---|---|---|
committer | Brian Carlstrom <bdc@google.com> | 2012-09-12 12:27:44 -0700 |
commit | c58cd0fd2cebb61b0e0f200e01562c542525ef46 (patch) | |
tree | 9d111acd77a9488e44b0b8527450028f7509c4b7 /crypto/aes | |
parent | fef450d35d0654d04dc142ecbb62b40535f53f47 (diff) | |
download | replicant_openssl-c58cd0fd2cebb61b0e0f200e01562c542525ef46.zip replicant_openssl-c58cd0fd2cebb61b0e0f200e01562c542525ef46.tar.gz replicant_openssl-c58cd0fd2cebb61b0e0f200e01562c542525ef46.tar.bz2 |
Enable OpenSSL crypto optimizations for the x86 platform
Asm files attached to this patch were generated from the
current OpenSSL version.
Change-Id: I05ef67a6e34016ef94a0ef23ca264bcac805b1cc
Signed-off-by: Catalin Ionita <catalin.ionita@intel.com>
Diffstat (limited to 'crypto/aes')
-rw-r--r-- | crypto/aes/asm/aes-586.s | 3236 | ||||
-rw-r--r-- | crypto/aes/asm/aesni-x86.s | 2143 | ||||
-rw-r--r-- | crypto/aes/asm/vpaes-x86.s | 661 |
3 files changed, 6040 insertions, 0 deletions
diff --git a/crypto/aes/asm/aes-586.s b/crypto/aes/asm/aes-586.s new file mode 100644 index 0000000..f69b7d5 --- /dev/null +++ b/crypto/aes/asm/aes-586.s @@ -0,0 +1,3236 @@ +.file "aes-586.s" +.text +.type _x86_AES_encrypt_compact,@function +.align 16 +_x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl 
$255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %ecx,%ebp + xorl %edi,%esi + xorl %esi,%ecx + roll $24,%ecx + xorl %esi,%ecx + rorl $16,%ebp + xorl %ebp,%ecx + rorl $8,%ebp + xorl %ebp,%ecx + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %edx,%ebp + xorl %edi,%esi + xorl %esi,%edx + roll $24,%edx + xorl %esi,%edx + rorl $16,%ebp + xorl %ebp,%edx + rorl $8,%ebp + xorl %ebp,%edx + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %eax,%ebp + xorl %edi,%esi + xorl %esi,%eax + roll $24,%eax + xorl %esi,%eax + rorl $16,%ebp + xorl %ebp,%eax + rorl $8,%ebp + xorl %ebp,%eax + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%edi + subl %ebp,%esi + andl $4278124286,%edi + andl $454761243,%esi + movl %ebx,%ebp + xorl %edi,%esi + xorl %esi,%ebx + roll $24,%ebx + xorl %esi,%ebx + rorl $16,%ebp + xorl %ebp,%ebx + rorl $8,%ebp + xorl %ebp,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 
%esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact +.type _sse_AES_encrypt_compact,@function +.align 16 +_sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + pshufw $13,%mm0,%mm2 + movzbl %ah,%edx + movzbl -128(%ebp,%edx,1),%edx + shll $8,%edx + shrl 
$16,%eax + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + pshufw $8,%mm4,%mm6 + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movd %ecx,%mm0 + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + movd %mm2,%eax + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + movd %mm6,%ebx + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm1 + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%ecx + shrl $16,%ebx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + shrl $16,%eax + punpckldq %mm1,%mm0 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + andl $255,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $16,%eax + orl %eax,%edx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm4 + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + orl %ebx,%edx + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld 
$8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L001loop +.align 16 +.L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact +.type _x86_AES_encrypt,@function +.align 16 +_x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + 
movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 
1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 
+.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 
+.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 
856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 
+.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 
232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.size _x86_AES_encrypt,.-_x86_AES_encrypt +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +AES_encrypt: +.L_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Te-.L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx 
+ subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_encrypt,.-.L_AES_encrypt_begin +.type _x86_AES_decrypt_compact,@function +.align 16 +_x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi 
+ movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + roll $24,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $16,%ebx + xorl %ebp,%ecx + roll $8,%ebp + xorl %eax,%ecx + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl 
%esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + roll $24,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $16,%ecx + xorl %ebp,%edx + roll $8,%ebp + xorl %ebx,%edx + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + roll $24,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $16,%edx + xorl %ebp,%eax + roll $8,%ebp + xorl %ecx,%eax + xorl %edx,%eax + xorl %ebp,%eax + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%edi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + roll $24,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $16,%edx + xorl %ebp,%ebx + roll $8,%ebp + xorl %ecx,%ebx + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) 
+ jb .L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact +.type _sse_AES_decrypt_compact,@function +.align 16 +_sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx 
+ movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L007loop: + pshufw $12,%mm0,%mm1 + movd %mm1,%eax + pshufw $9,%mm4,%mm5 + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%ecx + movd %mm5,%ebx + movzbl %ah,%edx + movzbl -128(%ebp,%edx,1),%edx + shll $8,%edx + pshufw $6,%mm0,%mm2 + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%ecx + shrl $16,%eax + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + pshufw $3,%mm4,%mm6 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%ecx + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + movd %ecx,%mm0 + movzbl %al,%esi + movd %mm2,%eax + movzbl -128(%ebp,%esi,1),%ecx + shll $16,%ecx + movzbl %bl,%esi + movd %mm6,%ebx + movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl %bl,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%edx + movd %edx,%mm1 + movzbl %ah,%esi + movzbl -128(%ebp,%esi,1),%edx + shll $8,%edx + movzbl %bh,%esi + shrl $16,%eax + movzbl -128(%ebp,%esi,1),%esi + shll $24,%esi + orl %esi,%edx + shrl $16,%ebx + punpckldq %mm1,%mm0 + movzbl %bh,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $8,%esi + orl %esi,%ecx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + orl %ebx,%edx + movzbl %al,%esi + movzbl -128(%ebp,%esi,1),%esi + shll $16,%esi + orl %esi,%edx + movd %edx,%mm4 + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + orl %eax,%ecx + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor 
%mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L007loop +.align 16 +.L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact +.type _x86_AES_decrypt,@function +.align 16 +_x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal 
-2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl 
%edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 
1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 
+.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 
3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 
741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 
+.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 
238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.size _x86_AES_decrypt,.-_x86_AES_decrypt +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +AES_decrypt: +.L_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L010pic_point +.L010pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Td-.L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 
24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_decrypt,.-.L_AES_decrypt_begin +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: +.L_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L012drop_out + call .L013pic_point +.L013pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + cmpl $0,40(%esp) + leal .LAES_Te-.L013pic_point(%ebp),%ebp + jne .L014picked_te + leal .LAES_Td-.LAES_Te(%ebp),%ebp +.L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb .L015slow_way + testl $15,%ecx + jnz .L015slow_way + btl $28,(%eax) + jc .L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb .L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp .L017tbl_ok +.align 4 +.L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 4 +.L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb .L018do_copy + cmpl $3852,%ebx + jb .L019skip_copy +.align 4 +.L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +.L019skip_copy: + movl $16,%edi +.align 4 +.L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz .L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je .L021fast_decrypt + movl 
(%edi),%eax + movl 4(%edi),%ebx +.align 16 +.L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L023skip_ezero: + movl 28(%esp),%esp + popfl +.L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L021fast_decrypt: + cmpl 36(%esp),%esi + je .L024fast_dec_in_place + movl %edi,52(%esp) +.align 4 +.align 16 +.L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L026fast_dec_out +.align 16 +.L024fast_dec_in_place: +.L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 
60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L027fast_dec_in_place_loop +.align 4 +.L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je .L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb .L030slow_enc_tail + btl $25,52(%esp) + jnc .L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 16 +.L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq 
%mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L032slow_enc_loop_sse + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L033slow_enc_loop_x86 + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L034enc_in_place +.align 4 +.long 2767451785 + jmp .L035enc_skip_in_place +.L034enc_in_place: + leal (%edi,%ecx,1),%edi +.L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp .L033slow_enc_loop_x86 +.align 16 +.L029slow_decrypt: + btl $25,52(%esp) + jnc .L036slow_dec_loop_x86 +.align 4 +.L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi 
+ movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc .L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz .L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz .L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 
40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin +.type _x86_AES_set_encrypt_key,@function +.align 16 +_x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz .L040badpointer + testl $-1,%edi + jz .L040badpointer + call .L041pic_point +.L041pic_point: + popl %ebp + leal .LAES_Te-.L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je .L04210rounds + cmpl $192,%ecx + je .L04312rounds + cmpl $256,%ecx + je .L04414rounds + movl $-2,%eax + jmp .L045exit +.L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp .L04610shortcut +.align 4 +.L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +.L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl .L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp .L045exit +.L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl 
%edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp .L04812shortcut +.align 4 +.L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +.L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je .L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp .L04912loop +.L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp .L045exit +.L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp .L05114shortcut +.align 4 +.L05214loop: + movl 28(%edi),%edx +.L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je .L05314break + incl %ecx + movl %eax,%edx + movl 
16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp .L05214loop +.L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp .L045exit +.L040badpointer: + movl $-1,%eax +.L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function +.align 16 +private_AES_set_encrypt_key: +.L_private_AES_set_encrypt_key_begin: + call _x86_AES_set_encrypt_key + ret +.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function +.align 16 +private_AES_set_decrypt_key: +.L_private_AES_set_decrypt_key_begin: + call _x86_AES_set_encrypt_key + cmpl $0,%eax + je .L054proceed + ret +.L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 4 +.L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne .L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 4 +.L056permute: + addl 
$16,%edi + movl %eax,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl %ebx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %ecx,%esi + movl %esi,%ecx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl %ecx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ecx,%ecx,1),%edx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %edx,%esi + movl %esi,%edx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + subl %ebp,%esi + andl $4278124286,%eax + andl 
$454761243,%esi + xorl %ecx,%edx + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl %edx,%esi + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%edx,%edx,1),%eax + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %eax,%esi + movl %esi,%eax + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%eax,%eax,1),%ebx + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %ebx,%esi + movl %esi,%ebx + andl $2155905152,%esi + movl %esi,%ebp + shrl $7,%ebp + leal (%ebx,%ebx,1),%ecx + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb .L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,8,4 diff --git a/crypto/aes/asm/aesni-x86.s b/crypto/aes/asm/aesni-x86.s new file mode 100644 index 0000000..0766bb5 --- /dev/null +++ b/crypto/aes/asm/aesni-x86.s @@ -0,0 +1,2143 @@ +.file "crypto/aes/asm/aesni-x86.s" +.text +.globl aesni_encrypt +.type 
aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L002enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 + movups (%edx),%xmm0 + jnz .L002enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups (%edx),%xmm0 +.L003dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx 
+.byte 102,15,56,222,225 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 + movups (%edx),%xmm0 + jnz .L003dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L004enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups (%edx),%xmm0 + jnz .L004enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shrl $1,%ecx + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups (%edx),%xmm0 +.L005dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups (%edx),%xmm0 + jnz .L005dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 
+.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + jmp .L_aesni_encrypt6_enter +.align 16 +.L006enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + decl %ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.align 16 +.L_aesni_encrypt6_enter: + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + leal 32(%edx),%edx +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups (%edx),%xmm0 + jnz .L006enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shrl $1,%ecx + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + decl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,241 + movups (%edx),%xmm0 +.byte 102,15,56,222,249 + jmp .L_aesni_decrypt6_enter +.align 16 +.L007dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + decl %ecx +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.align 16 +.L_aesni_decrypt6_enter: + movups 
16(%edx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + leal 32(%edx),%edx +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups (%edx),%xmm0 + jnz .L007dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L008ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L009ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L010ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L011ecb_enc_loop6_enter +.align 16 +.L012ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L011ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L012ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L010ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L013ecb_enc_one + 
movups 16(%esi),%xmm3 + je .L014ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L015ecb_enc_three + movups 48(%esi),%xmm5 + je .L016ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L013ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L017enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L017enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L014ecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L015ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L016ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L008ecb_ret +.align 16 +.L009ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L018ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L019ecb_dec_loop6_enter +.align 16 +.L020ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L019ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L020ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups 
%xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L008ecb_ret +.L018ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L021ecb_dec_one + movups 16(%esi),%xmm3 + je .L022ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L023ecb_dec_three + movups 48(%esi),%xmm5 + je .L024ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L008ecb_ret +.align 16 +.L021ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L025dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L025dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L008ecb_ret +.align 16 +.L022ecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L008ecb_ret +.align 16 +.L023ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L008ecb_ret +.align 16 +.L024ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L008ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl 
$1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shrl $1,%ecx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %ecx,%ebx +.byte 102,15,56,0,253 +.L026ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + leal 32(%ebp),%edx + xorps %xmm0,%xmm3 + movups (%edx),%xmm0 +.L027ccm64_enc2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L027ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + decl %eax + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + jnz .L026ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx 
+ xorps %xmm0,%xmm2 +.L028enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L028enc1_loop_5 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + jmp .L029ccm64_dec_outer +.align 16 +.L029ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movl %ebx,%ecx + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L030ccm64_dec_break + movups (%ebp),%xmm0 + shrl $1,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%ebp),%edx + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups (%edx),%xmm0 +.L031ccm64_dec2_loop: +.byte 102,15,56,220,209 + decl %ecx +.byte 102,15,56,220,217 + movups 16(%edx),%xmm1 +.byte 102,15,56,220,208 + leal 32(%edx),%edx +.byte 102,15,56,220,216 + movups (%edx),%xmm0 + jnz .L031ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leal 16(%esi),%esi +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + jmp .L029ccm64_dec_outer +.align 16 +.L030ccm64_dec_break: + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L032enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L033ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl 
$202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm0 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,203,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,197,0 + incl %ebx +.byte 102,15,58,34,203,1 + incl %ebp +.byte 102,15,58,34,197,1 + incl %ebx +.byte 102,15,58,34,203,2 + incl %ebp +.byte 102,15,58,34,197,2 + movdqa %xmm1,48(%esp) +.byte 102,15,56,0,202 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + pshufd $192,%xmm1,%xmm2 + pshufd $128,%xmm1,%xmm3 + cmpl $6,%eax + jb .L034ctr32_tail + movdqa %xmm7,32(%esp) + shrl $1,%ecx + movl %edx,%ebp + movl %ecx,%ebx + subl $6,%eax + jmp .L035ctr32_loop6 +.align 16 +.L035ctr32_loop6: + pshufd $64,%xmm1,%xmm4 + movdqa 32(%esp),%xmm1 + pshufd $192,%xmm0,%xmm5 + por %xmm1,%xmm2 + pshufd $128,%xmm0,%xmm6 + por %xmm1,%xmm3 + pshufd $64,%xmm0,%xmm7 + por %xmm1,%xmm4 + por %xmm1,%xmm5 + por %xmm1,%xmm6 + por %xmm1,%xmm7 + movups (%ebp),%xmm0 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + decl %ecx + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 48(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 64(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm1,48(%esp) +.byte 
102,15,56,0,202 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm0,64(%esp) +.byte 102,15,56,0,194 + movups %xmm6,64(%edi) + pshufd $192,%xmm1,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movl %ebx,%ecx + pshufd $128,%xmm1,%xmm3 + subl $6,%eax + jnc .L035ctr32_loop6 + addl $6,%eax + jz .L036ctr32_ret + movl %ebp,%edx + leal 1(,%ecx,2),%ecx + movdqa 32(%esp),%xmm7 +.L034ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L037ctr32_one + pshufd $64,%xmm1,%xmm4 + por %xmm7,%xmm3 + je .L038ctr32_two + pshufd $192,%xmm0,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L039ctr32_three + pshufd $128,%xmm0,%xmm6 + por %xmm7,%xmm5 + je .L040ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L036ctr32_ret +.align 16 +.L033ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L037ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L041enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L041enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L036ctr32_ret +.align 16 +.L038ctr32_two: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L036ctr32_ret +.align 16 +.L039ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L036ctr32_ret +.align 16 
+.L040ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L036ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L042enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L042enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L043xts_enc_short + shrl $1,%ecx + movl %ecx,%ebx + jmp .L044xts_enc_loop6 +.align 16 +.L044xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 
+ movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + leal 32(%ebp),%edx + pxor 16(%esp),%xmm3 +.byte 102,15,56,220,209 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,217 + pxor 48(%esp),%xmm5 + decl %ecx +.byte 102,15,56,220,225 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,241 + movups (%edx),%xmm0 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + movl %ebx,%ecx + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L044xts_enc_loop6 + leal 1(,%ecx,2),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L043xts_enc_short: + addl $96,%eax + jz .L045xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L046xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L047xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L048xts_enc_three + 
pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L049xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L050xts_enc_done +.align 16 +.L046xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L051enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L051enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L047xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L050xts_enc_done +.align 16 +.L048xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps 
%xmm7,%xmm4
# NOTE(review): this chunk opens mid-instruction, inside the .L0xx xts_enc_* part of
# aesni_xts_encrypt (see its .size directive below). That tail is kept verbatim.
# Raw opcodes used throughout this file (operands in AT&T order):
#   .byte 102,15,56,220,209  aesenc     %xmm1,%xmm2
#   .byte 102,15,56,221,209  aesenclast %xmm1,%xmm2
#   .byte 102,15,56,222,209  aesdec     %xmm1,%xmm2   (217/225/233/241/249 -> %xmm3..%xmm7)
#   .byte 102,15,56,223,209  aesdeclast %xmm1,%xmm2
#   .byte 102,15,56,219,192  aesimc     %xmm0,%xmm0   (201 -> %xmm1,%xmm1)
#   .byte 102,15,58,223,200,N  aeskeygenassist $N,%xmm0,%xmm1  (202 -> source %xmm2)
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L049xts_enc_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call _aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L050xts_enc_done
+.align 16
+.L045xts_enc_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz .L052xts_enc_ret
+ movdqa %xmm1,%xmm5
+ movl %eax,112(%esp)
+ jmp .L053xts_enc_steal
+.align 16
+.L050xts_enc_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L052xts_enc_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm5
+ paddq %xmm1,%xmm1
+ pand 96(%esp),%xmm5
+ pxor %xmm1,%xmm5
+.L053xts_enc_steal:
+ movzbl (%esi),%ecx
+ movzbl -16(%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,-16(%edi)
+ movb %dl,(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L053xts_enc_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups -16(%edi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L054enc1_loop_10:
+.byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L054enc1_loop_10
+.byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2
+ xorps %xmm5,%xmm2
+ movups %xmm2,-16(%edi)
+.L052xts_enc_ret:
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
# ----------------------------------------------------------------------------
# aesni_xts_decrypt (global). Stack arguments as read after the four pushes:
#   20(%esp) -> %esi  source pointer (all block loads go through (%esi))
#   24(%esp) -> %edi  destination pointer (all block stores go through (%edi))
#   28(%esp) -> %eax  byte count
#   32(%esp) -> %edx  key schedule used for the AESDEC rounds (kept in %ebp)
#   36(%esp) -> %edx  key schedule used, with AESENC, on the 16 bytes at arg 40
#   40(%esp) -> %esi  pointer to the 16-byte value that seeds the tweak
# The encrypted seed becomes the first tweak (%xmm1). Every block is XORed with
# its tweak before and after decryption.
# Frame after realignment: 0..95(%esp) tweak slots, 96(%esp) constant
# {135,0,1,0}, 112(%esp) byte count, 116(%esp) caller's %esp.
# Round loops take their count from offset 240 of the key schedule.
# Tweak update idiom (repeated many times below): paddq doubles both 64-bit
# halves; the pcmpgtd-against-zero sign mask, shuffled by pshufd $19 and ANDed
# with {135,0,1,0}, supplies the carry into the upper half and the 135 feedback
# into the lowest dword.
# ----------------------------------------------------------------------------
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx # second key schedule
+ movl 40(%esp),%esi # tweak seed pointer
+ movl 240(%edx),%ecx # round-loop count for the seed encryption
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L055enc1_loop_11:
+.byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
+ decl %ecx # ZF from here survives the movups/leal below
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L055enc1_loop_11
+.byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2 -> %xmm2 = initial tweak
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ andl $-16,%esp # 16-byte align the frame (movdqa is used on it)
+ xorl %ebx,%ebx
+ testl $15,%eax
+ setnz %bl
+ shll $4,%ebx
+ subl %ebx,%eax # length not a multiple of 16: hold back one extra 16-byte block for the steal path
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp) # caller's %esp, restored at .L065xts_dec_ret
+ movl 240(%edx),%ecx
+ movl %edx,%ebp # %ebp = key schedule base from here on
+ movl %ecx,%ebx # %ebx = saved round-loop count
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ subl $96,%eax
+ jc .L056xts_dec_short # fewer than 96 bytes of whole blocks
+ shrl $1,%ecx # NOTE(review): halved count for the 6-block helper, which is outside this chunk; confirm
+ movl %ecx,%ebx
+ jmp .L057xts_dec_loop6
+.align 16
+.L057xts_dec_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp) # tweak for block 0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp) # tweak for block 1
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp) # tweak for block 2
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp) # tweak for block 3
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp) # tweak for block 4
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0 # round key 0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7 # %xmm7 = tweak for block 5
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ leal 32(%ebp),%edx
+ pxor 16(%esp),%xmm3
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,222,217 # aesdec %xmm1,%xmm3
+ pxor 48(%esp),%xmm5
+ decl %ecx
+.byte 102,15,56,222,225 # aesdec %xmm1,%xmm4
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,222,233 # aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+.byte 102,15,56,222,241 # aesdec %xmm1,%xmm6
+ movups (%edx),%xmm0
+.byte 102,15,56,222,249 # aesdec %xmm1,%xmm7
+ call .L_aesni_decrypt6_enter # remaining rounds; helper is defined outside this chunk
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3 # reload {135,0,1,0}; %xmm3 was used as a data lane
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ movl %ebx,%ecx
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc .L057xts_dec_loop6
+ leal 1(,%ecx,2),%ecx # undo the halving: count = 2*count+1
+ movl %ebp,%edx
+ movl %ecx,%ebx
+.L056xts_dec_short:
+ addl $96,%eax # %eax = whole-block bytes still to do (0..80)
+ jz .L058xts_dec_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb .L059xts_dec_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je .L060xts_dec_two # flags still from cmpl $32: the SSE ops above do not write EFLAGS
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb .L061xts_dec_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je .L062xts_dec_four # flags still from cmpl $64
+ movdqa %xmm7,32(%esp) # five blocks remain
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call _aesni_decrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp .L063xts_dec_done
+.align 16
+.L059xts_dec_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L064dec1_loop_12:
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L064dec1_loop_12
+.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L060xts_dec_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call _aesni_decrypt3 # only %xmm2/%xmm3 results are used below
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L061xts_dec_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L062xts_dec_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call _aesni_decrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L063xts_dec_done
+.align 16
+.L058xts_dec_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax # leftover bytes beyond the last whole block
+ jz .L065xts_dec_ret
+ movl %eax,112(%esp)
+ jmp .L066xts_dec_only_one_more # %xmm1 is already the next unused tweak here
+.align 16
+.L063xts_dec_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L065xts_dec_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ paddq %xmm1,%xmm1 # step past the tweak that was used last
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
# Partial final block: the held-back whole block is decrypted with the tweak
# after the current one (%xmm5); the block rebuilt by the steal loop is then
# decrypted with the current tweak (%xmm6).
+.L066xts_dec_only_one_more:
+ pshufd $19,%xmm0,%xmm5
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm5
+ pxor %xmm1,%xmm5
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%esi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L067dec1_loop_13:
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L067dec1_loop_13
+.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+.L068xts_dec_steal: # per byte: 16(%esi) -> (%edi), and the old (%edi) byte -> 16(%edi)
+ movzbl 16(%esi),%ecx
+ movzbl (%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,(%edi)
+ movb %dl,16(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L068xts_dec_steal
+ subl 112(%esp),%edi # rewind %edi to the start of the rebuilt block
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%edi),%xmm2
+ xorps %xmm6,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L069dec1_loop_14:
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L069dec1_loop_14
+.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
+ xorps %xmm6,%xmm2
+ movups %xmm2,(%edi)
+.L065xts_dec_ret:
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
# ----------------------------------------------------------------------------
# aesni_cbc_encrypt (global). Stack arguments as read after the four pushes:
#   20(%esp) -> %esi  source pointer       24(%esp) -> %edi  destination pointer
#   28(%esp) -> %eax  byte count           32(%esp) -> %edx  key schedule
#   36(%esp) -> %ebp  pointer to the 16-byte chaining value (IV)
#   40(%esp)          direction flag; zero selects .L071cbc_decrypt
# A zero byte count returns at once without touching the IV. Otherwise the
# final chaining value (%xmm7) is stored back through the IV pointer.
# Frame: %esp is switched to a 16-byte aligned area; 0(%esp) is a 16-byte
# scratch block, 16(%esp) holds the caller's %esp.
# .long 2767451785 is bytes 89 F6 F3 A4 = movl %esi,%esi ; rep movsb
# .long 2868115081 is bytes 89 F6 F3 AA = movl %esi,%esi ; rep stosb
# ----------------------------------------------------------------------------
+.globl aesni_cbc_encrypt
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl %esp,%ebx
+ movl 24(%esp),%edi
+ subl $24,%ebx
+ movl 28(%esp),%eax
+ andl $-16,%ebx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebp
+ testl %eax,%eax
+ jz .L070cbc_abort
+ cmpl $0,40(%esp) # result is consumed by the je below; xchg/mov/movups leave EFLAGS alone
+ xchgl %esp,%ebx # switch to the aligned frame; %ebx = caller's %esp
+ movups (%ebp),%xmm7 # %xmm7 = IV
+ movl 240(%edx),%ecx
+ movl %edx,%ebp # %ebp = key schedule base
+ movl %ebx,16(%esp)
+ movl %ecx,%ebx # %ebx = saved round-loop count
+ je .L071cbc_decrypt
+ movaps %xmm7,%xmm2
+ cmpl $16,%eax
+ jb .L072cbc_enc_tail
+ subl $16,%eax
+ jmp .L073cbc_enc_loop
+.align 16
+.L073cbc_enc_loop:
+ movups (%esi),%xmm7
+ leal 16(%esi),%esi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm7
+ leal 32(%edx),%edx
+ xorps %xmm7,%xmm2 # %xmm2 = previous ciphertext ^ input ^ round key 0
+.L074enc1_loop_15:
+.byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L074enc1_loop_15
+.byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2
+ movl %ebx,%ecx
+ movl %ebp,%edx
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ subl $16,%eax
+ jnc .L073cbc_enc_loop
+ addl $16,%eax
+ jnz .L072cbc_enc_tail
+ movaps %xmm2,%xmm7
+ jmp .L075cbc_ret
+.L072cbc_enc_tail: # 1..15 bytes left: copy to the output, zero-pad to 16, encrypt in place
+ movl %eax,%ecx
+.long 2767451785 # movl %esi,%esi ; rep movsb
+ movl $16,%ecx
+ subl %eax,%ecx
+ xorl %eax,%eax # %al = 0 is the fill byte; %eax = 0 also ends the loop after this block
+.long 2868115081 # movl %esi,%esi ; rep stosb
+ leal -16(%edi),%edi
+ movl %ebx,%ecx
+ movl %edi,%esi # source = the padded block just built in the output
+ movl %ebp,%edx
+ jmp .L073cbc_enc_loop
+.align 16
+.L071cbc_decrypt:
+ cmpl $80,%eax
+ jbe .L076cbc_dec_tail
+ movaps %xmm7,(%esp) # 0(%esp) = chaining value for the first block of each batch
+ subl $80,%eax
+ jmp .L077cbc_dec_loop6_enter
+.align 16
+.L078cbc_dec_loop6:
+ movaps %xmm0,(%esp)
+ movups %xmm7,(%edi)
+ leal 16(%edi),%edi
+.L077cbc_dec_loop6_enter:
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ call _aesni_decrypt6
+ movups (%esi),%xmm1 # ciphertext is re-read to serve as chaining values
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%esi),%xmm0
+ xorps %xmm1,%xmm7
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 96(%esi),%esi
+ movups %xmm4,32(%edi)
+ movl %ebx,%ecx
+ movups %xmm5,48(%edi)
+ movl %ebp,%edx
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ subl $96,%eax
+ ja .L078cbc_dec_loop6
+ movaps %xmm7,%xmm2
+ movaps %xmm0,%xmm7
+ addl $80,%eax
+ jle .L079cbc_dec_tail_collected
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+.L076cbc_dec_tail: # 1..80 bytes left: dispatch on the number of blocks
+ movups (%esi),%xmm2
+ movaps %xmm2,%xmm6
+ cmpl $16,%eax
+ jbe .L080cbc_dec_one
+ movups 16(%esi),%xmm3
+ movaps %xmm3,%xmm5
+ cmpl $32,%eax
+ jbe .L081cbc_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $48,%eax
+ jbe .L082cbc_dec_three
+ movups 48(%esi),%xmm5
+ cmpl $64,%eax
+ jbe .L083cbc_dec_four
+ movups 64(%esi),%xmm6
+ movaps %xmm7,(%esp)
+ movups (%esi),%xmm2
+ xorps %xmm7,%xmm7 # sixth lane unused: feed zeros
+ call _aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm7
+ xorps %xmm0,%xmm6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movaps %xmm6,%xmm2
+ subl $80,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L080cbc_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L084dec1_loop_16:
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L084dec1_loop_16
+.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm7
+ subl $16,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L081cbc_dec_two:
+ xorps %xmm4,%xmm4 # third lane unused: feed zeros
+ call _aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movaps %xmm3,%xmm2
+ leal 16(%edi),%edi
+ movaps %xmm5,%xmm7
+ subl $32,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L082cbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm5,%xmm4
+ movups %xmm2,(%edi)
+ movaps %xmm4,%xmm2
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movups 32(%esi),%xmm7
+ subl $48,%eax
+ jmp .L079cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_four:
+ call _aesni_decrypt4
+ movups 16(%esi),%xmm1
+ movups 32(%esi),%xmm0
+ xorps %xmm7,%xmm2
+ movups 48(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movaps %xmm5,%xmm2
+ subl $64,%eax
+.L079cbc_dec_tail_collected: # %xmm2 = last plaintext block, not yet stored; %xmm7 = next IV
+ andl $15,%eax # equals byte count mod 16 (only multiples of 16 were subtracted)
+ jnz .L085cbc_dec_tail_partial
+ movups %xmm2,(%edi)
+ jmp .L075cbc_ret
+.align 16
+.L085cbc_dec_tail_partial:
+ movaps %xmm2,(%esp)
+ movl $16,%ecx
+ movl %esp,%esi
+ subl %eax,%ecx # NOTE(review): copies 16-(len&15) bytes, not len&15; confirm this is intended
+.long 2767451785 # movl %esi,%esi ; rep movsb
+.L075cbc_ret:
+ movl 16(%esp),%esp
+ movl 36(%esp),%ebp
+ movups %xmm7,(%ebp) # write the chaining value back through the IV pointer
+.L070cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
# ----------------------------------------------------------------------------
# _aesni_set_encrypt_key (file-local helper, register arguments):
#   %eax = pointer to the user key
#   %ecx = key length in bits (128, 192 or 256)
#   %edx = pointer to the key schedule to fill
# Returns %eax = 0 on success, -1 if either pointer is zero, -2 for any other
# bit length. On success %ecx is 9/11/13 and that value is also stored at byte
# offset 240 from the original %edx (80(%edx), 48(%edx) and 16(%edx) below all
# resolve to that offset once the pointer advances are added up).
# Clobbers %xmm0-%xmm5 and advances %edx.
# ----------------------------------------------------------------------------
+.type _aesni_set_encrypt_key,@function
+.align 16
+_aesni_set_encrypt_key:
+ testl %eax,%eax
+ jz .L086bad_pointer
+ testl %edx,%edx
+ jz .L086bad_pointer
+ movups (%eax),%xmm0 # first 16 key bytes
+ xorps %xmm4,%xmm4
+ leal 16(%edx),%edx
+ cmpl $256,%ecx
+ je .L08714rounds
+ cmpl $192,%ecx
+ je .L08812rounds
+ cmpl $128,%ecx
+ jne .L089bad_keybits
+.align 16
+.L09010rounds:
+ movl $9,%ecx
+ movups %xmm0,-16(%edx) # round key 0 = user key
+.byte 102,15,58,223,200,1 # aeskeygenassist $1,%xmm0,%xmm1
+ call .L091key_128_cold # "cold" entry: nothing to store yet
+.byte 102,15,58,223,200,2
+ call .L092key_128
+.byte 102,15,58,223,200,4
+ call .L092key_128
+.byte 102,15,58,223,200,8
+ call .L092key_128
+.byte 102,15,58,223,200,16
+ call .L092key_128
+.byte 102,15,58,223,200,32
+ call .L092key_128
+.byte 102,15,58,223,200,64
+ call .L092key_128
+.byte 102,15,58,223,200,128
+ call .L092key_128
+.byte 102,15,58,223,200,27
+ call .L092key_128
+.byte 102,15,58,223,200,54
+ call .L092key_128
+ movups %xmm0,(%edx)
+ movl %ecx,80(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L092key_128: # store the previous round key, then derive the next one
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.L091key_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 # broadcast dword 3 of the keygenassist result
+ xorps %xmm1,%xmm0
+ ret
+.align 16
+.L08812rounds:
+ movq 16(%eax),%xmm2 # remaining 8 key bytes
+ movl $11,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,202,1 # aeskeygenassist $1,%xmm2,%xmm1
+ call .L093key_192a_cold
+.byte 102,15,58,223,202,2
+ call .L094key_192b
+.byte 102,15,58,223,202,4
+ call .L095key_192a
+.byte 102,15,58,223,202,8
+ call .L094key_192b
+.byte 102,15,58,223,202,16
+ call .L095key_192a
+.byte 102,15,58,223,202,32
+ call .L094key_192b
+.byte 102,15,58,223,202,64
+ call .L095key_192a
+.byte 102,15,58,223,202,128
+ call .L094key_192b
+ movups %xmm0,(%edx)
+ movl %ecx,48(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L095key_192a: # stores 16 bytes
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.align 16
+.L093key_192a_cold:
+ movaps %xmm2,%xmm5
+.L096key_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 # broadcast dword 1 of the keygenassist result
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ ret
+.align 16
+.L094key_192b: # stores 32 bytes assembled from %xmm5, %xmm0 and %xmm2
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%edx)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%edx)
+ leal 32(%edx),%edx
+ jmp .L096key_192b_warm
+.align 16
+.L08714rounds:
+ movups 16(%eax),%xmm2 # second 16 key bytes
+ movl $13,%ecx
+ leal 16(%edx),%edx
+ movups %xmm0,-32(%edx)
+ movups %xmm2,-16(%edx)
+.byte 102,15,58,223,202,1 # aeskeygenassist $1,%xmm2,%xmm1
+ call .L097key_256a_cold
+.byte 102,15,58,223,200,1 # aeskeygenassist $1,%xmm0,%xmm1
+ call .L098key_256b
+.byte 102,15,58,223,202,2
+ call .L099key_256a
+.byte 102,15,58,223,200,2
+ call .L098key_256b
+.byte 102,15,58,223,202,4
+ call .L099key_256a
+.byte 102,15,58,223,200,4
+ call .L098key_256b
+.byte 102,15,58,223,202,8
+ call .L099key_256a
+.byte 102,15,58,223,200,8
+ call .L098key_256b
+.byte 102,15,58,223,202,16
+ call .L099key_256a
+.byte 102,15,58,223,200,16
+ call .L098key_256b
+.byte 102,15,58,223,202,32
+ call .L099key_256a
+.byte 102,15,58,223,200,32
+ call .L098key_256b
+.byte 102,15,58,223,202,64
+ call .L099key_256a
+ movups %xmm0,(%edx)
+ movl %ecx,16(%edx)
+ xorl %eax,%eax
+ ret
+.align 16
+.L099key_256a: # store %xmm2, then update %xmm0
+ movups %xmm2,(%edx)
+ leal 16(%edx),%edx
+.L097key_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 16
+.L098key_256b: # store %xmm0, then update %xmm2
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1 # broadcast dword 2 of the keygenassist result
+ xorps %xmm1,%xmm2
+ ret
+.align 4
+.L086bad_pointer:
+ movl $-1,%eax
+ ret
+.align 4
+.L089bad_keybits:
+ movl $-2,%eax
+ ret
+.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
# aesni_set_encrypt_key (global): stack-argument wrapper. Loads 4/8/12(%esp)
# into %eax (user key), %ecx (bits) and %edx (key schedule), then returns the
# helper's %eax unchanged.
+.globl aesni_set_encrypt_key
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call _aesni_set_encrypt_key
+ ret
+.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
# aesni_set_decrypt_key (global): same arguments as aesni_set_encrypt_key.
# Builds the encryption schedule, then converts it in place: the round keys
# are swapped end-for-end, and AESIMC is applied to every key except the two
# outermost. A non-zero helper status is returned as-is; on that path the
# schedule is not converted.
+.globl aesni_set_decrypt_key
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call _aesni_set_encrypt_key
+ movl 12(%esp),%edx # helper advanced %edx; reload the schedule base
+ shll $4,%ecx # %ecx = 16 * (9/11/13) as left by the helper
+ testl %eax,%eax
+ jnz .L100dec_key_ret
+ leal 16(%edx,%ecx,1),%eax # %eax -> last round key
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+ movups %xmm0,(%eax) # swap first and last keys, no AESIMC
+ movups %xmm1,(%edx)
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+.L101dec_key_inverse: # walk inward from both ends
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+.byte 102,15,56,219,192 # aesimc %xmm0,%xmm0
+.byte 102,15,56,219,201 # aesimc %xmm1,%xmm1
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+ movups %xmm0,16(%eax)
+ movups %xmm1,-16(%edx)
+ cmpl %edx,%eax
+ ja .L101dec_key_inverse
+ movups (%edx),%xmm0 # middle key: transformed in place
+.byte 102,15,56,219,192 # aesimc %xmm0,%xmm0
+ movups %xmm0,(%edx)
+ xorl %eax,%eax
+.L100dec_key_ret:
+ ret
+.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
# NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte 115,108,46,111,114,103,62,0
diff --git a/crypto/aes/asm/vpaes-x86.s b/crypto/aes/asm/vpaes-x86.s
new file mode 100644
index 0000000..c53a507
--- /dev/null
+++ b/crypto/aes/asm/vpaes-x86.s
@@ -0,0 +1,661 @@
+.file "vpaes-x86.s"
# ----------------------------------------------------------------------------
# vpaes-x86.s: AES built from 16-byte table lookups.
# Raw opcodes used below:
#   .byte 102,15,56,0,<modrm>        PSHUFB  (modrm = 0xC0 | dst<<3 | src)
#   .byte 102,15,58,15,<modrm>,<n>   PALIGNR $n
# Every routine addresses the constant pool through
#   %ebp = .L_vpaes_consts+0x30
# so -48(%ebp) is the first 16-byte row and -16(%ebp) is the third row
# (0x0F0F0F0F x4, the low-nibble mask).
# The global entry points load %ebp with the link-time difference
# ".L_vpaes_consts+0x30-.L0NNpic_point"; the callee then adds its return
# address (which is that pic_point label) to get the absolute address.
# ----------------------------------------------------------------------------
+.text
+.align 64
+.L_vpaes_consts:
+.long 218628480,235210255,168496130,67568393
+.long 252381056,17041926,33884169,51187212
+.long 252645135,252645135,252645135,252645135
+.long 1512730624,3266504856,1377990664,3401244816
+.long 830229760,1275146365,2969422977,3447763452
+.long 3411033600,2979783055,338359620,2782886510
+.long 4209124096,907596821,221174255,1006095553
+.long 191964160,3799684038,3164090317,1589111125
+.long 182528256,1777043520,2877432650,3265356744
+.long 1874708224,3503451415,3305285752,363511674
+.long 1606117888,3487855781,1093350906,2384367825
+.long 197121,67569157,134941193,202313229
+.long 67569157,134941193,202313229,197121
+.long 134941193,202313229,197121,67569157
+.long 202313229,197121,67569157,134941193
+.long 33619971,100992007,168364043,235736079
+.long 235736079,33619971,100992007,168364043
+.long 168364043,235736079,33619971,100992007
+.long 100992007,168364043,235736079,33619971
+.long 50462976,117835012,185207048,252579084
+.long 252314880,51251460,117574920,184942860
+.long 184682752,252054788,50987272,118359308
+.long 118099200,185467140,251790600,50727180
+.long 2946363062,528716217,1300004225,1881839624
+.long 1532713819,1532713819,1532713819,1532713819
+.long 3602276352,4288629033,3737020424,4153884961
+.long 1354558464,32357713,2958822624,3775749553
+.long 1201988352,132424512,1572796698,503232858
+.long 2213177600,1597421020,4103937655,675398315
+.long 2749646592,4273543773,1511898873,121693092
+.long 3040248576,1103263732,2871565598,1608280554
+.long 2236667136,2588920351,482954393,64377734
+.long 3069987328,291237287,2117370568,3650299247
+.long 533321216,3573750986,2572112006,1401264716
+.long 1339849704,2721158661,548607111,3445553514
+.long 2128193280,3054596040,2183486460,1257083700
+.long 655635200,1165381986,3923443150,2344132524
+.long 190078720,256924420,290342170,357187870
+.long 1610966272,2263057382,4103205268,309794674
+.long 2592527872,2233205587,1335446729,3402964816
+.long 3973531904,3225098121,3002836325,1918774430
+.long 3870401024,2102906079,2284471353,4117666579
+.long 617007872,1021508343,366931923,691083277
+.long 2528395776,3491914898,2968704004,1613121270
+.long 3445188352,3247741094,844474987,4093578302
+.long 651481088,1190302358,1689581232,574775300
+.long 4289380608,206939853,2555985458,2489840491
+.long 2130264064,327674451,3566485037,3349835193
+.long 2470714624,316102159,3636825756,3393945945
# NUL-terminated ASCII: "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+.byte 118,101,114,115,105,116,121,41,0
+.align 64
# _vpaes_preheat: turns the relative %ebp supplied by the caller into an
# absolute pointer by adding the return address, then loads
# %xmm7 = row at -48(%ebp) and %xmm6 = row at -16(%ebp) (nibble mask).
+.type _vpaes_preheat,@function
+.align 16
+_vpaes_preheat:
+ addl (%esp),%ebp
+ movdqa -48(%ebp),%xmm7
+ movdqa -16(%ebp),%xmm6
+ ret
+.size _vpaes_preheat,.-_vpaes_preheat
# _vpaes_encrypt_core
#   In:  %xmm0 = input block, %edx = key schedule (loop count read from
#        240(%edx)), %ebp/%xmm6/%xmm7 as left by _vpaes_preheat.
#   Out: %xmm0 = result.
#   Clobbers %eax, %ebx, %ecx, %edx, %xmm1-%xmm5.
+.type _vpaes_encrypt_core,@function
+.align 16
+_vpaes_encrypt_core:
+ movl $16,%ecx
+ movl 240(%edx),%eax
+ movdqa %xmm6,%xmm1
+ movdqa (%ebp),%xmm2
+ pandn %xmm0,%xmm1
+ movdqu (%edx),%xmm5
+ psrld $4,%xmm1 # %xmm1 = high nibbles
+ pand %xmm6,%xmm0 # %xmm0 = low nibbles
+.byte 102,15,56,0,208
+ movdqa 16(%ebp),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addl $16,%edx # also leaves ZF clear for the first pass through jnz below
+ leal 192(%ebp),%ebx
+ jmp .L000enc_entry
+.align 16
+.L001enc_loop:
+ movdqa 32(%ebp),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+ movdqa 48(%ebp),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ movdqa 64(%ebp),%xmm5
+.byte 102,15,56,0,234
+ movdqa -64(%ebx,%ecx,1),%xmm1
+ movdqa 80(%ebp),%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm5,%xmm2
+ movdqa (%ebx,%ecx,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193
+ addl $16,%edx # next round key
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220
+ addl $16,%ecx
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193
+ andl $48,%ecx # %ecx cycles through 0,16,32,48
+ pxor %xmm3,%xmm0
+ subl $1,%eax # sets the flags tested by jnz below; only SSE ops and moves follow
+.L000enc_entry:
+ movdqa %xmm6,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm6,%xmm0
+ movdqa -32(%ebp),%xmm5
+.byte 102,15,56,0,232
+ pxor %xmm1,%xmm0
+ movdqa %xmm7,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm5,%xmm3
+ movdqa %xmm7,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm5,%xmm4
+ movdqa %xmm7,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm7,%xmm3
+ movdqu (%edx),%xmm5
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ jnz .L001enc_loop
+ movdqa 96(%ebp),%xmm4
+ movdqa 112(%ebp),%xmm0
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195
+ movdqa 64(%ebx,%ecx,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193
+ ret
+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
# _vpaes_decrypt_core
#   In:  %xmm0 = input block, %edx = key schedule (loop count read from
#        240(%edx)), %ebp/%xmm6/%xmm7 as left by _vpaes_preheat.
#   Out: %xmm0 = result.
#   Clobbers %eax, %ebx, %ecx, %edx, %xmm1-%xmm5.
+.type _vpaes_decrypt_core,@function
+.align 16
+_vpaes_decrypt_core:
+ movl 240(%edx),%eax
+ leal 608(%ebp),%ebx
+ movdqa %xmm6,%xmm1
+ movdqa -64(%ebx),%xmm2
+ pandn %xmm0,%xmm1
+ movl %eax,%ecx
+ psrld $4,%xmm1
+ movdqu (%edx),%xmm5
+ shll $4,%ecx
+ pand %xmm6,%xmm0
+.byte 102,15,56,0,208
+ movdqa -48(%ebx),%xmm0
+ xorl $48,%ecx
+.byte 102,15,56,0,193
+ andl $48,%ecx
+ pxor %xmm5,%xmm2
+ movdqa 176(%ebp),%xmm5
+ pxor %xmm2,%xmm0
+ addl $16,%edx
+ leal -352(%ebx,%ecx,1),%ecx # %ecx -> one of four 16-byte rows, used only after the loop
+ jmp .L002dec_entry
+.align 16
+.L003dec_loop:
+ movdqa -32(%ebx),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa -16(%ebx),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ addl $16,%edx
+.byte 102,15,56,0,197
+ movdqa (%ebx),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 16(%ebx),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ subl $1,%eax # sets the flags tested by jnz in .L002dec_entry; nothing after it writes EFLAGS
+.byte 102,15,56,0,197
+ movdqa 32(%ebx),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 48(%ebx),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqa 64(%ebx),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 80(%ebx),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,237,12 # palignr $12,%xmm5,%xmm5
+.L002dec_entry:
+ movdqa %xmm6,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm6,%xmm0
+ movdqa -32(%ebp),%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm7,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm7,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm7,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm7,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqu (%edx),%xmm0
+ jnz .L003dec_loop
+ movdqa 96(%ebx),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 112(%ebx),%xmm0
+ movdqa (%ecx),%xmm2
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194
+ ret
+.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
# _vpaes_schedule_core: key expansion shared by both set-key entry points.
#   In:  %esi = user key, %eax = key length in bits, %edx = output pointer,
#        %ecx = offset into the row table at 256(%ebp), kept to 0/16/32/48,
#        %edi = 0 for an encryption schedule, non-zero for decryption,
#        %ebp = relative constant pointer (made absolute on entry).
#   Requires the caller's %esp to be 16-byte aligned at the call: 4(%esp)
#   and 20(%esp) are accessed with movdqa.
#   All of %xmm0-%xmm7 are zeroed before returning.
#   Bit lengths above 192 take the 256 path, exactly 192 the 192 path, and
#   everything else the 128 path; nothing is validated here.
+.type _vpaes_schedule_core,@function
+.align 16
+_vpaes_schedule_core:
+ addl (%esp),%ebp
+ movdqu (%esi),%xmm0
+ movdqa 320(%ebp),%xmm2
+ movdqa %xmm0,%xmm3
+ leal (%ebp),%ebx
+ movdqa %xmm2,4(%esp) # slot read and updated by _vpaes_schedule_round as 8(%esp)
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+ testl %edi,%edi
+ jnz .L004schedule_am_decrypting
+ movdqu %xmm0,(%edx)
+ jmp .L005schedule_go
+.L004schedule_am_decrypting:
+ movdqa 256(%ebp,%ecx,1),%xmm1
+.byte 102,15,56,0,217
+ movdqu %xmm3,(%edx)
+ xorl $48,%ecx
+.L005schedule_go:
+ cmpl $192,%eax
+ ja .L006schedule_256
+ je .L007schedule_192
+.L008schedule_128:
+ movl $10,%eax
+.L009loop_schedule_128:
+ call _vpaes_schedule_round
+ decl %eax
+ jz .L010schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .L009loop_schedule_128
+.align 16
+.L007schedule_192:
+ movdqu 8(%esi),%xmm0
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6
+ movl $4,%eax
+.L011loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 # palignr $8,%xmm6,%xmm0
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decl %eax
+ jz .L010schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .L011loop_schedule_192
+.align 16
+.L006schedule_256:
+ movdqu 16(%esi),%xmm0
+ call _vpaes_schedule_transform
+ movl $7,%eax
+.L012loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+ call _vpaes_schedule_round
+ decl %eax
+ jz .L010schedule_mangle_last
+ call _vpaes_schedule_mangle
+ pshufd $255,%xmm0,%xmm0
+ movdqa %xmm7,20(%esp) # park %xmm7 across the low-round call
+ movdqa %xmm6,%xmm7
+ call .L_vpaes_schedule_low_round # enters _vpaes_schedule_round past its 8(%esp) update
+ movdqa 20(%esp),%xmm7
+ jmp .L012loop_schedule_256
+.align 16
+.L010schedule_mangle_last:
+ leal 384(%ebp),%ebx
+ testl %edi,%edi
+ jnz .L013schedule_mangle_last_dec
+ movdqa 256(%ebp,%ecx,1),%xmm1
+.byte 102,15,56,0,193
+ leal 352(%ebp),%ebx
+ addl $32,%edx
+.L013schedule_mangle_last_dec:
+ addl $-16,%edx
+ pxor 336(%ebp),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%edx)
+ pxor %xmm0,%xmm0 # wipe all XMM registers before returning
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ ret
+.size _vpaes_schedule_core,.-_vpaes_schedule_core
# _vpaes_schedule_192_smear: mixes dwords of %xmm6 and %xmm7 into %xmm6,
# copies the result to %xmm0, then clears the low half of %xmm6.
# Clobbers %xmm1.
+.type _vpaes_schedule_192_smear,@function
+.align 16
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6
+ ret
+.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
# _vpaes_schedule_round: one key-schedule round on %xmm7 (running key) and
# %xmm0 (input word). Result is left in both %xmm0 and %xmm7.
# 8(%esp) is the 16-byte slot _vpaes_schedule_core wrote as 4(%esp), now one
# return address deeper. It is rotated by one byte on every full round.
# .L_vpaes_schedule_low_round is a second entry that skips that rotation and
# the rotation of %xmm0.
# Clobbers %xmm1-%xmm5.
+.type _vpaes_schedule_round,@function
+.align 16
+_vpaes_schedule_round:
+ movdqa 8(%esp),%xmm2
+ pxor %xmm1,%xmm1
+.byte 102,15,58,15,202,15 # palignr $15,%xmm2,%xmm1
+.byte 102,15,58,15,210,15 # palignr $15,%xmm2,%xmm2
+ pxor %xmm1,%xmm7
+ pshufd $255,%xmm0,%xmm0
+.byte 102,15,58,15,192,1 # palignr $1,%xmm0,%xmm0
+ movdqa %xmm2,8(%esp)
+.L_vpaes_schedule_low_round:
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor 336(%ebp),%xmm7
+ movdqa -16(%ebp),%xmm4
+ movdqa -48(%ebp),%xmm5
+ movdqa %xmm4,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm4,%xmm0
+ movdqa -32(%ebp),%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm5,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm5,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm5,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm5,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqa 32(%ebp),%xmm4
+.byte 102,15,56,0,226
+ movdqa 48(%ebp),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ ret
+.size _vpaes_schedule_round,.-_vpaes_schedule_round
# _vpaes_schedule_transform: splits %xmm0 into nibbles, looks the low nibbles
# up in the row at (%ebx) and the high nibbles in the row at 16(%ebx), and
# XORs the two lookups into %xmm0. Clobbers %xmm1, %xmm2.
+.type _vpaes_schedule_transform,@function
+.align 16
+_vpaes_schedule_transform:
+ movdqa -16(%ebp),%xmm2
+ movdqa %xmm2,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm2,%xmm0
+ movdqa (%ebx),%xmm2
+.byte 102,15,56,0,208
+ movdqa 16(%ebx),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm2,%xmm0
+ ret
+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
# _vpaes_schedule_mangle: writes one round key derived from %xmm0 to the
# output. %edi == 0: %edx is advanced by 16 before the store. %edi != 0: %edx
# is moved back by 16, and %esi is overwritten with a table pointer.
# %ecx is stepped by -16 modulo 64. %xmm0 is preserved.
# Clobbers %xmm1-%xmm5.
+.type _vpaes_schedule_mangle,@function
+.align 16
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4
+ movdqa 128(%ebp),%xmm5
+ testl %edi,%edi
+ jnz .L014schedule_mangle_dec
+ addl $16,%edx
+ pxor 336(%ebp),%xmm4
+.byte 102,15,56,0,229
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+ jmp .L015schedule_mangle_both
+.align 16
+.L014schedule_mangle_dec:
+ movdqa -16(%ebp),%xmm2
+ leal 416(%ebp),%esi
+ movdqa %xmm2,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1
+ pand %xmm2,%xmm4
+ movdqa (%esi),%xmm2
+.byte 102,15,56,0,212
+ movdqa 16(%esi),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 32(%esi),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 48(%esi),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 64(%esi),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 80(%esi),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 96(%esi),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 112(%esi),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ addl $-16,%edx
+.L015schedule_mangle_both:
+ movdqa 256(%ebp,%ecx,1),%xmm1
+.byte 102,15,56,0,217
+ addl $-16,%ecx
+ andl $48,%ecx
+ movdqu %xmm3,(%edx)
+ ret
+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
# vpaes_set_encrypt_key (global). Stack arguments after the four pushes:
#   20(%esp) -> %esi user key, 24(%esp) -> %eax key length in bits,
#   28(%esp) -> %edx key schedule.
# Stores (bits >> 5) + 5 at 240(%edx), runs the schedule core on a 16-byte
# aligned scratch frame (caller's %esp saved at 48(%esp)), and always
# returns 0 in %eax.
+.globl vpaes_set_encrypt_key
+.type vpaes_set_encrypt_key,@function
+.align 16
+vpaes_set_encrypt_key:
+.L_vpaes_set_encrypt_key_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ leal -56(%esp),%ebx
+ movl 24(%esp),%eax
+ andl $-16,%ebx
+ movl 28(%esp),%edx
+ xchgl %esp,%ebx
+ movl %ebx,48(%esp)
+ movl %eax,%ebx
+ shrl $5,%ebx
+ addl $5,%ebx
+ movl %ebx,240(%edx)
+ movl $48,%ecx
+ movl $0,%edi # 0 = encryption schedule
+ leal .L_vpaes_consts+0x30-.L016pic_point,%ebp
+ call _vpaes_schedule_core
+.L016pic_point:
+ movl 48(%esp),%esp
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
# vpaes_set_decrypt_key (global): same arguments as vpaes_set_encrypt_key.
# Differences: %edx is first advanced to 16 + 16*count bytes past the
# schedule base, %edi = 1, and the starting %ecx is
# ((bits >> 1) & 32) ^ 32. Always returns 0 in %eax.
+.globl vpaes_set_decrypt_key
+.type vpaes_set_decrypt_key,@function
+.align 16
+vpaes_set_decrypt_key:
+.L_vpaes_set_decrypt_key_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ leal -56(%esp),%ebx
+ movl 24(%esp),%eax
+ andl $-16,%ebx
+ movl 28(%esp),%edx
+ xchgl %esp,%ebx
+ movl %ebx,48(%esp)
+ movl %eax,%ebx
+ shrl $5,%ebx
+ addl $5,%ebx
+ movl %ebx,240(%edx)
+ shll $4,%ebx
+ leal 16(%edx,%ebx,1),%edx
+ movl $1,%edi # non-zero = decryption schedule
+ movl %eax,%ecx
+ shrl $1,%ecx
+ andl $32,%ecx
+ xorl $32,%ecx
+ leal .L_vpaes_consts+0x30-.L017pic_point,%ebp
+ call _vpaes_schedule_core
+.L017pic_point:
+ movl 48(%esp),%esp
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
# vpaes_encrypt (global). Stack arguments after the four pushes:
#   20(%esp) -> %esi input block, 24(%esp) -> %edi output block,
#   28(%esp) -> %edx key schedule.
# Processes exactly one 16-byte block.
+.globl vpaes_encrypt
+.type vpaes_encrypt,@function
+.align 16
+vpaes_encrypt:
+.L_vpaes_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ leal .L_vpaes_consts+0x30-.L018pic_point,%ebp
+ call _vpaes_preheat
+.L018pic_point:
+ movl 20(%esp),%esi
+ leal -56(%esp),%ebx
+ movl 24(%esp),%edi
+ andl $-16,%ebx
+ movl 28(%esp),%edx
+ xchgl %esp,%ebx
+ movl %ebx,48(%esp)
+ movdqu (%esi),%xmm0
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%edi)
+ movl 48(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size vpaes_encrypt,.-.L_vpaes_encrypt_begin
# vpaes_decrypt (global): same arguments and frame handling as vpaes_encrypt,
# but calls _vpaes_decrypt_core.
+.globl vpaes_decrypt
+.type vpaes_decrypt,@function
+.align 16
+vpaes_decrypt:
+.L_vpaes_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ leal .L_vpaes_consts+0x30-.L019pic_point,%ebp
+ call _vpaes_preheat
+.L019pic_point:
+ movl 20(%esp),%esi
+ leal -56(%esp),%ebx
+ movl 24(%esp),%edi
+ andl $-16,%ebx
+ movl 28(%esp),%edx
+ xchgl %esp,%ebx
+ movl %ebx,48(%esp)
+ movdqu (%esi),%xmm0
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%edi)
+ movl 48(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size vpaes_decrypt,.-.L_vpaes_decrypt_begin
# vpaes_cbc_encrypt (global). Stack arguments after the four pushes:
#   20(%esp) -> %esi source, 24(%esp) -> %edi destination,
#   28(%esp) -> %eax byte count, 32(%esp) -> %edx key schedule,
#   36(%esp) -> %ebp pointer to the 16-byte IV,
#   40(%esp) -> %ecx direction flag (zero selects the decrypt loop).
# Byte counts below 16 return at once without touching the IV. Otherwise one
# 16-byte block is processed per iteration while the remaining count stays
# non-negative; trailing bytes short of a full block are not processed.
# The final chaining value is written back through the IV pointer.
# Aligned frame: 0(%esp) = destination minus source, 4(%esp) = key schedule,
# 8(%esp) = IV pointer, 16(%esp)/32(%esp) = decrypt-loop scratch,
# 48(%esp) = caller's %esp.
+.globl vpaes_cbc_encrypt
+.type vpaes_cbc_encrypt,@function
+.align 16
+vpaes_cbc_encrypt:
+.L_vpaes_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ subl $16,%eax
+ jc .L020cbc_abort # fewer than 16 bytes
+ leal -56(%esp),%ebx
+ movl 36(%esp),%ebp
+ andl $-16,%ebx
+ movl 40(%esp),%ecx
+ xchgl %esp,%ebx
+ movdqu (%ebp),%xmm1 # %xmm1 = chaining value
+ subl %esi,%edi # output is addressed as (%ebx,%esi,1) with %ebx = out - in
+ movl %ebx,48(%esp)
+ movl %edi,(%esp)
+ movl %edx,4(%esp)
+ movl %ebp,8(%esp)
+ movl %eax,%edi # %edi = byte count - 16, the loop counter
+ leal .L_vpaes_consts+0x30-.L021pic_point,%ebp
+ call _vpaes_preheat
+.L021pic_point:
+ cmpl $0,%ecx
+ je .L022cbc_dec_loop
+ jmp .L023cbc_enc_loop
+.align 16
+.L023cbc_enc_loop:
+ movdqu (%esi),%xmm0
+ pxor %xmm1,%xmm0
+ call _vpaes_encrypt_core
+ movl (%esp),%ebx # the core clobbers %ebx and %edx; reload both
+ movl 4(%esp),%edx
+ movdqa %xmm0,%xmm1
+ movdqu %xmm0,(%ebx,%esi,1)
+ leal 16(%esi),%esi
+ subl $16,%edi
+ jnc .L023cbc_enc_loop
+ jmp .L024cbc_done
+.align 16
+.L022cbc_dec_loop:
+ movdqu (%esi),%xmm0
+ movdqa %xmm1,16(%esp) # chaining value for this block
+ movdqa %xmm0,32(%esp) # ciphertext becomes the next chaining value
+ call _vpaes_decrypt_core
+ movl (%esp),%ebx
+ movl 4(%esp),%edx
+ pxor 16(%esp),%xmm0
+ movdqa 32(%esp),%xmm1
+ movdqu %xmm0,(%ebx,%esi,1)
+ leal 16(%esi),%esi
+ subl $16,%edi
+ jnc .L022cbc_dec_loop
+.L024cbc_done:
+ movl 8(%esp),%ebx
+ movl 48(%esp),%esp
+ movdqu %xmm1,(%ebx)
+.L020cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
|