diff options
author | Brian Carlstrom <bdc@google.com> | 2010-03-09 09:56:55 -0800 |
---|---|---|
committer | Brian Carlstrom <bdc@google.com> | 2010-03-09 09:56:55 -0800 |
commit | 98d58bb80c64b02a33662f0ea80351d4a1535267 (patch) | |
tree | 888bb3c433b6adb3778f6c3539e67ba6e6fc2639 /crypto/aes | |
parent | 1ddd788de7dc2f73716b032bdaeb988dccd95ab5 (diff) | |
download | replicant_openssl-98d58bb80c64b02a33662f0ea80351d4a1535267.zip replicant_openssl-98d58bb80c64b02a33662f0ea80351d4a1535267.tar.gz replicant_openssl-98d58bb80c64b02a33662f0ea80351d4a1535267.tar.bz2 |
Summary: upgrading to openssl-0.9.8m and adding new testssl.sh
Testing Summary:
- Passed new android.testssl/testssl.sh
- General testing with BrowserActivity based program
Details:
Expanded detail in README.android about how to build and test openssl
upgrades based on my first experience.
modified: README.android
Significant rework of import_openssl.sh script that does most of
the work of the upgrade. Most of the existing code became the main
and import functions. The new regenerate code helps regenerate
patch files, building on the fact that import now keeps an
original unmodified read-only source tree for use in patch
generation. Patch generation relies on additions to openssl.config
for defining which patches include which files. Note that
sometimes a file may be patched multiple times, in that case
manual review is still necessary to prune the patch after
auto-regeneration. Other enhancements to import_openssl.sh include
generating android.testssl and printing Makefile defines for
android-config.mk review.
modified: import_openssl.sh
Test support files for openssl/
Add support for building /system/bin/ssltest as test executable for
use by testssl script. Need confirmation that this is the right way
to define such a test binary.
modified: patches/ssl_Android.mk
Driver script that generates user and CA keys and certs on the
device with /system/bin/openssl before running testssl. Based on
openssl/test/testss for generation and openssl/test/Makefile
test_ssl for test execution.
new file: patches/testssl.sh
Note all following android.testssl files are automatically
imported from openssl, although possibly with modifications by
import_openssl.sh
testssl script imported from openssl/test that does the bulk of
the testing. Includes new tests patched in for our additions.
new file: android.testssl/testssl
CA and user certificate configuration files from openssl.
Automatically imported from openssl/test/
new file: android.testssl/CAss.cnf
new file: android.testssl/Uss.cnf
certificate and key test file imported from openssl/apps
new file: android.testssl/server2.pem
Actual 0.9.8m upgrade specific bits
Trying to bring ngm's small records support into 0.9.8m. Needs
signoff by ngm although it does pass testing.
modified: patches/small_records.patch
Update openssl.config for 0.9.8m. Expanded lists of unneeded
directories and files for easier update and review, adding new
excludes. Also added new definitions to support "import_openssl.sh
regenerate" for patch updating.
modified: openssl.config
Updated OPENSSL_VERSION to 0.9.8m
modified: openssl.version
Automatically imported/patched files. Seems like it could be
further pruned by openssl.config UNNEEDED_SOURCES, but extra
stuff doesn't end up impacting device.
modified: apps/...
modified: crypto/...
modified: include/...
modified: ssl/...
Other Android build stuff.
Note: for these, patches/... is the source and .../Android.mk is derived.
Split LOCAL_CFLAGS additions into lines based on openssl/Makefile
source for easier comparison when upgrading. I knowingly left the
lines long and unwrapped for easy vdiff with openssl/Makefile
modified: android-config.mk
Removed local -DOPENSSL_NO_ECDH already in android-config.mk.
modified: patches/apps_Android.mk
Sync up with changes that had crept into derived crypto/Android.mk
modified: patches/crypto_Android.mk
Change-Id: I73204c56cdaccfc45d03a9c8088a6a93003d7ce6
Diffstat (limited to 'crypto/aes')
-rw-r--r-- | crypto/aes/aes_cfb.c | 1 | ||||
-rw-r--r-- | crypto/aes/aes_x86core.c | 1063 | ||||
-rw-r--r-- | crypto/aes/asm/aes-armv4.pl | 1030 | ||||
-rw-r--r-- | crypto/aes/asm/aes-ppc.pl | 1176 | ||||
-rw-r--r-- | crypto/aes/asm/aes-s390x.pl | 1333 | ||||
-rwxr-xr-x | crypto/aes/asm/aes-sparcv9.pl | 1181 | ||||
-rwxr-xr-x | crypto/aes/asm/aes-x86_64.pl | 2 |
7 files changed, 1 insertions, 5785 deletions
diff --git a/crypto/aes/aes_cfb.c b/crypto/aes/aes_cfb.c index 49f0411..9384ba6 100644 --- a/crypto/aes/aes_cfb.c +++ b/crypto/aes/aes_cfb.c @@ -201,7 +201,6 @@ void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, assert(in && out && key && ivec && num); assert(*num == 0); - memset(out,0,(length+7)/8); for(n=0 ; n < length ; ++n) { c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0; diff --git a/crypto/aes/aes_x86core.c b/crypto/aes/aes_x86core.c deleted file mode 100644 index d323e26..0000000 --- a/crypto/aes/aes_x86core.c +++ /dev/null @@ -1,1063 +0,0 @@ -/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */ -/** - * rijndael-alg-fst.c - * - * @version 3.0 (December 2000) - * - * Optimised ANSI C code for the Rijndael cipher (now AES) - * - * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> - * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be> - * @author Paulo Barreto <paulo.barreto@terra.com.br> - * - * This code is hereby placed in the public domain. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * This is experimental x86[_64] derivative. It assumes little-endian - * byte order and expects CPU to sustain unaligned memory references. 
- * It is used as playground for cache-time attack mitigations and - * serves as reference C implementation for x86[_64] assembler. - * - * <appro@fy.chalmers.se> - */ - - -#ifndef AES_DEBUG -# ifndef NDEBUG -# define NDEBUG -# endif -#endif -#include <assert.h> - -#include <stdlib.h> -#include <openssl/aes.h> -#include "aes_locl.h" - -/* - * These two parameters control which table, 256-byte or 2KB, is - * referenced in outer and respectively inner rounds. - */ -#define AES_COMPACT_IN_OUTER_ROUNDS -#ifdef AES_COMPACT_IN_OUTER_ROUNDS -/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while - * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further* - * by factor of ~2. */ -# undef AES_COMPACT_IN_INNER_ROUNDS -#endif - -#if 1 -static void prefetch256(const void *table) -{ - volatile unsigned long *t=(void *)table,ret; - unsigned long sum; - int i; - - /* 32 is common least cache-line size */ - for (sum=0,i=0;i<256/sizeof(t[0]);i+=32/sizeof(t[0])) sum ^= t[i]; - - ret = sum; -} -#else -# define prefetch256(t) -#endif - -#undef GETU32 -#define GETU32(p) (*((u32*)(p))) - -#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) -typedef unsigned __int64 u64; -#define U64(C) C##UI64 -#elif defined(__arch64__) -typedef unsigned long u64; -#define U64(C) C##UL -#else -typedef unsigned long long u64; -#define U64(C) C##ULL -#endif - -#undef ROTATE -#if defined(_MSC_VER) || defined(__ICC) -# define ROTATE(a,n) _lrotl(a,n) -#elif defined(__GNUC__) && __GNUC__>=2 -# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) -# define ROTATE(a,n) ({ register unsigned int ret; \ - asm ( \ - "roll %1,%0" \ - : "=r"(ret) \ - : "I"(n), "0"(a) \ - : "cc"); \ - ret; \ - }) -# endif -#endif -/* -Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03]; -Te0[x] = S [x].[02, 01, 01, 03]; -Te1[x] = S [x].[03, 02, 01, 01]; -Te2[x] = S [x].[01, 03, 02, 01]; -Te3[x] = S [x].[01, 01, 03, 02]; -*/ -#define Te0 (u32)((u64*)((u8*)Te+0)) -#define Te1 
(u32)((u64*)((u8*)Te+3)) -#define Te2 (u32)((u64*)((u8*)Te+2)) -#define Te3 (u32)((u64*)((u8*)Te+1)) -/* -Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b]; -Td0[x] = Si[x].[0e, 09, 0d, 0b]; -Td1[x] = Si[x].[0b, 0e, 09, 0d]; -Td2[x] = Si[x].[0d, 0b, 0e, 09]; -Td3[x] = Si[x].[09, 0d, 0b, 0e]; -Td4[x] = Si[x].[01]; -*/ -#define Td0 (u32)((u64*)((u8*)Td+0)) -#define Td1 (u32)((u64*)((u8*)Td+3)) -#define Td2 (u32)((u64*)((u8*)Td+2)) -#define Td3 (u32)((u64*)((u8*)Td+1)) - -static const u64 Te[256] = { - U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8), - U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6), - U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6), - U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591), - U64(0x5030306050303060), U64(0x0301010203010102), - U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56), - U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5), - U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec), - U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f), - U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa), - U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2), - U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb), - U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3), - U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45), - U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453), - U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b), - U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1), - U64(0xae93933dae93933d), U64(0x6a26264c6a26264c), - U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e), - U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83), - U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551), - U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9), - U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab), - U64(0x5331316253313162), U64(0x3f15152a3f15152a), - U64(0x0c0404080c040408), U64(0x52c7c79552c7c795), - U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d), - U64(0x2818183028181830), U64(0xa1969637a1969637), - U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f), - U64(0x0907070e0907070e), 
U64(0x3612122436121224), - U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df), - U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e), - U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea), - U64(0x1b0909121b090912), U64(0x9e83831d9e83831d), - U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34), - U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc), - U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b), - U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76), - U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d), - U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd), - U64(0x712f2f5e712f2f5e), U64(0x9784841397848413), - U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9), - U64(0x0000000000000000), U64(0x2cededc12cededc1), - U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3), - U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6), - U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d), - U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972), - U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98), - U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85), - U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5), - U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed), - U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a), - U64(0x5533336655333366), U64(0x9485851194858511), - U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9), - U64(0x0602020406020204), U64(0x817f7ffe817f7ffe), - U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78), - U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b), - U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d), - U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05), - U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21), - U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1), - U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677), - U64(0x75dadaaf75dadaaf), U64(0x6321214263212142), - U64(0x3010102030101020), U64(0x1affffe51affffe5), - U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf), - U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18), - U64(0x3513132635131326), U64(0x2fececc32fececc3), - U64(0xe15f5fbee15f5fbe), 
U64(0xa2979735a2979735), - U64(0xcc444488cc444488), U64(0x3917172e3917172e), - U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755), - U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a), - U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba), - U64(0x2b1919322b191932), U64(0x957373e6957373e6), - U64(0xa06060c0a06060c0), U64(0x9881811998818119), - U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3), - U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54), - U64(0xab90903bab90903b), U64(0x8388880b8388880b), - U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7), - U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428), - U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc), - U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad), - U64(0x3be0e0db3be0e0db), U64(0x5632326456323264), - U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14), - U64(0xdb494992db494992), U64(0x0a06060c0a06060c), - U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8), - U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd), - U64(0xefacac43efacac43), U64(0xa66262c4a66262c4), - U64(0xa8919139a8919139), U64(0xa4959531a4959531), - U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2), - U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b), - U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda), - U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1), - U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949), - U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac), - U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf), - U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4), - U64(0xe9aeae47e9aeae47), U64(0x1808081018080810), - U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0), - U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c), - U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657), - U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697), - U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1), - U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e), - U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61), - U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f), - U64(0x907070e0907070e0), 
U64(0x423e3e7c423e3e7c), - U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc), - U64(0xd8484890d8484890), U64(0x0503030605030306), - U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c), - U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a), - U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969), - U64(0x9186861791868617), U64(0x58c1c19958c1c199), - U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27), - U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb), - U64(0xb398982bb398982b), U64(0x3311112233111122), - U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9), - U64(0x898e8e07898e8e07), U64(0xa7949433a7949433), - U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c), - U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9), - U64(0x49cece8749cece87), U64(0xff5555aaff5555aa), - U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5), - U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159), - U64(0x8089890980898909), U64(0x170d0d1a170d0d1a), - U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7), - U64(0xc6424284c6424284), U64(0xb86868d0b86868d0), - U64(0xc3414182c3414182), U64(0xb0999929b0999929), - U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e), - U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8), - U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c) -}; - -static const u8 Te4[256] = { - 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, - 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, - 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, - 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, - 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, - 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, - 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, - 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, - 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, - 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, - 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, - 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, - 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 
0x33U, 0x85U, - 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, - 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, - 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, - 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, - 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, - 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, - 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, - 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, - 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, - 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, - 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, - 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, - 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, - 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, - 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, - 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, - 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, - 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, - 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U -}; - -static const u64 Td[256] = { - U64(0x50a7f45150a7f451), U64(0x5365417e5365417e), - U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a), - U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f), - U64(0xab58faacab58faac), U64(0x9303e34b9303e34b), - U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad), - U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5), - U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5), - U64(0x8044352680443526), U64(0x8fa362b58fa362b5), - U64(0x495ab1de495ab1de), U64(0x671bba25671bba25), - U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d), - U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81), - U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b), - U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215), - U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295), - U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458), - U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e), - 
U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4), - U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927), - U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0), - U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d), - U64(0x184adf63184adf63), U64(0x82311ae582311ae5), - U64(0x6033519760335197), U64(0x457f5362457f5362), - U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb), - U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9), - U64(0x5868487058684870), U64(0x19fd458f19fd458f), - U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52), - U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72), - U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566), - U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f), - U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3), - U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23), - U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed), - U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7), - U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e), - U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506), - U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4), - U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2), - U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4), - U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040), - U64(0x069f715e069f715e), U64(0x51106ebd51106ebd), - U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96), - U64(0xae053eddae053edd), U64(0x46bde64d46bde64d), - U64(0xb58d5491b58d5491), U64(0x055dc471055dc471), - U64(0x6fd406046fd40604), U64(0xff155060ff155060), - U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6), - U64(0xcc434089cc434089), U64(0x779ed967779ed967), - U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907), - U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879), - U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c), - U64(0xc91e84f8c91e84f8), U64(0x0000000000000000), - U64(0x8386800983868009), U64(0x48ed2b3248ed2b32), - U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c), - U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f), - U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36), - 
U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68), - U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624), - U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793), - U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b), - U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61), - U64(0x694b775a694b775a), U64(0x161a121c161a121c), - U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0), - U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12), - U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2), - U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14), - U64(0x8519f1578519f157), U64(0x4c0775af4c0775af), - U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3), - U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c), - U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b), - U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb), - U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8), - U64(0xcadc31d7cadc31d7), U64(0x1085634210856342), - U64(0x4022971340229713), U64(0x2011c6842011c684), - U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2), - U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7), - U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc), - U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177), - U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9), - U64(0xfa489411fa489411), U64(0x2264e9472264e947), - U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0), - U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322), - U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9), - U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498), - U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5), - U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f), - U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850), - U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54), - U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890), - U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382), - U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069), - U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf), - U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810), - U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb), - 
U64(0x097826cd097826cd), U64(0xf418596ef418596e), - U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83), - U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa), - U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef), - U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a), - U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029), - U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a), - U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235), - U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc), - U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733), - U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41), - U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117), - U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43), - U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4), - U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c), - U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546), - U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01), - U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb), - U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92), - U64(0x335610e9335610e9), U64(0x1347d66d1347d66d), - U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137), - U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb), - U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7), - U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a), - U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255), - U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773), - U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f), - U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478), - U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9), - U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2), - U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc), - U64(0x8b493c288b493c28), U64(0x41950dff41950dff), - U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08), - U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664), - U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5), - U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0) -}; -static const u8 Td4[256] = { - 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, - 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 
0xf3U, 0xd7U, 0xfbU, - 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, - 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, - 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, - 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, - 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, - 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, - 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, - 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, - 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, - 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, - 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, - 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, - 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, - 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, - 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, - 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, - 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, - 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, - 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, - 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, - 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, - 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, - 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, - 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, - 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, - 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, - 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, - 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, - 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, - 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU -}; - -static const u32 rcon[] = { - 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U, - 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U, - 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values 
*/ -}; - -/** - * Expand the cipher key into the encryption key schedule. - */ -int AES_set_encrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) { - - u32 *rk; - int i = 0; - u32 temp; - - if (!userKey || !key) - return -1; - if (bits != 128 && bits != 192 && bits != 256) - return -2; - - rk = key->rd_key; - - if (bits==128) - key->rounds = 10; - else if (bits==192) - key->rounds = 12; - else - key->rounds = 14; - - rk[0] = GETU32(userKey ); - rk[1] = GETU32(userKey + 4); - rk[2] = GETU32(userKey + 8); - rk[3] = GETU32(userKey + 12); - if (bits == 128) { - while (1) { - temp = rk[3]; - rk[4] = rk[0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[5] = rk[1] ^ rk[4]; - rk[6] = rk[2] ^ rk[5]; - rk[7] = rk[3] ^ rk[6]; - if (++i == 10) { - return 0; - } - rk += 4; - } - } - rk[4] = GETU32(userKey + 16); - rk[5] = GETU32(userKey + 20); - if (bits == 192) { - while (1) { - temp = rk[ 5]; - rk[ 6] = rk[ 0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[ 7] = rk[ 1] ^ rk[ 6]; - rk[ 8] = rk[ 2] ^ rk[ 7]; - rk[ 9] = rk[ 3] ^ rk[ 8]; - if (++i == 8) { - return 0; - } - rk[10] = rk[ 4] ^ rk[ 9]; - rk[11] = rk[ 5] ^ rk[10]; - rk += 6; - } - } - rk[6] = GETU32(userKey + 24); - rk[7] = GETU32(userKey + 28); - if (bits == 256) { - while (1) { - temp = rk[ 7]; - rk[ 8] = rk[ 0] ^ - (Te4[(temp >> 8) & 0xff] ) ^ - (Te4[(temp >> 16) & 0xff] << 8) ^ - (Te4[(temp >> 24) ] << 16) ^ - (Te4[(temp ) & 0xff] << 24) ^ - rcon[i]; - rk[ 9] = rk[ 1] ^ rk[ 8]; - rk[10] = rk[ 2] ^ rk[ 9]; - rk[11] = rk[ 3] ^ rk[10]; - if (++i == 7) { - return 0; - } - temp = rk[11]; - rk[12] = rk[ 4] ^ - (Te4[(temp ) & 0xff] ) ^ - (Te4[(temp >> 8) & 0xff] << 8) ^ - (Te4[(temp >> 16) & 0xff] << 16) ^ - (Te4[(temp >> 24) ] << 24); - rk[13] = rk[ 5] ^ rk[12]; - rk[14] = rk[ 6] ^ rk[13]; - rk[15] = 
rk[ 7] ^ rk[14]; - - rk += 8; - } - } - return 0; -} - -/** - * Expand the cipher key into the decryption key schedule. - */ -int AES_set_decrypt_key(const unsigned char *userKey, const int bits, - AES_KEY *key) { - - u32 *rk; - int i, j, status; - u32 temp; - - /* first, start with an encryption schedule */ - status = AES_set_encrypt_key(userKey, bits, key); - if (status < 0) - return status; - - rk = key->rd_key; - - /* invert the order of the round keys: */ - for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { - temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; - temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; - temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; - temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; - } - /* apply the inverse MixColumn transform to all round keys but the first and the last: */ - for (i = 1; i < (key->rounds); i++) { - rk += 4; -#if 1 - for (j = 0; j < 4; j++) { - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - tp1 = rk[j]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - rk[j] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - } -#else - rk[0] = - Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[0] >> 24) ] & 0xff]; - rk[1] = - Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[1] >> 24) ] & 0xff]; - rk[2] = - Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^ 
- Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[2] >> 24) ] & 0xff]; - rk[3] = - Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^ - Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^ - Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^ - Td3[Te2[(rk[3] >> 24) ] & 0xff]; -#endif - } - return 0; -} - -/* - * Encrypt a single block - * in and out can overlap - */ -void AES_encrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key) { - - const u32 *rk; - u32 s0, s1, s2, s3, t[4]; - int r; - - assert(in && out && key); - rk = key->rd_key; - - /* - * map byte array block to cipher state - * and add initial round key: - */ - s0 = GETU32(in ) ^ rk[0]; - s1 = GETU32(in + 4) ^ rk[1]; - s2 = GETU32(in + 8) ^ rk[2]; - s3 = GETU32(in + 12) ^ rk[3]; - -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Te4); - - t[0] = Te4[(s0 ) & 0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24; - t[1] = Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24; - t[2] = Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24; - t[3] = Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 r0, r1, r2; - - for (i = 0; i < 4; i++) { - r0 = t[i]; - r1 = r0 & 0x80808080; - r2 = ((r0 & 0x7f7f7f7f) << 1) ^ - ((r1 - (r1 >> 7)) & 0x1b1b1b1b); -#if defined(ROTATE) - t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ - ROTATE(r0,16) ^ ROTATE(r0,8); -#else - t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ - (r0 << 16) ^ (r0 >> 16) ^ - (r0 << 8) ^ (r0 >> 24); -#endif - t[i] ^= rk[4+i]; - } - } -#else - t[0] = Te0[(s0 ) & 0xff] ^ - Te1[(s1 >> 8) & 0xff] ^ - Te2[(s2 >> 16) & 0xff] ^ - Te3[(s3 >> 24) ] ^ - rk[4]; - t[1] = Te0[(s1 ) & 0xff] ^ - Te1[(s2 >> 8) & 0xff] ^ - Te2[(s3 >> 16) & 0xff] ^ - Te3[(s0 >> 24) ] ^ - rk[5]; - t[2] = Te0[(s2 ) & 0xff] ^ - 
Te1[(s3 >> 8) & 0xff] ^ - Te2[(s0 >> 16) & 0xff] ^ - Te3[(s1 >> 24) ] ^ - rk[6]; - t[3] = Te0[(s3 ) & 0xff] ^ - Te1[(s0 >> 8) & 0xff] ^ - Te2[(s1 >> 16) & 0xff] ^ - Te3[(s2 >> 24) ] ^ - rk[7]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - - /* - * Nr - 2 full rounds: - */ - for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { -#if defined(AES_COMPACT_IN_INNER_ROUNDS) - t[0] = Te4[(s0 ) & 0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24; - t[1] = Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24; - t[2] = Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24; - t[3] = Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 r0, r1, r2; - - for (i = 0; i < 4; i++) { - r0 = t[i]; - r1 = r0 & 0x80808080; - r2 = ((r0 & 0x7f7f7f7f) << 1) ^ - ((r1 - (r1 >> 7)) & 0x1b1b1b1b); -#if defined(ROTATE) - t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ - ROTATE(r0,16) ^ ROTATE(r0,8); -#else - t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ - (r0 << 16) ^ (r0 >> 16) ^ - (r0 << 8) ^ (r0 >> 24); -#endif - t[i] ^= rk[i]; - } - } -#else - t[0] = Te0[(s0 ) & 0xff] ^ - Te1[(s1 >> 8) & 0xff] ^ - Te2[(s2 >> 16) & 0xff] ^ - Te3[(s3 >> 24) ] ^ - rk[0]; - t[1] = Te0[(s1 ) & 0xff] ^ - Te1[(s2 >> 8) & 0xff] ^ - Te2[(s3 >> 16) & 0xff] ^ - Te3[(s0 >> 24) ] ^ - rk[1]; - t[2] = Te0[(s2 ) & 0xff] ^ - Te1[(s3 >> 8) & 0xff] ^ - Te2[(s0 >> 16) & 0xff] ^ - Te3[(s1 >> 24) ] ^ - rk[2]; - t[3] = Te0[(s3 ) & 0xff] ^ - Te1[(s0 >> 8) & 0xff] ^ - Te2[(s1 >> 16) & 0xff] ^ - Te3[(s2 >> 24) ] ^ - rk[3]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - } - /* - * apply last round and - * map cipher state to byte array block: - */ -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Te4); - - *(u32*)(out+0) = - Te4[(s0 ) & 
0xff] ^ - Te4[(s1 >> 8) & 0xff] << 8 ^ - Te4[(s2 >> 16) & 0xff] << 16 ^ - Te4[(s3 >> 24) ] << 24 ^ - rk[0]; - *(u32*)(out+4) = - Te4[(s1 ) & 0xff] ^ - Te4[(s2 >> 8) & 0xff] << 8 ^ - Te4[(s3 >> 16) & 0xff] << 16 ^ - Te4[(s0 >> 24) ] << 24 ^ - rk[1]; - *(u32*)(out+8) = - Te4[(s2 ) & 0xff] ^ - Te4[(s3 >> 8) & 0xff] << 8 ^ - Te4[(s0 >> 16) & 0xff] << 16 ^ - Te4[(s1 >> 24) ] << 24 ^ - rk[2]; - *(u32*)(out+12) = - Te4[(s3 ) & 0xff] ^ - Te4[(s0 >> 8) & 0xff] << 8 ^ - Te4[(s1 >> 16) & 0xff] << 16 ^ - Te4[(s2 >> 24) ] << 24 ^ - rk[3]; -#else - *(u32*)(out+0) = - (Te2[(s0 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s3 >> 24) ] & 0xff000000U) ^ - rk[0]; - *(u32*)(out+4) = - (Te2[(s1 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s0 >> 24) ] & 0xff000000U) ^ - rk[1]; - *(u32*)(out+8) = - (Te2[(s2 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s1 >> 24) ] & 0xff000000U) ^ - rk[2]; - *(u32*)(out+12) = - (Te2[(s3 ) & 0xff] & 0x000000ffU) ^ - (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^ - (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^ - (Te1[(s2 >> 24) ] & 0xff000000U) ^ - rk[3]; -#endif -} - -/* - * Decrypt a single block - * in and out can overlap - */ -void AES_decrypt(const unsigned char *in, unsigned char *out, - const AES_KEY *key) { - - const u32 *rk; - u32 s0, s1, s2, s3, t[4]; - int r; - - assert(in && out && key); - rk = key->rd_key; - - /* - * map byte array block to cipher state - * and add initial round key: - */ - s0 = GETU32(in ) ^ rk[0]; - s1 = GETU32(in + 4) ^ rk[1]; - s2 = GETU32(in + 8) ^ rk[2]; - s3 = GETU32(in + 12) ^ rk[3]; - -#if defined(AES_COMPACT_IN_OUTER_ROUNDS) - prefetch256(Td4); - - t[0] = Td4[(s0 ) & 0xff] ^ - Td4[(s3 >> 8) & 0xff] << 8 ^ - Td4[(s2 >> 16) & 0xff] << 16 ^ - Td4[(s1 >> 24) ] << 24; - t[1] = Td4[(s1 ) & 0xff] ^ - Td4[(s0 >> 8) & 
0xff] << 8 ^ - Td4[(s3 >> 16) & 0xff] << 16 ^ - Td4[(s2 >> 24) ] << 24; - t[2] = Td4[(s2 ) & 0xff] ^ - Td4[(s1 >> 8) & 0xff] << 8 ^ - Td4[(s0 >> 16) & 0xff] << 16 ^ - Td4[(s3 >> 24) ] << 24; - t[3] = Td4[(s3 ) & 0xff] ^ - Td4[(s2 >> 8) & 0xff] << 8 ^ - Td4[(s1 >> 16) & 0xff] << 16 ^ - Td4[(s0 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - for (i = 0; i < 4; i++) { - tp1 = t[i]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - t[i] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - t[i] ^= rk[4+i]; - } - } -#else - t[0] = Td0[(s0 ) & 0xff] ^ - Td1[(s3 >> 8) & 0xff] ^ - Td2[(s2 >> 16) & 0xff] ^ - Td3[(s1 >> 24) ] ^ - rk[4]; - t[1] = Td0[(s1 ) & 0xff] ^ - Td1[(s0 >> 8) & 0xff] ^ - Td2[(s3 >> 16) & 0xff] ^ - Td3[(s2 >> 24) ] ^ - rk[5]; - t[2] = Td0[(s2 ) & 0xff] ^ - Td1[(s1 >> 8) & 0xff] ^ - Td2[(s0 >> 16) & 0xff] ^ - Td3[(s3 >> 24) ] ^ - rk[6]; - t[3] = Td0[(s3 ) & 0xff] ^ - Td1[(s2 >> 8) & 0xff] ^ - Td2[(s1 >> 16) & 0xff] ^ - Td3[(s0 >> 24) ] ^ - rk[7]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - - /* - * Nr - 2 full rounds: - */ - for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { -#if defined(AES_COMPACT_IN_INNER_ROUNDS) - t[0] = Td4[(s0 ) & 0xff] ^ - Td4[(s3 >> 8) & 0xff] << 8 ^ - Td4[(s2 >> 16) & 0xff] << 16 ^ - Td4[(s1 >> 24) ] << 24; - t[1] = Td4[(s1 ) & 0xff] ^ - Td4[(s0 >> 8) & 0xff] << 8 ^ - Td4[(s3 >> 16) & 0xff] << 16 ^ - Td4[(s2 >> 24) ] << 24; - t[2] = Td4[(s2 ) & 0xff] ^ - Td4[(s1 >> 8) & 0xff] << 8 ^ - Td4[(s0 >> 
16) & 0xff] << 16 ^ - Td4[(s3 >> 24) ] << 24; - t[3] = Td4[(s3 ) & 0xff] ^ - Td4[(s2 >> 8) & 0xff] << 8 ^ - Td4[(s1 >> 16) & 0xff] << 16 ^ - Td4[(s0 >> 24) ] << 24; - - /* now do the linear transform using words */ - { int i; - u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; - - for (i = 0; i < 4; i++) { - tp1 = t[i]; - m = tp1 & 0x80808080; - tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp2 & 0x80808080; - tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - m = tp4 & 0x80808080; - tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ - ((m - (m >> 7)) & 0x1b1b1b1b); - tp9 = tp8 ^ tp1; - tpb = tp9 ^ tp2; - tpd = tp9 ^ tp4; - tpe = tp8 ^ tp4 ^ tp2; -#if defined(ROTATE) - t[i] = tpe ^ ROTATE(tpd,16) ^ - ROTATE(tp9,8) ^ ROTATE(tpb,24); -#else - t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ - (tp9 >> 24) ^ (tp9 << 8) ^ - (tpb >> 8) ^ (tpb << 24); -#endif - t[i] ^= rk[i]; - } - } -#else - t[0] = Td0[(s0 ) & 0xff] ^ - Td1[(s3 >> 8) & 0xff] ^ - Td2[(s2 >> 16) & 0xff] ^ - Td3[(s1 >> 24) ] ^ - rk[0]; - t[1] = Td0[(s1 ) & 0xff] ^ - Td1[(s0 >> 8) & 0xff] ^ - Td2[(s3 >> 16) & 0xff] ^ - Td3[(s2 >> 24) ] ^ - rk[1]; - t[2] = Td0[(s2 ) & 0xff] ^ - Td1[(s1 >> 8) & 0xff] ^ - Td2[(s0 >> 16) & 0xff] ^ - Td3[(s3 >> 24) ] ^ - rk[2]; - t[3] = Td0[(s3 ) & 0xff] ^ - Td1[(s2 >> 8) & 0xff] ^ - Td2[(s1 >> 16) & 0xff] ^ - Td3[(s0 >> 24) ] ^ - rk[3]; -#endif - s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; - } - /* - * apply last round and - * map cipher state to byte array block: - */ - prefetch256(Td4); - - *(u32*)(out+0) = - (Td4[(s0 ) & 0xff]) ^ - (Td4[(s3 >> 8) & 0xff] << 8) ^ - (Td4[(s2 >> 16) & 0xff] << 16) ^ - (Td4[(s1 >> 24) ] << 24) ^ - rk[0]; - *(u32*)(out+4) = - (Td4[(s1 ) & 0xff]) ^ - (Td4[(s0 >> 8) & 0xff] << 8) ^ - (Td4[(s3 >> 16) & 0xff] << 16) ^ - (Td4[(s2 >> 24) ] << 24) ^ - rk[1]; - *(u32*)(out+8) = - (Td4[(s2 ) & 0xff]) ^ - (Td4[(s1 >> 8) & 0xff] << 8) ^ - (Td4[(s0 >> 16) & 0xff] << 16) ^ - (Td4[(s3 >> 24) ] << 24) ^ - rk[2]; - *(u32*)(out+12) = - (Td4[(s3 ) 
& 0xff]) ^ - (Td4[(s2 >> 8) & 0xff] << 8) ^ - (Td4[(s1 >> 16) & 0xff] << 16) ^ - (Td4[(s0 >> 24) ] << 24) ^ - rk[3]; -} diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl deleted file mode 100644 index 15742c1..0000000 --- a/crypto/aes/asm/aes-armv4.pl +++ /dev/null @@ -1,1030 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for ARMv4 - -# January 2007. -# -# Code uses single 1K S-box and is >2 times faster than code generated -# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which -# allows to merge logical or arithmetic operation with shift or rotate -# in one instruction and emit combined result every cycle. The module -# is endian-neutral. The performance is ~42 cycles/byte for 128-bit -# key. - -# May 2007. -# -# AES_set_[en|de]crypt_key is added. 
- -$s0="r0"; -$s1="r1"; -$s2="r2"; -$s3="r3"; -$t1="r4"; -$t2="r5"; -$t3="r6"; -$i1="r7"; -$i2="r8"; -$i3="r9"; - -$tbl="r10"; -$key="r11"; -$rounds="r12"; - -$code=<<___; -.text -.code 32 - -.type AES_Te,%object -.align 5 -AES_Te: -.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d -.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 -.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d -.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a -.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 -.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b -.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea -.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b -.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a -.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f -.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 -.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f -.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e -.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 -.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d -.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f -.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e -.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb -.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce -.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 -.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c -.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed -.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b -.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a -.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 -.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 -.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 -.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 -.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a -.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 -.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 -.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d -.word 0x81cdcd4c, 0x180c0c14, 
0x26131335, 0xc3ecec2f -.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 -.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 -.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 -.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f -.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 -.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c -.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 -.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e -.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 -.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 -.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b -.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 -.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 -.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 -.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 -.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 -.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 -.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 -.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 -.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa -.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 -.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 -.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 -.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 -.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 -.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 -.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a -.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 -.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 -.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 -.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a -@ Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 
0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -@ rcon[] -.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.size AES_Te,.-AES_Te - -@ void AES_encrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_encrypt -.type AES_encrypt,%function -.align 5 -AES_encrypt: - sub r3,pc,#8 @ AES_encrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_encrypt-AES_Te @ Te - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb 
$t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - - bl _armv4_AES_encrypt - - ldr $rounds,[sp],#4 @ pop out - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... - mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - strb $t3,[$rounds,#2] - strb $s0,[$rounds,#3] - mov $t1,$s1,lsr#24 - mov $t2,$s1,lsr#16 - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - strb $t3,[$rounds,#6] - strb $s1,[$rounds,#7] - mov $t1,$s2,lsr#24 - mov $t2,$s2,lsr#16 - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - strb $t3,[$rounds,#10] - strb $s2,[$rounds,#11] - mov $t1,$s3,lsr#24 - mov $t2,$s3,lsr#16 - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] - - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_encrypt,.-AES_encrypt - -.type _armv4_AES_encrypt,%function -.align 2 -_armv4_AES_encrypt: - str lr,[sp,#-4]! 
@ push lr - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - ldr $rounds,[$key,#240-16] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - -.Lenc_loop: - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0,lsr#16 - and $i1,lr,$s0 - mov $s0,$s0,lsr#24 - ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0] - ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24] - ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8] - ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16] - - and $i1,lr,$s1,lsr#16 @ i0 - and $i2,lr,$s1 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16] - ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24] - ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0] - ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8] - eor $s0,$s0,$i1,ror#8 - eor $s1,$s1,$t1,ror#24 - eor $t2,$t2,$i2,ror#8 - eor $t3,$t3,$i3,ror#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2,lsr#16 @ i1 - and $i3,lr,$s2 - mov $s2,$s2,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] - ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] - ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] - ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] - eor $s0,$s0,$i1,ror#16 - eor $s1,$s1,$i2,ror#8 - eor $s2,$s2,$t2,ror#16 - eor $t3,$t3,$i3,ror#16 - - and $i1,lr,$s3 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3,lsr#16 @ i2 - mov $s3,$s3,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] - ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] - ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] - ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] - eor $s0,$s0,$i1,ror#24 - eor $s1,$s1,$i2,ror#16 - eor $s2,$s2,$i3,ror#8 - eor $s3,$s3,$t3,ror#8 - - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - subs $rounds,$rounds,#1 - bne .Lenc_loop - - add $tbl,$tbl,#2 - - and $i1,lr,$s0 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0,lsr#16 - mov $s0,$s0,lsr#24 - ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0] - ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24] - ldrb $t2,[$tbl,$i2,lsl#2] @ 
Te4[s0>>8] - ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16] - - and $i1,lr,$s1,lsr#16 @ i0 - and $i2,lr,$s1 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16] - ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$t1,$s1,lsl#24 - eor $t2,$i2,$t2,lsl#8 - eor $t3,$i3,$t3,lsl#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2,lsr#16 @ i1 - and $i3,lr,$s2 - mov $s2,$s2,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] - ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$s1,$i2,lsl#16 - eor $s2,$t2,$s2,lsl#24 - eor $t3,$i3,$t3,lsl#8 - - and $i1,lr,$s3 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3,lsr#16 @ i2 - mov $s3,$s3,lsr#24 - ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] - ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] - ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] - ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] - eor $s0,$i1,$s0,lsl#8 - eor $s1,$s1,$i2,lsl#8 - eor $s2,$s2,$i3,lsl#16 - eor $s3,$t3,$s3,lsl#24 - - ldr lr,[sp],#4 @ pop lr - ldr $t1,[$key,#0] - ldr $t2,[$key,#4] - ldr $t3,[$key,#8] - ldr $i1,[$key,#12] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - sub $tbl,$tbl,#2 - mov pc,lr @ return -.size _armv4_AES_encrypt,.-_armv4_AES_encrypt - -.global AES_set_encrypt_key -.type AES_set_encrypt_key,%function -.align 5 -AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key - teq r0,#0 - moveq r0,#-1 - beq .Labrt - teq r2,#0 - moveq r0,#-1 - beq .Labrt - - teq r1,#128 - beq .Lok - teq r1,#192 - beq .Lok - teq r1,#256 - movne r0,#-1 - bne .Labrt - -.Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 - - mov $rounds,r0 @ inp - mov lr,r1 @ bits - mov $key,r2 @ key - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... 
- ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - str $s0,[$key],#16 - str $s1,[$key,#-12] - str $s2,[$key,#-8] - str $s3,[$key,#-4] - - teq lr,#128 - bne .Lnot128 - mov $rounds,#10 - str $rounds,[$key,#240-16] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - -.L128_loop: - and $t2,lr,$s3,lsr#24 - and $i1,lr,$s3,lsr#16 - and $i2,lr,$s3,lsr#8 - and $i3,lr,$s3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $t2,$t2,$t1 - eor $s0,$s0,$t2 @ rk[4]=rk[0]^... 
- eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4] - eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5] - eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6] - str $s0,[$key],#16 - str $s1,[$key,#-12] - str $s2,[$key,#-8] - str $s3,[$key,#-4] - - subs $rounds,$rounds,#1 - bne .L128_loop - sub r2,$key,#176 - b .Ldone - -.Lnot128: - ldrb $i2,[$rounds,#19] - ldrb $t1,[$rounds,#18] - ldrb $t2,[$rounds,#17] - ldrb $t3,[$rounds,#16] - orr $i2,$i2,$t1,lsl#8 - orr $i2,$i2,$t2,lsl#16 - orr $i2,$i2,$t3,lsl#24 - ldrb $i3,[$rounds,#23] - ldrb $t1,[$rounds,#22] - ldrb $t2,[$rounds,#21] - ldrb $t3,[$rounds,#20] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - orr $i3,$i3,$t3,lsl#24 - str $i2,[$key],#8 - str $i3,[$key,#-4] - - teq lr,#192 - bne .Lnot192 - mov $rounds,#12 - str $rounds,[$key,#240-24] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#8 - -.L192_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - and $i2,lr,$i3,lsr#8 - and $i3,lr,$i3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[6]=rk[0]^... 
- eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6] - eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7] - eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8] - str $s0,[$key],#24 - str $s1,[$key,#-20] - str $s2,[$key,#-16] - str $s3,[$key,#-12] - - subs $rounds,$rounds,#1 - subeq r2,$key,#216 - beq .Ldone - - ldr $i1,[$key,#-32] - ldr $i2,[$key,#-28] - eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9] - eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10] - str $i1,[$key,#-8] - str $i3,[$key,#-4] - b .L192_loop - -.Lnot192: - ldrb $i2,[$rounds,#27] - ldrb $t1,[$rounds,#26] - ldrb $t2,[$rounds,#25] - ldrb $t3,[$rounds,#24] - orr $i2,$i2,$t1,lsl#8 - orr $i2,$i2,$t2,lsl#16 - orr $i2,$i2,$t3,lsl#24 - ldrb $i3,[$rounds,#31] - ldrb $t1,[$rounds,#30] - ldrb $t2,[$rounds,#29] - ldrb $t3,[$rounds,#28] - orr $i3,$i3,$t1,lsl#8 - orr $i3,$i3,$t2,lsl#16 - orr $i3,$i3,$t3,lsl#24 - str $i2,[$key],#8 - str $i3,[$key,#-4] - - mov $rounds,#14 - str $rounds,[$key,#240-32] - add $t3,$tbl,#256 @ rcon - mov lr,#255 - mov $rounds,#7 - -.L256_loop: - and $t2,lr,$i3,lsr#24 - and $i1,lr,$i3,lsr#16 - and $i2,lr,$i3,lsr#8 - and $i3,lr,$i3 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - ldr $t1,[$t3],#4 @ rcon[i++] - orr $t2,$t2,$i1,lsl#24 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#8 - eor $i3,$t2,$t1 - eor $s0,$s0,$i3 @ rk[8]=rk[0]^... - eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8] - eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9] - eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10] - str $s0,[$key],#32 - str $s1,[$key,#-28] - str $s2,[$key,#-24] - str $s3,[$key,#-20] - - subs $rounds,$rounds,#1 - subeq r2,$key,#256 - beq .Ldone - - and $t2,lr,$s3 - and $i1,lr,$s3,lsr#8 - and $i2,lr,$s3,lsr#16 - and $i3,lr,$s3,lsr#24 - ldrb $t2,[$tbl,$t2] - ldrb $i1,[$tbl,$i1] - ldrb $i2,[$tbl,$i2] - ldrb $i3,[$tbl,$i3] - orr $t2,$t2,$i1,lsl#8 - orr $t2,$t2,$i2,lsl#16 - orr $t2,$t2,$i3,lsl#24 - - ldr $t1,[$key,#-48] - ldr $i1,[$key,#-44] - ldr $i2,[$key,#-40] - ldr $i3,[$key,#-36] - eor $t1,$t1,$t2 @ rk[12]=rk[4]^... 
- eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12] - eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13] - eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14] - str $t1,[$key,#-16] - str $i1,[$key,#-12] - str $i2,[$key,#-8] - str $i3,[$key,#-4] - b .L256_loop - -.Ldone: mov r0,#0 - ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_set_encrypt_key,.-AES_set_encrypt_key - -.global AES_set_decrypt_key -.type AES_set_decrypt_key,%function -.align 5 -AES_set_decrypt_key: - str lr,[sp,#-4]! @ push lr - bl AES_set_encrypt_key - teq r0,#0 - ldrne lr,[sp],#4 @ pop lr - bne .Labrt - - stmdb sp!,{r4-r12} - - ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov $key,r2 @ which is AES_KEY *key - mov $i1,r2 - add $i2,r2,$rounds,lsl#4 - -.Linv: ldr $s0,[$i1] - ldr $s1,[$i1,#4] - ldr $s2,[$i1,#8] - ldr $s3,[$i1,#12] - ldr $t1,[$i2] - ldr $t2,[$i2,#4] - ldr $t3,[$i2,#8] - ldr $i3,[$i2,#12] - str $s0,[$i2],#-16 - str $s1,[$i2,#16+4] - str $s2,[$i2,#16+8] - str $s3,[$i2,#16+12] - str $t1,[$i1],#16 - str $t2,[$i1,#-12] - str $t3,[$i1,#-8] - str $i3,[$i1,#-4] - teq $i1,$i2 - bne .Linv -___ -$mask80=$i1; -$mask1b=$i2; -$mask7f=$i3; -$code.=<<___; - ldr $s0,[$key,#16]! 
@ prefetch tp1 - mov $mask80,#0x80 - mov $mask1b,#0x1b - orr $mask80,$mask80,#0x8000 - orr $mask1b,$mask1b,#0x1b00 - orr $mask80,$mask80,$mask80,lsl#16 - orr $mask1b,$mask1b,$mask1b,lsl#16 - sub $rounds,$rounds,#1 - mvn $mask7f,$mask80 - mov $rounds,$rounds,lsl#2 @ (rounds-1)*4 - -.Lmix: and $t1,$s0,$mask80 - and $s1,$s0,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s1,$t1,$s1,lsl#1 @ tp2 - - and $t1,$s1,$mask80 - and $s2,$s1,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s2,$t1,$s2,lsl#1 @ tp4 - - and $t1,$s2,$mask80 - and $s3,$s2,$mask7f - sub $t1,$t1,$t1,lsr#7 - and $t1,$t1,$mask1b - eor $s3,$t1,$s3,lsl#1 @ tp8 - - eor $t1,$s1,$s2 - eor $t2,$s0,$s3 @ tp9 - eor $t1,$t1,$s3 @ tpe - eor $t1,$t1,$s1,ror#24 - eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) - eor $t1,$t1,$s2,ror#16 - eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) - eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24) - - ldr $s0,[$key,#4] @ prefetch tp1 - str $t1,[$key],#4 - subs $rounds,$rounds,#1 - bne .Lmix - - mov r0,#0 - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_set_decrypt_key,.-AES_set_decrypt_key - -.type AES_Td,%object -.align 5 -AES_Td: -.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 -.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 -.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 -.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f -.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 -.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 -.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da -.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 -.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd -.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 -.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 -.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 -.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 -.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 
0x6655ab2a -.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 -.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c -.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 -.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a -.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 -.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 -.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 -.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff -.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 -.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb -.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 -.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e -.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 -.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a -.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e -.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 -.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d -.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 -.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd -.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 -.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 -.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 -.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d -.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 -.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 -.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef -.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 -.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 -.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 -.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 -.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 -.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b -.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 -.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 -.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 -.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 -.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 
0x33a7d815 -.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f -.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df -.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f -.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e -.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 -.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 -.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c -.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf -.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 -.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f -.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 -.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 -.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 -@ Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 
0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -@ void AES_decrypt(const unsigned char *in, unsigned char *out, -@ const AES_KEY *key) { -.global AES_decrypt -.type AES_decrypt,%function -.align 5 -AES_decrypt: - sub r3,pc,#8 @ AES_decrypt - stmdb sp!,{r1,r4-r12,lr} - mov $rounds,r0 @ inp - mov $key,r2 - sub $tbl,r3,#AES_decrypt-AES_Td @ Td - - ldrb $s0,[$rounds,#3] @ load input data in endian-neutral - ldrb $t1,[$rounds,#2] @ manner... - ldrb $t2,[$rounds,#1] - ldrb $t3,[$rounds,#0] - orr $s0,$s0,$t1,lsl#8 - orr $s0,$s0,$t2,lsl#16 - orr $s0,$s0,$t3,lsl#24 - ldrb $s1,[$rounds,#7] - ldrb $t1,[$rounds,#6] - ldrb $t2,[$rounds,#5] - ldrb $t3,[$rounds,#4] - orr $s1,$s1,$t1,lsl#8 - orr $s1,$s1,$t2,lsl#16 - orr $s1,$s1,$t3,lsl#24 - ldrb $s2,[$rounds,#11] - ldrb $t1,[$rounds,#10] - ldrb $t2,[$rounds,#9] - ldrb $t3,[$rounds,#8] - orr $s2,$s2,$t1,lsl#8 - orr $s2,$s2,$t2,lsl#16 - orr $s2,$s2,$t3,lsl#24 - ldrb $s3,[$rounds,#15] - ldrb $t1,[$rounds,#14] - ldrb $t2,[$rounds,#13] - ldrb $t3,[$rounds,#12] - orr $s3,$s3,$t1,lsl#8 - orr $s3,$s3,$t2,lsl#16 - orr $s3,$s3,$t3,lsl#24 - - bl _armv4_AES_decrypt - - ldr $rounds,[sp],#4 @ pop out - mov $t1,$s0,lsr#24 @ write output in endian-neutral - mov $t2,$s0,lsr#16 @ manner... 
- mov $t3,$s0,lsr#8 - strb $t1,[$rounds,#0] - strb $t2,[$rounds,#1] - strb $t3,[$rounds,#2] - strb $s0,[$rounds,#3] - mov $t1,$s1,lsr#24 - mov $t2,$s1,lsr#16 - mov $t3,$s1,lsr#8 - strb $t1,[$rounds,#4] - strb $t2,[$rounds,#5] - strb $t3,[$rounds,#6] - strb $s1,[$rounds,#7] - mov $t1,$s2,lsr#24 - mov $t2,$s2,lsr#16 - mov $t3,$s2,lsr#8 - strb $t1,[$rounds,#8] - strb $t2,[$rounds,#9] - strb $t3,[$rounds,#10] - strb $s2,[$rounds,#11] - mov $t1,$s3,lsr#24 - mov $t2,$s3,lsr#16 - mov $t3,$s3,lsr#8 - strb $t1,[$rounds,#12] - strb $t2,[$rounds,#13] - strb $t3,[$rounds,#14] - strb $s3,[$rounds,#15] - - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - bx lr @ interoperable with Thumb ISA:-) -.size AES_decrypt,.-AES_decrypt - -.type _armv4_AES_decrypt,%function -.align 2 -_armv4_AES_decrypt: - str lr,[sp,#-4]! @ push lr - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - ldr $rounds,[$key,#240-16] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - sub $rounds,$rounds,#1 - mov lr,#255 - -.Ldec_loop: - and $i1,lr,$s0,lsr#16 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0 - mov $s0,$s0,lsr#24 - ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16] - ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24] - ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8] - ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0] - - and $i1,lr,$s1 @ i0 - and $i2,lr,$s1,lsr#16 - and $i3,lr,$s1,lsr#8 - mov $s1,$s1,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0] - ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24] - ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16] - ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8] - eor $s0,$s0,$i1,ror#24 - eor $s1,$s1,$t1,ror#8 - eor $t2,$i2,$t2,ror#8 - eor $t3,$i3,$t3,ror#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2 @ i1 - and $i3,lr,$s2,lsr#16 - mov $s2,$s2,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] - ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] - ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] - ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] - eor $s0,$s0,$i1,ror#16 - eor 
$s1,$s1,$i2,ror#24 - eor $s2,$s2,$t2,ror#8 - eor $t3,$i3,$t3,ror#8 - - and $i1,lr,$s3,lsr#16 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3 @ i2 - mov $s3,$s3,lsr#24 - ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] - ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] - ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] - ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] - eor $s0,$s0,$i1,ror#8 - eor $s1,$s1,$i2,ror#16 - eor $s2,$s2,$i3,ror#24 - eor $s3,$s3,$t3,ror#8 - - ldr $t1,[$key],#16 - ldr $t2,[$key,#-12] - ldr $t3,[$key,#-8] - ldr $i1,[$key,#-4] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - subs $rounds,$rounds,#1 - bne .Ldec_loop - - add $tbl,$tbl,#1024 - - ldr $t1,[$tbl,#0] @ prefetch Td4 - ldr $t2,[$tbl,#32] - ldr $t3,[$tbl,#64] - ldr $i1,[$tbl,#96] - ldr $i2,[$tbl,#128] - ldr $i3,[$tbl,#160] - ldr $t1,[$tbl,#192] - ldr $t2,[$tbl,#224] - - and $i1,lr,$s0,lsr#16 - and $i2,lr,$s0,lsr#8 - and $i3,lr,$s0 - ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24] - ldrb $t1,[$tbl,$i1] @ Td4[s0>>16] - ldrb $t2,[$tbl,$i2] @ Td4[s0>>8] - ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] - - and $i1,lr,$s1 @ i0 - and $i2,lr,$s1,lsr#16 - and $i3,lr,$s1,lsr#8 - ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] - ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] - ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] - eor $s0,$i1,$s0,lsl#24 - eor $s1,$t1,$s1,lsl#8 - eor $t2,$t2,$i2,lsl#8 - eor $t3,$t3,$i3,lsl#8 - - and $i1,lr,$s2,lsr#8 @ i0 - and $i2,lr,$s2 @ i1 - and $i3,lr,$s2,lsr#16 - ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] - ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] - ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] - ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] - eor $s0,$s0,$i1,lsl#8 - eor $s1,$i2,$s1,lsl#16 - eor $s2,$t2,$s2,lsl#16 - eor $t3,$t3,$i3,lsl#16 - - and $i1,lr,$s3,lsr#16 @ i0 - and $i2,lr,$s3,lsr#8 @ i1 - and $i3,lr,$s3 @ i2 - ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] - ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] - ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] - eor $s0,$s0,$i1,lsl#16 - eor $s1,$s1,$i2,lsl#8 - eor 
$s2,$i3,$s2,lsl#8 - eor $s3,$t3,$s3,lsl#24 - - ldr lr,[sp],#4 @ pop lr - ldr $t1,[$key,#0] - ldr $t2,[$key,#4] - ldr $t3,[$key,#8] - ldr $i1,[$key,#12] - eor $s0,$s0,$t1 - eor $s1,$s1,$t2 - eor $s2,$s2,$t3 - eor $s3,$s3,$i1 - - sub $tbl,$tbl,#1024 - mov pc,lr @ return -.size _armv4_AES_decrypt,.-_armv4_AES_decrypt -.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" -___ - -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl deleted file mode 100644 index ce42765..0000000 --- a/crypto/aes/asm/aes-ppc.pl +++ /dev/null @@ -1,1176 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# Needs more work: key setup, page boundaries, CBC routine... -# -# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with -# 128-bit key, which is ~40% better than 64-bit code generated by gcc -# 4.0. But these are not the ones currently used! Their "compact" -# counterparts are, for security reason. ppc_AES_encrypt_compact runs -# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - -# at 1/3 of ppc_AES_decrypt. 
- -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=32*$SIZE_T; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$sp="r1"; -$toc="r2"; -$inp="r3"; -$out="r4"; -$key="r5"; - -$Tbl0="r3"; -$Tbl1="r6"; -$Tbl2="r7"; -$Tbl3="r2"; - -$s0="r8"; -$s1="r9"; -$s2="r10"; -$s3="r11"; - -$t0="r12"; -$t1="r13"; -$t2="r14"; -$t3="r15"; - -$acc00="r16"; -$acc01="r17"; -$acc02="r18"; -$acc03="r19"; - -$acc04="r20"; -$acc05="r21"; -$acc06="r22"; -$acc07="r23"; - -$acc08="r24"; -$acc09="r25"; -$acc10="r26"; -$acc11="r27"; - -$acc12="r28"; -$acc13="r29"; -$acc14="r30"; -$acc15="r31"; - -# stay away from TLS pointer -if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } -else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } -$mask80=$Tbl2; -$mask1b=$Tbl3; - -$code.=<<___; -.machine "any" -.text - -.align 7 -LAES_Te: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry - addi $Tbl0,$Tbl0,`128-8` - mtlr r0 - blr - .space `32-24` -LAES_Td: - mflr r0 - bcl 20,31,\$+4 - mflr $Tbl0 ; vvvvvvvv "distance" between . 
and 1st data entry - addi $Tbl0,$Tbl0,`128-8-32+2048+256` - mtlr r0 - blr - .space `128-32-24` -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 
0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 
0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - - -.globl .AES_encrypt -.align 7 -.AES_encrypt: - mflr r0 - $STU $sp,-$FRAME($sp) - - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Te - bl Lppc_AES_encrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - - $POP r0,`$FRAME-$SIZE_T*21`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - -.align 4 -Lppc_AES_encrypt: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz 
$t3,12($key) - addi $Tbl1,$Tbl0,3 - addi $Tbl2,$Tbl0,2 - addi $Tbl3,$Tbl0,1 - addi $acc00,$acc00,-1 - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Lenc_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s1,`32-16+3`,21,28 - rlwinm $acc05,$s2,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc06,$s3,`32-16+3`,21,28 - rlwinm $acc07,$s0,`32-16+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc08,$s2,`32-8+3`,21,28 - rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc10,$s0,`32-8+3`,21,28 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc12,$s3,`0+3`,21,28 - rlwinm $acc13,$s0,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s1,`0+3`,21,28 - rlwinm $acc15,$s2,`0+3`,21,28 - lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 - xor $t0,$t0,$acc00 - xor $t1,$t1,$acc01 - lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 - xor $t2,$t2,$acc02 - xor $t3,$t3,$acc03 - lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 - xor $t0,$t0,$acc04 - xor $t1,$t1,$acc05 - xor $t2,$t2,$acc06 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Lenc_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $acc08,`2048+0`($Tbl0) ! 
prefetch Te4 - lwz $acc09,`2048+32`($Tbl0) - lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) - lwz $acc08,`2048+128`($Tbl0) - lwz $acc09,`2048+160`($Tbl0) - lwz $acc10,`2048+192`($Tbl0) - lwz $acc11,`2048+224`($Tbl0) - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s1,`32-16`,24,31 - rlwinm $acc05,$s2,`32-16`,24,31 - lbzx $acc00,$Tbl2,$acc00 - lbzx $acc01,$Tbl2,$acc01 - rlwinm $acc06,$s3,`32-16`,24,31 - rlwinm $acc07,$s0,`32-16`,24,31 - lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $acc12,$s3,`0`,24,31 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc08,$Tbl2,$acc08 - lbzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s1,`0`,24,31 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.align 4 -Lppc_AES_encrypt_compact: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,2048 - lis $mask80,0x8080 - lis $mask1b,0x1b1b - addi $key,$key,16 - ori $mask80,$mask80,0x8080 - 
ori $mask1b,$mask1b,0x1b1b - mtctr $acc00 -.align 4 -Lenc_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc04,$s1,`32-16`,24,31 - rlwinm $acc05,$s2,`32-16`,24,31 - lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc06,$s3,`32-16`,24,31 - rlwinm $acc07,$s0,`32-16`,24,31 - lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $acc12,$s3,`0`,24,31 - rlwinm $acc13,$s0,`0`,24,31 - lbzx $acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $acc14,$s1,`0`,24,31 - rlwinm $acc15,$s2,`0`,24,31 - lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - lwz $t1,4($key) - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - lwz $t2,8($key) - lwz $t3,12($key) - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Lenc_compact_done - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc09,$s1,$mask80 - andc $acc10,$s2,$mask80 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 
# r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - - rotlwi $acc12,$s0,16 # ROTATE(r0,16) - rotlwi $acc13,$s1,16 - rotlwi $acc14,$s2,16 - rotlwi $acc15,$s3,16 - xor $s0,$s0,$acc00 # r0^r2 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) - rotrwi $s1,$s1,24 - rotrwi $s2,$s2,24 - rotrwi $s3,$s3,24 - xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotlwi $acc08,$acc12,8 # ROTATE(r0,24) - rotlwi $acc09,$acc13,8 - rotlwi $acc10,$acc14,8 - rotlwi $acc11,$acc15,8 - xor $s0,$s0,$acc12 # - xor $s1,$s1,$acc13 - xor $s2,$s2,$acc14 - xor $s3,$s3,$acc15 - xor $s0,$s0,$acc08 # - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Lenc_compact_loop -.align 4 -Lenc_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.globl .AES_decrypt -.align 7 -.AES_decrypt: - mflr r0 - $STU $sp,-$FRAME($sp) - - $PUSH r0,`$FRAME-$SIZE_T*21`($sp) - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) - $PUSH r14,`$FRAME-$SIZE_T*18`($sp) - $PUSH r15,`$FRAME-$SIZE_T*17`($sp) - $PUSH r16,`$FRAME-$SIZE_T*16`($sp) - $PUSH r17,`$FRAME-$SIZE_T*15`($sp) - $PUSH r18,`$FRAME-$SIZE_T*14`($sp) - $PUSH r19,`$FRAME-$SIZE_T*13`($sp) - $PUSH r20,`$FRAME-$SIZE_T*12`($sp) - $PUSH r21,`$FRAME-$SIZE_T*11`($sp) - $PUSH r22,`$FRAME-$SIZE_T*10`($sp) - $PUSH r23,`$FRAME-$SIZE_T*9`($sp) - $PUSH r24,`$FRAME-$SIZE_T*8`($sp) - $PUSH r25,`$FRAME-$SIZE_T*7`($sp) - $PUSH r26,`$FRAME-$SIZE_T*6`($sp) - $PUSH 
r27,`$FRAME-$SIZE_T*5`($sp) - $PUSH r28,`$FRAME-$SIZE_T*4`($sp) - $PUSH r29,`$FRAME-$SIZE_T*3`($sp) - $PUSH r30,`$FRAME-$SIZE_T*2`($sp) - $PUSH r31,`$FRAME-$SIZE_T*1`($sp) - - lwz $s0,0($inp) - lwz $s1,4($inp) - lwz $s2,8($inp) - lwz $s3,12($inp) - bl LAES_Td - bl Lppc_AES_decrypt_compact - stw $s0,0($out) - stw $s1,4($out) - stw $s2,8($out) - stw $s3,12($out) - - $POP r0,`$FRAME-$SIZE_T*21`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) - $POP r14,`$FRAME-$SIZE_T*18`($sp) - $POP r15,`$FRAME-$SIZE_T*17`($sp) - $POP r16,`$FRAME-$SIZE_T*16`($sp) - $POP r17,`$FRAME-$SIZE_T*15`($sp) - $POP r18,`$FRAME-$SIZE_T*14`($sp) - $POP r19,`$FRAME-$SIZE_T*13`($sp) - $POP r20,`$FRAME-$SIZE_T*12`($sp) - $POP r21,`$FRAME-$SIZE_T*11`($sp) - $POP r22,`$FRAME-$SIZE_T*10`($sp) - $POP r23,`$FRAME-$SIZE_T*9`($sp) - $POP r24,`$FRAME-$SIZE_T*8`($sp) - $POP r25,`$FRAME-$SIZE_T*7`($sp) - $POP r26,`$FRAME-$SIZE_T*6`($sp) - $POP r27,`$FRAME-$SIZE_T*5`($sp) - $POP r28,`$FRAME-$SIZE_T*4`($sp) - $POP r29,`$FRAME-$SIZE_T*3`($sp) - $POP r30,`$FRAME-$SIZE_T*2`($sp) - $POP r31,`$FRAME-$SIZE_T*1`($sp) - mtlr r0 - addi $sp,$sp,$FRAME - blr - -.align 4 -Lppc_AES_decrypt: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,3 - addi $Tbl2,$Tbl0,2 - addi $Tbl3,$Tbl0,1 - addi $acc00,$acc00,-1 - addi $key,$key,16 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - mtctr $acc00 -.align 4 -Ldec_loop: - rlwinm $acc00,$s0,`32-24+3`,21,28 - rlwinm $acc01,$s1,`32-24+3`,21,28 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24+3`,21,28 - rlwinm $acc03,$s3,`32-24+3`,21,28 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s3,`32-16+3`,21,28 - rlwinm $acc05,$s0,`32-16+3`,21,28 - lwzx $acc00,$Tbl0,$acc00 - lwzx $acc01,$Tbl0,$acc01 - rlwinm $acc06,$s1,`32-16+3`,21,28 - rlwinm $acc07,$s2,`32-16+3`,21,28 - lwzx $acc02,$Tbl0,$acc02 - lwzx $acc03,$Tbl0,$acc03 - rlwinm $acc08,$s2,`32-8+3`,21,28 - 
rlwinm $acc09,$s3,`32-8+3`,21,28 - lwzx $acc04,$Tbl1,$acc04 - lwzx $acc05,$Tbl1,$acc05 - rlwinm $acc10,$s0,`32-8+3`,21,28 - rlwinm $acc11,$s1,`32-8+3`,21,28 - lwzx $acc06,$Tbl1,$acc06 - lwzx $acc07,$Tbl1,$acc07 - rlwinm $acc12,$s1,`0+3`,21,28 - rlwinm $acc13,$s2,`0+3`,21,28 - lwzx $acc08,$Tbl2,$acc08 - lwzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s3,`0+3`,21,28 - rlwinm $acc15,$s0,`0+3`,21,28 - lwzx $acc10,$Tbl2,$acc10 - lwzx $acc11,$Tbl2,$acc11 - xor $t0,$t0,$acc00 - xor $t1,$t1,$acc01 - lwzx $acc12,$Tbl3,$acc12 - lwzx $acc13,$Tbl3,$acc13 - xor $t2,$t2,$acc02 - xor $t3,$t3,$acc03 - lwzx $acc14,$Tbl3,$acc14 - lwzx $acc15,$Tbl3,$acc15 - xor $t0,$t0,$acc04 - xor $t1,$t1,$acc05 - xor $t2,$t2,$acc06 - xor $t3,$t3,$acc07 - xor $t0,$t0,$acc08 - xor $t1,$t1,$acc09 - xor $t2,$t2,$acc10 - xor $t3,$t3,$acc11 - xor $s0,$t0,$acc12 - xor $s1,$t1,$acc13 - xor $s2,$t2,$acc14 - xor $s3,$t3,$acc15 - addi $key,$key,16 - bdnz- Ldec_loop - - addi $Tbl2,$Tbl0,2048 - nop - lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 - lwz $acc09,`2048+32`($Tbl0) - lwz $acc10,`2048+64`($Tbl0) - lwz $acc11,`2048+96`($Tbl0) - lwz $acc08,`2048+128`($Tbl0) - lwz $acc09,`2048+160`($Tbl0) - lwz $acc10,`2048+192`($Tbl0) - lwz $acc11,`2048+224`($Tbl0) - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - lwz $t0,0($key) - lwz $t1,4($key) - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lwz $t2,8($key) - lwz $t3,12($key) - rlwinm $acc04,$s3,`32-16`,24,31 - rlwinm $acc05,$s0,`32-16`,24,31 - lbzx $acc00,$Tbl2,$acc00 - lbzx $acc01,$Tbl2,$acc01 - rlwinm $acc06,$s1,`32-16`,24,31 - rlwinm $acc07,$s2,`32-16`,24,31 - lbzx $acc02,$Tbl2,$acc02 - lbzx $acc03,$Tbl2,$acc03 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc04,$Tbl2,$acc04 - lbzx $acc05,$Tbl2,$acc05 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc06,$Tbl2,$acc06 - lbzx $acc07,$Tbl2,$acc07 - rlwinm $acc12,$s1,`0`,24,31 - rlwinm $acc13,$s2,`0`,24,31 - lbzx $acc08,$Tbl2,$acc08 
- lbzx $acc09,$Tbl2,$acc09 - rlwinm $acc14,$s3,`0`,24,31 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc10,$Tbl2,$acc10 - lbzx $acc11,$Tbl2,$acc11 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc12,$Tbl2,$acc12 - lbzx $acc13,$Tbl2,$acc13 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - lbzx $acc14,$Tbl2,$acc14 - lbzx $acc15,$Tbl2,$acc15 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr - -.align 4 -Lppc_AES_decrypt_compact: - lwz $acc00,240($key) - lwz $t0,0($key) - lwz $t1,4($key) - lwz $t2,8($key) - lwz $t3,12($key) - addi $Tbl1,$Tbl0,2048 - lis $mask80,0x8080 - lis $mask1b,0x1b1b - addi $key,$key,16 - ori $mask80,$mask80,0x8080 - ori $mask1b,$mask1b,0x1b1b -___ -$code.=<<___ if ($SIZE_T==8); - insrdi $mask80,$mask80,32,0 - insrdi $mask1b,$mask1b,32,0 -___ -$code.=<<___; - mtctr $acc00 -.align 4 -Ldec_compact_loop: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - rlwinm $acc00,$s0,`32-24`,24,31 - rlwinm $acc01,$s1,`32-24`,24,31 - rlwinm $acc02,$s2,`32-24`,24,31 - rlwinm $acc03,$s3,`32-24`,24,31 - lbzx $acc00,$Tbl1,$acc00 - lbzx $acc01,$Tbl1,$acc01 - rlwinm $acc04,$s3,`32-16`,24,31 - rlwinm $acc05,$s0,`32-16`,24,31 - lbzx $acc02,$Tbl1,$acc02 - lbzx $acc03,$Tbl1,$acc03 - rlwinm $acc06,$s1,`32-16`,24,31 - rlwinm $acc07,$s2,`32-16`,24,31 - lbzx $acc04,$Tbl1,$acc04 - lbzx $acc05,$Tbl1,$acc05 - rlwinm $acc08,$s2,`32-8`,24,31 - rlwinm $acc09,$s3,`32-8`,24,31 - lbzx $acc06,$Tbl1,$acc06 - lbzx $acc07,$Tbl1,$acc07 - rlwinm $acc10,$s0,`32-8`,24,31 - rlwinm $acc11,$s1,`32-8`,24,31 - lbzx $acc08,$Tbl1,$acc08 - lbzx $acc09,$Tbl1,$acc09 - rlwinm $acc12,$s1,`0`,24,31 - rlwinm $acc13,$s2,`0`,24,31 - lbzx 
$acc10,$Tbl1,$acc10 - lbzx $acc11,$Tbl1,$acc11 - rlwinm $acc14,$s3,`0`,24,31 - rlwinm $acc15,$s0,`0`,24,31 - lbzx $acc12,$Tbl1,$acc12 - lbzx $acc13,$Tbl1,$acc13 - rlwinm $s0,$acc00,24,0,7 - rlwinm $s1,$acc01,24,0,7 - lbzx $acc14,$Tbl1,$acc14 - lbzx $acc15,$Tbl1,$acc15 - rlwinm $s2,$acc02,24,0,7 - rlwinm $s3,$acc03,24,0,7 - rlwimi $s0,$acc04,16,8,15 - rlwimi $s1,$acc05,16,8,15 - rlwimi $s2,$acc06,16,8,15 - rlwimi $s3,$acc07,16,8,15 - rlwimi $s0,$acc08,8,16,23 - rlwimi $s1,$acc09,8,16,23 - rlwimi $s2,$acc10,8,16,23 - rlwimi $s3,$acc11,8,16,23 - lwz $t0,0($key) - lwz $t1,4($key) - or $s0,$s0,$acc12 - or $s1,$s1,$acc13 - lwz $t2,8($key) - lwz $t3,12($key) - or $s2,$s2,$acc14 - or $s3,$s3,$acc15 - - addi $key,$key,16 - bdz Ldec_compact_done -___ -$code.=<<___ if ($SIZE_T==8); - # vectorized permutation improves decrypt performance by 10% - insrdi $s0,$s1,32,0 - insrdi $s2,$s3,32,0 - - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc02,$s2,$mask80 - srdi $acc04,$acc00,7 # r1>>7 - srdi $acc06,$acc02,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc10,$s2,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc02,$acc02,$acc06 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc10,$acc10,$acc10 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc02,$acc02,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc02,$acc02,$acc10 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc06,$acc02,$mask80 - srdi $acc08,$acc04,7 # r1>>7 - srdi $acc10,$acc06,7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - andc $acc14,$acc02,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc06,$acc06,$acc10 - add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc06,$acc06,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc06,$acc06,$acc14 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc10,$acc06,$mask80 - srdi $acc12,$acc08,7 # r1>>7 - srdi $acc14,$acc10,7 - sub 
$acc08,$acc08,$acc12 # r1-(r1>>7) - sub $acc10,$acc10,$acc14 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc14,$acc06,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc14,$acc14,$acc14 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc10,$acc10,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc10,$acc10,$acc14 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc02,$acc02,$s2 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc06,$acc06,$s2 - - extrdi $acc01,$acc00,32,0 - extrdi $acc03,$acc02,32,0 - extrdi $acc05,$acc04,32,0 - extrdi $acc07,$acc06,32,0 - extrdi $acc09,$acc08,32,0 - extrdi $acc11,$acc10,32,0 -___ -$code.=<<___ if ($SIZE_T==4); - and $acc00,$s0,$mask80 # r1=r0&0x80808080 - and $acc01,$s1,$mask80 - and $acc02,$s2,$mask80 - and $acc03,$s3,$mask80 - srwi $acc04,$acc00,7 # r1>>7 - srwi $acc05,$acc01,7 - srwi $acc06,$acc02,7 - srwi $acc07,$acc03,7 - andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f - andc $acc09,$s1,$mask80 - andc $acc10,$s2,$mask80 - andc $acc11,$s3,$mask80 - sub $acc00,$acc00,$acc04 # r1-(r1>>7) - sub $acc01,$acc01,$acc05 - sub $acc02,$acc02,$acc06 - sub $acc03,$acc03,$acc07 - add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 - add $acc09,$acc09,$acc09 - add $acc10,$acc10,$acc10 - add $acc11,$acc11,$acc11 - and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc01,$acc01,$mask1b - and $acc02,$acc02,$mask1b - and $acc03,$acc03,$mask1b - xor $acc00,$acc00,$acc08 # r2 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - - and $acc04,$acc00,$mask80 # r1=r2&0x80808080 - and $acc05,$acc01,$mask80 - and $acc06,$acc02,$mask80 - and $acc07,$acc03,$mask80 - srwi $acc08,$acc04,7 # r1>>7 - srwi $acc09,$acc05,7 - srwi $acc10,$acc06,7 - srwi $acc11,$acc07,7 - andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f - andc $acc13,$acc01,$mask80 - andc $acc14,$acc02,$mask80 - andc $acc15,$acc03,$mask80 - sub $acc04,$acc04,$acc08 # r1-(r1>>7) - sub $acc05,$acc05,$acc09 - sub $acc06,$acc06,$acc10 - sub $acc07,$acc07,$acc11 - 
add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc05,$acc05,$mask1b - and $acc06,$acc06,$mask1b - and $acc07,$acc07,$mask1b - xor $acc04,$acc04,$acc12 # r4 - xor $acc05,$acc05,$acc13 - xor $acc06,$acc06,$acc14 - xor $acc07,$acc07,$acc15 - - and $acc08,$acc04,$mask80 # r1=r4&0x80808080 - and $acc09,$acc05,$mask80 - and $acc10,$acc06,$mask80 - and $acc11,$acc07,$mask80 - srwi $acc12,$acc08,7 # r1>>7 - srwi $acc13,$acc09,7 - srwi $acc14,$acc10,7 - srwi $acc15,$acc11,7 - sub $acc08,$acc08,$acc12 # r1-(r1>>7) - sub $acc09,$acc09,$acc13 - sub $acc10,$acc10,$acc14 - sub $acc11,$acc11,$acc15 - andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f - andc $acc13,$acc05,$mask80 - andc $acc14,$acc06,$mask80 - andc $acc15,$acc07,$mask80 - add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 - add $acc13,$acc13,$acc13 - add $acc14,$acc14,$acc14 - add $acc15,$acc15,$acc15 - and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b - and $acc09,$acc09,$mask1b - and $acc10,$acc10,$mask1b - and $acc11,$acc11,$mask1b - xor $acc08,$acc08,$acc12 # r8 - xor $acc09,$acc09,$acc13 - xor $acc10,$acc10,$acc14 - xor $acc11,$acc11,$acc15 - - xor $acc00,$acc00,$s0 # r2^r0 - xor $acc01,$acc01,$s1 - xor $acc02,$acc02,$s2 - xor $acc03,$acc03,$s3 - xor $acc04,$acc04,$s0 # r4^r0 - xor $acc05,$acc05,$s1 - xor $acc06,$acc06,$s2 - xor $acc07,$acc07,$s3 -___ -$code.=<<___; - rotrwi $s0,$s0,8 # = ROTATE(r0,8) - rotrwi $s1,$s1,8 - rotrwi $s2,$s2,8 - rotrwi $s3,$s3,8 - xor $s0,$s0,$acc00 # ^= r2^r0 - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - xor $acc00,$acc00,$acc08 - xor $acc01,$acc01,$acc09 - xor $acc02,$acc02,$acc10 - xor $acc03,$acc03,$acc11 - xor $s0,$s0,$acc04 # ^= r4^r0 - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 - rotrwi $acc00,$acc00,24 - rotrwi $acc01,$acc01,24 - rotrwi $acc02,$acc02,24 - rotrwi $acc03,$acc03,24 - xor $acc04,$acc04,$acc08 - 
xor $acc05,$acc05,$acc09 - xor $acc06,$acc06,$acc10 - xor $acc07,$acc07,$acc11 - xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - rotrwi $acc04,$acc04,16 - rotrwi $acc05,$acc05,16 - rotrwi $acc06,$acc06,16 - rotrwi $acc07,$acc07,16 - xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) - xor $s1,$s1,$acc01 - xor $s2,$s2,$acc02 - xor $s3,$s3,$acc03 - rotrwi $acc08,$acc08,8 - rotrwi $acc09,$acc09,8 - rotrwi $acc10,$acc10,8 - rotrwi $acc11,$acc11,8 - xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) - xor $s1,$s1,$acc05 - xor $s2,$s2,$acc06 - xor $s3,$s3,$acc07 - xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) - xor $s1,$s1,$acc09 - xor $s2,$s2,$acc10 - xor $s3,$s3,$acc11 - - b Ldec_compact_loop -.align 4 -Ldec_compact_done: - xor $s0,$s0,$t0 - xor $s1,$s1,$t1 - xor $s2,$s2,$t2 - xor $s3,$s3,$t3 - blr -.long 0 -.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" -.align 7 -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl deleted file mode 100644 index 4b27afd..0000000 --- a/crypto/aes/asm/aes-s390x.pl +++ /dev/null @@ -1,1333 +0,0 @@ -#!/usr/bin/env perl - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# AES for s390x. - -# April 2007. -# -# Software performance improvement over gcc-generated code is ~70% and -# in absolute terms is ~73 cycles per byte processed with 128-bit key. -# You're likely to exclaim "why so slow?" 
Keep in mind that z-CPUs are -# *strictly* in-order execution and issued instruction [in this case -# load value from memory is critical] has to complete before execution -# flow proceeds. S-boxes are compressed to 2KB[+256B]. -# -# As for hardware acceleration support. It's basically a "teaser," as -# it can and should be improved in several ways. Most notably support -# for CBC is not utilized, nor multiple blocks are ever processed. -# Then software key schedule can be postponed till hardware support -# detection... Performance improvement over assembler is reportedly -# ~2.5x, but can reach >8x [naturally on larger chunks] if proper -# support is implemented. - -# May 2007. -# -# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided -# for 128-bit keys, if hardware support is detected. - -# January 2009. -# -# Add support for hardware AES192/256 and reschedule instructions to -# minimize/avoid Address Generation Interlock hazard and to favour -# dual-issue z10 pipeline. This gave ~25% improvement on z10 and -# almost 50% on z9. The gain is smaller on z10, because being dual- -# issue z10 makes it impossible to eliminate the interlock condition: -# critical path is not long enough. Yet it spends ~24 cycles per byte -# processed with 128-bit key. -# -# Unlike previous version hardware support detection takes place only -# at the moment of key schedule setup, which is denoted in key->rounds. -# This is done, because deferred key setup can't be made MT-safe, not -# for key lengths longer than 128 bits. -# -# Add AES_cbc_encrypt, which gives incredible performance improvement, -# it was measured to be ~6.6x. It's less than previously mentioned 8x, -# because software implementation was optimized. 
- -$softonly=0; # allow hardware support - -$t0="%r0"; $mask="%r0"; -$t1="%r1"; -$t2="%r2"; $inp="%r2"; -$t3="%r3"; $out="%r3"; $bits="%r3"; -$key="%r4"; -$i1="%r5"; -$i2="%r6"; -$i3="%r7"; -$s0="%r8"; -$s1="%r9"; -$s2="%r10"; -$s3="%r11"; -$tbl="%r12"; -$rounds="%r13"; -$ra="%r14"; -$sp="%r15"; - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code=<<___; -.text - -.type AES_Te,\@object -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; -# Te4[256] -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 
-.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -# rcon[] -.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 -.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 -.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 -.align 256 -.size AES_Te,.-AES_Te - -# void AES_encrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_encrypt -.type AES_encrypt,\@function -AES_encrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Lesoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length 
- .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? - br %r14 -.align 64 -.Lesoft: -___ -$code.=<<___; - stmg %r3,$ra,24($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Te - bras $ra,_s390x_AES_encrypt - - lg $out,24($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lmg %r6,$ra,48($sp) - br $ra -.size AES_encrypt,.-AES_encrypt - -.type _s390x_AES_encrypt,\@function -.align 16 -_s390x_AES_encrypt: - stg $ra,152($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Lenc_loop -.align 16 -.Lenc_loop: - sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - ngr $t1,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - ngr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Te0[s0>>24] - l $t1,1($t1,$tbl) # Te3[s0>>0] - l $t2,2($t2,$tbl) # Te2[s0>>8] - l $t3,3($t3,$tbl) # Te1[s0>>16] - - x $s0,3($i1,$tbl) # Te1[s1>>16] - l $s1,0($s1,$tbl) # Te0[s1>>24] - x $t2,1($i2,$tbl) # Te3[s1>>0] - x $t3,2($i3,$tbl) # Te2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - nr $s2,$mask - ngr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - sllg $t1,$s3,`0+3` # i0 - nr $ra,$mask - la $key,16($key) - ngr $t1,$mask - - x $s0,2($i1,$tbl) # Te2[s2>>8] - x $s1,3($i2,$tbl) # Te1[s2>>16] - l $s2,0($s2,$tbl) # Te0[s2>>24] - x $t3,1($i3,$tbl) # Te3[s2>>0] - - srlg $i3,$s3,`16-3` # i2 - xr $s2,$t2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,1($t1,$tbl) # Te3[s3>>0] - x $s1,2($ra,$tbl) # Te2[s3>>8] - x $s2,3($i3,$tbl) # Te1[s3>>16] - l $s3,0($s3,$tbl) # Te0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Lenc_loop - .align 16 - - 
sllg $t1,$s0,`0+3` - srlg $t2,$s0,`8-3` - ngr $t1,$mask - srlg $t3,$s0,`16-3` - srl $s0,`24-3` - nr $s0,$mask - nr $t2,$mask - nr $t3,$mask - - srlg $i1,$s1,`16-3` # i0 - sllg $i2,$s1,`0+3` - ngr $i2,$mask - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - nr $i1,$mask - nr $s1,$mask - nr $i3,$mask - - llgc $s0,2($s0,$tbl) # Te4[s0>>24] - llgc $t1,2($t1,$tbl) # Te4[s0>>0] - sll $s0,24 - llgc $t2,2($t2,$tbl) # Te4[s0>>8] - llgc $t3,2($t3,$tbl) # Te4[s0>>16] - sll $t2,8 - sll $t3,16 - - llgc $i1,2($i1,$tbl) # Te4[s1>>16] - llgc $s1,2($s1,$tbl) # Te4[s1>>24] - llgc $i2,2($i2,$tbl) # Te4[s1>>0] - llgc $i3,2($i3,$tbl) # Te4[s1>>8] - sll $i1,16 - sll $s1,24 - sll $i3,8 - or $s0,$i1 - or $s1,$t1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,`8-3` # i0 - srlg $i2,$s2,`16-3` # i1 - nr $i1,$mask - nr $i2,$mask - sllg $i3,$s2,`0+3` - srl $s2,`24-3` - ngr $i3,$mask - nr $s2,$mask - - sllg $t1,$s3,`0+3` # i0 - srlg $ra,$s3,`8-3` # i1 - ngr $t1,$mask - - llgc $i1,2($i1,$tbl) # Te4[s2>>8] - llgc $i2,2($i2,$tbl) # Te4[s2>>16] - sll $i1,8 - llgc $s2,2($s2,$tbl) # Te4[s2>>24] - llgc $i3,2($i3,$tbl) # Te4[s2>>0] - sll $i2,16 - nr $ra,$mask - sll $s2,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $t3,$i3 - - srlg $i3,$s3,`16-3` # i2 - srl $s3,`24-3` - nr $i3,$mask - nr $s3,$mask - - l $t0,16($key) - l $t2,20($key) - - llgc $i1,2($t1,$tbl) # Te4[s3>>0] - llgc $i2,2($ra,$tbl) # Te4[s3>>8] - llgc $i3,2($i3,$tbl) # Te4[s3>>16] - llgc $s3,2($s3,$tbl) # Te4[s3>>24] - sll $i2,8 - sll $i3,16 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$i3 - or $s3,$t3 - - lg $ra,152($sp) - xr $s0,$t0 - xr $s1,$t2 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_encrypt,.-_s390x_AES_encrypt -___ - -$code.=<<___; -.type AES_Td,\@object -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 
0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 
0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; -# Td4[256] -.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 -.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb -.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 -.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb -.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d -.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e -.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 -.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 -.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 -.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 -.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda -.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 -.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a -.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 -.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 -.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b -.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea -.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 -.byte 0x96, 0xac, 0x74, 
0x22, 0xe7, 0xad, 0x35, 0x85 -.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e -.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 -.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b -.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 -.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 -.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 -.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f -.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d -.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef -.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 -.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 -.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 -.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.size AES_Td,.-AES_Td - -# void AES_decrypt(const unsigned char *inp, unsigned char *out, -# const AES_KEY *key) { -.globl AES_decrypt -.type AES_decrypt,\@function -AES_decrypt: -___ -$code.=<<___ if (!$softonly); - l %r0,240($key) - lhi %r1,16 - clr %r0,%r1 - jl .Ldsoft - - la %r1,0($key) - #la %r2,0($inp) - la %r4,0($out) - lghi %r3,16 # single block length - .long 0xb92e0042 # km %r4,%r2 - brc 1,.-4 # can this happen? 
- br %r14 -.align 64 -.Ldsoft: -___ -$code.=<<___; - stmg %r3,$ra,24($sp) - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - - larl $tbl,AES_Td - bras $ra,_s390x_AES_decrypt - - lg $out,24($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - lmg %r6,$ra,48($sp) - br $ra -.size AES_decrypt,.-AES_decrypt - -.type _s390x_AES_decrypt,\@function -.align 16 -_s390x_AES_decrypt: - stg $ra,152($sp) - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $s3,12($key) - l $rounds,240($key) - llill $mask,`0xff<<3` - aghi $rounds,-1 - j .Ldec_loop -.align 16 -.Ldec_loop: - srlg $t1,$s0,`16-3` - srlg $t2,$s0,`8-3` - sllg $t3,$s0,`0+3` - srl $s0,`24-3` - nr $s0,$mask - nr $t1,$mask - nr $t2,$mask - ngr $t3,$mask - - sllg $i1,$s1,`0+3` # i0 - srlg $i2,$s1,`16-3` - srlg $i3,$s1,`8-3` - srl $s1,`24-3` - ngr $i1,$mask - nr $s1,$mask - nr $i2,$mask - nr $i3,$mask - - l $s0,0($s0,$tbl) # Td0[s0>>24] - l $t1,3($t1,$tbl) # Td1[s0>>16] - l $t2,2($t2,$tbl) # Td2[s0>>8] - l $t3,1($t3,$tbl) # Td3[s0>>0] - - x $s0,1($i1,$tbl) # Td3[s1>>0] - l $s1,0($s1,$tbl) # Td0[s1>>24] - x $t2,3($i2,$tbl) # Td1[s1>>16] - x $t3,2($i3,$tbl) # Td2[s1>>8] - - srlg $i1,$s2,`8-3` # i0 - sllg $i2,$s2,`0+3` # i1 - srlg $i3,$s2,`16-3` - srl $s2,`24-3` - nr $i1,$mask - ngr $i2,$mask - nr $s2,$mask - nr $i3,$mask - - xr $s1,$t1 - srlg $ra,$s3,`8-3` # i1 - srlg $t1,$s3,`16-3` # i0 - nr $ra,$mask - la $key,16($key) - nr $t1,$mask - - x $s0,2($i1,$tbl) # Td2[s2>>8] - x $s1,1($i2,$tbl) # Td3[s2>>0] - l $s2,0($s2,$tbl) # Td0[s2>>24] - x $t3,3($i3,$tbl) # Td1[s2>>16] - - sllg $i3,$s3,`0+3` # i2 - srl $s3,`24-3` - ngr $i3,$mask - nr $s3,$mask - - xr $s2,$t2 - x $s0,0($key) - x $s1,4($key) - x $s2,8($key) - x $t3,12($key) - - x $s0,3($t1,$tbl) # Td1[s3>>16] - x $s1,2($ra,$tbl) # Td2[s3>>8] - x $s2,1($i3,$tbl) # Td3[s3>>0] - l $s3,0($s3,$tbl) # Td0[s3>>24] - xr $s3,$t3 - - brct $rounds,.Ldec_loop - .align 16 - - l $t1,`2048+0`($tbl) # prefetch Td4 - l $t2,`2048+64`($tbl) - l 
$t3,`2048+128`($tbl) - l $i1,`2048+192`($tbl) - llill $mask,0xff - - srlg $i3,$s0,24 # i0 - srlg $t1,$s0,16 - srlg $t2,$s0,8 - nr $s0,$mask # i3 - nr $t1,$mask - - srlg $i1,$s1,24 - nr $t2,$mask - srlg $i2,$s1,16 - srlg $ra,$s1,8 - nr $s1,$mask # i0 - nr $i2,$mask - nr $ra,$mask - - llgc $i3,2048($i3,$tbl) # Td4[s0>>24] - llgc $t1,2048($t1,$tbl) # Td4[s0>>16] - llgc $t2,2048($t2,$tbl) # Td4[s0>>8] - sll $t1,16 - llgc $t3,2048($s0,$tbl) # Td4[s0>>0] - sllg $s0,$i3,24 - sll $t2,8 - - llgc $s1,2048($s1,$tbl) # Td4[s1>>0] - llgc $i1,2048($i1,$tbl) # Td4[s1>>24] - llgc $i2,2048($i2,$tbl) # Td4[s1>>16] - sll $i1,24 - llgc $i3,2048($ra,$tbl) # Td4[s1>>8] - sll $i2,16 - sll $i3,8 - or $s0,$s1 - or $t1,$i1 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s2,8 # i0 - srlg $i2,$s2,24 - srlg $i3,$s2,16 - nr $s2,$mask # i1 - nr $i1,$mask - nr $i3,$mask - llgc $i1,2048($i1,$tbl) # Td4[s2>>8] - llgc $s1,2048($s2,$tbl) # Td4[s2>>0] - llgc $i2,2048($i2,$tbl) # Td4[s2>>24] - llgc $i3,2048($i3,$tbl) # Td4[s2>>16] - sll $i1,8 - sll $i2,24 - or $s0,$i1 - sll $i3,16 - or $t2,$i2 - or $t3,$i3 - - srlg $i1,$s3,16 # i0 - srlg $i2,$s3,8 # i1 - srlg $i3,$s3,24 - nr $s3,$mask # i2 - nr $i1,$mask - nr $i2,$mask - - lg $ra,152($sp) - or $s1,$t1 - l $t0,16($key) - l $t1,20($key) - - llgc $i1,2048($i1,$tbl) # Td4[s3>>16] - llgc $i2,2048($i2,$tbl) # Td4[s3>>8] - sll $i1,16 - llgc $s2,2048($s3,$tbl) # Td4[s3>>0] - llgc $s3,2048($i3,$tbl) # Td4[s3>>24] - sll $i2,8 - sll $s3,24 - or $s0,$i1 - or $s1,$i2 - or $s2,$t2 - or $s3,$t3 - - xr $s0,$t0 - xr $s1,$t1 - x $s2,24($key) - x $s3,28($key) - - br $ra -.size _s390x_AES_decrypt,.-_s390x_AES_decrypt -___ - -$code.=<<___; -# void AES_set_encrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl AES_set_encrypt_key -.type AES_set_encrypt_key,\@function -.align 16 -AES_set_encrypt_key: - lghi $t0,0 - clgr $inp,$t0 - je .Lminus1 - clgr $key,$t0 - je .Lminus1 - - lghi $t0,128 - clr $bits,$t0 - je .Lproceed - lghi $t0,192 - clr $bits,$t0 - je .Lproceed 
- lghi $t0,256 - clr $bits,$t0 - je .Lproceed - lghi %r2,-2 - br %r14 - -.align 16 -.Lproceed: -___ -$code.=<<___ if (!$softonly); - # convert bits to km code, [128,192,256]->[18,19,20] - lhi %r5,-128 - lhi %r0,18 - ar %r5,$bits - srl %r5,6 - ar %r5,%r0 - - lghi %r0,0 # query capability vector - la %r1,16($sp) - .long 0xb92f0042 # kmc %r4,%r2 - - llihh %r1,0x8000 - srlg %r1,%r1,0(%r5) - ng %r1,16($sp) - jz .Lekey_internal - - lmg %r0,%r1,0($inp) # just copy 128 bits... - stmg %r0,%r1,0($key) - lhi %r0,192 - cr $bits,%r0 - jl 1f - lg %r1,16($inp) - stg %r1,16($key) - je 1f - lg %r1,24($inp) - stg %r1,24($key) -1: st $bits,236($key) # save bits - st %r5,240($key) # save km code - lghi %r2,0 - br %r14 -___ -$code.=<<___; -.align 16 -.Lekey_internal: - stmg %r6,%r13,48($sp) # all non-volatile regs - - larl $tbl,AES_Te+2048 - - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - st $s0,0($key) - st $s1,4($key) - st $s2,8($key) - st $s3,12($key) - lghi $t0,128 - cr $bits,$t0 - jne .Lnot128 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,10 - st $rounds,240($key) - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t2,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L128_loop: - la $t2,0($t2,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8 - icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16 - icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24 - icm $t2,1,0($i3) # Te4[rk[3]>>24] - x $t2,256($t3,$tbl) # rcon[i] - xr $s0,$t2 # rk[4]=rk[0]^... 
- xr $s1,$s0 # rk[5]=rk[1]^rk[4] - xr $s2,$s1 # rk[6]=rk[2]^rk[5] - xr $s3,$s2 # rk[7]=rk[3]^rk[6] - - llgfr $t2,$s3 # temp=rk[3] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - nr $t2,$mask - nr $i1,$mask - srlg $i3,$s3,24 - nr $i2,$mask - - st $s0,16($key) - st $s1,20($key) - st $s2,24($key) - st $s3,28($key) - la $key,16($key) # key+=4 - la $t3,4($t3) # i++ - brct $rounds,.L128_loop - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.Lnot128: - llgf $t0,16($inp) - llgf $t1,20($inp) - st $t0,16($key) - st $t1,20($key) - lghi $t0,192 - cr $bits,$t0 - jne .Lnot192 - - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,12 - st $rounds,240($key) - lghi $rounds,8 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L192_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[5]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[6]=rk[0]^... 
- xr $s1,$s0 # rk[7]=rk[1]^rk[6] - xr $s2,$s1 # rk[8]=rk[2]^rk[7] - xr $s3,$s2 # rk[9]=rk[3]^rk[8] - - st $s0,24($key) - st $s1,28($key) - st $s2,32($key) - st $s3,36($key) - brct $rounds,.L192_continue - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.L192_continue: - lgr $t1,$s3 - x $t1,16($key) # rk[10]=rk[4]^rk[9] - st $t1,40($key) - x $t1,20($key) # rk[11]=rk[5]^rk[10] - st $t1,44($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,24($key) # key+=6 - la $t3,4($t3) # i++ - j .L192_loop - -.align 16 -.Lnot192: - llgf $t0,24($inp) - llgf $t1,28($inp) - st $t0,24($key) - st $t1,28($key) - llill $mask,0xff - lghi $t3,0 # i=0 - lghi $rounds,14 - st $rounds,240($key) - lghi $rounds,7 - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - -.align 16 -.L256_loop: - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8 - icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16 - icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24 - icm $t1,1,0($i3) # Te4[rk[7]>>24] - x $t1,256($t3,$tbl) # rcon[i] - xr $s0,$t1 # rk[8]=rk[0]^... - xr $s1,$s0 # rk[9]=rk[1]^rk[8] - xr $s2,$s1 # rk[10]=rk[2]^rk[9] - xr $s3,$s2 # rk[11]=rk[3]^rk[10] - st $s0,32($key) - st $s1,36($key) - st $s2,40($key) - st $s3,44($key) - brct $rounds,.L256_continue - lghi %r2,0 - lmg %r6,%r13,48($sp) - br $ra - -.align 16 -.L256_continue: - lgr $t1,$s3 # temp=rk[11] - srlg $i1,$s3,8 - srlg $i2,$s3,16 - srlg $i3,$s3,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - la $t1,0($t1,$tbl) - la $i1,0($i1,$tbl) - la $i2,0($i2,$tbl) - la $i3,0($i3,$tbl) - llgc $t1,0($t1) # Te4[rk[11]>>0] - icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8 - icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16 - icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24 - x $t1,16($key) # rk[12]=rk[4]^... 
- st $t1,48($key) - x $t1,20($key) # rk[13]=rk[5]^rk[12] - st $t1,52($key) - x $t1,24($key) # rk[14]=rk[6]^rk[13] - st $t1,56($key) - x $t1,28($key) # rk[15]=rk[7]^rk[14] - st $t1,60($key) - - srlg $i1,$t1,8 - srlg $i2,$t1,16 - srlg $i3,$t1,24 - nr $t1,$mask - nr $i1,$mask - nr $i2,$mask - - la $key,32($key) # key+=8 - la $t3,4($t3) # i++ - j .L256_loop - -.Lminus1: - lghi %r2,-1 - br $ra -.size AES_set_encrypt_key,.-AES_set_encrypt_key - -# void AES_set_decrypt_key(const unsigned char *in, int bits, -# AES_KEY *key) { -.globl AES_set_decrypt_key -.type AES_set_decrypt_key,\@function -.align 16 -AES_set_decrypt_key: - stg $key,32($sp) # I rely on AES_set_encrypt_key to - stg $ra,112($sp) # save non-volatile registers! - bras $ra,AES_set_encrypt_key - lg $key,32($sp) - lg $ra,112($sp) - ltgr %r2,%r2 - bnzr $ra -___ -$code.=<<___ if (!$softonly); - l $t0,240($key) - lhi $t1,16 - cr $t0,$t1 - jl .Lgo - oill $t0,0x80 # set "decrypt" bit - st $t0,240($key) - br $ra - -.align 16 -.Ldkey_internal: - stg $key,32($sp) - stg $ra,40($sp) - bras $ra,.Lekey_internal - lg $key,32($sp) - lg $ra,40($sp) -___ -$code.=<<___; - -.Lgo: llgf $rounds,240($key) - la $i1,0($key) - sllg $i2,$rounds,4 - la $i2,0($i2,$key) - srl $rounds,1 - lghi $t1,-16 - -.align 16 -.Linv: lmg $s0,$s1,0($i1) - lmg $s2,$s3,0($i2) - stmg $s0,$s1,0($i2) - stmg $s2,$s3,0($i1) - la $i1,16($i1) - la $i2,0($t1,$i2) - brct $rounds,.Linv -___ -$mask80=$i1; -$mask1b=$i2; -$maskfe=$i3; -$code.=<<___; - llgf $rounds,240($key) - aghi $rounds,-1 - sll $rounds,2 # (rounds-1)*4 - llilh $mask80,0x8080 - llilh $mask1b,0x1b1b - llilh $maskfe,0xfefe - oill $mask80,0x8080 - oill $mask1b,0x1b1b - oill $maskfe,0xfefe - -.align 16 -.Lmix: l $s0,16($key) # tp1 - lr $s1,$s0 - ngr $s1,$mask80 - srlg $t1,$s1,7 - slr $s1,$t1 - nr $s1,$mask1b - sllg $t1,$s0,1 - nr $t1,$maskfe - xr $s1,$t1 # tp2 - - lr $s2,$s1 - ngr $s2,$mask80 - srlg $t1,$s2,7 - slr $s2,$t1 - nr $s2,$mask1b - sllg $t1,$s1,1 - nr $t1,$maskfe - xr $s2,$t1 # tp4 - - lr 
$s3,$s2 - ngr $s3,$mask80 - srlg $t1,$s3,7 - slr $s3,$t1 - nr $s3,$mask1b - sllg $t1,$s2,1 - nr $t1,$maskfe - xr $s3,$t1 # tp8 - - xr $s1,$s0 # tp2^tp1 - xr $s2,$s0 # tp4^tp1 - rll $s0,$s0,24 # = ROTATE(tp1,8) - xr $s2,$s3 # ^=tp8 - xr $s0,$s1 # ^=tp2^tp1 - xr $s1,$s3 # tp2^tp1^tp8 - xr $s0,$s2 # ^=tp4^tp1^tp8 - rll $s1,$s1,8 - rll $s2,$s2,16 - xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24) - rll $s3,$s3,24 - xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16) - xr $s0,$s3 # ^= ROTATE(tp8,8) - - st $s0,16($key) - la $key,4($key) - brct $rounds,.Lmix - - lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key! - lghi %r2,0 - br $ra -.size AES_set_decrypt_key,.-AES_set_decrypt_key -___ - -#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, -# size_t length, const AES_KEY *key, -# unsigned char *ivec, const int enc) -{ -my $inp="%r2"; -my $out="%r4"; # length and out are swapped -my $len="%r3"; -my $key="%r5"; -my $ivp="%r6"; - -$code.=<<___; -.globl AES_cbc_encrypt -.type AES_cbc_encrypt,\@function -.align 16 -AES_cbc_encrypt: - xgr %r3,%r4 # flip %r3 and %r4, out and len - xgr %r4,%r3 - xgr %r3,%r4 -___ -$code.=<<___ if (!$softonly); - lhi %r0,16 - cl %r0,240($key) - jh .Lcbc_software - - lg %r0,0($ivp) # copy ivec - lg %r1,8($ivp) - stmg %r0,%r1,16($sp) - lmg %r0,%r1,0($key) # copy key, cover 256 bit - stmg %r0,%r1,32($sp) - lmg %r0,%r1,16($key) - stmg %r0,%r1,48($sp) - l %r0,240($key) # load kmc code - lghi $key,15 # res=len%16, len-=res; - ngr $key,$len - slgr $len,$key - la %r1,16($sp) # parameter block - ivec || key - jz .Lkmc_truncated - .long 0xb92f0042 # kmc %r4,%r2 - brc 1,.-4 # pay attention to "partial completion" - ltr $key,$key - jnz .Lkmc_truncated -.Lkmc_done: - lmg %r0,%r1,16($sp) # copy ivec to caller - stg %r0,0($ivp) - stg %r1,8($ivp) - br $ra -.align 16 -.Lkmc_truncated: - ahi $key,-1 # it's the way it's encoded in mvc - tmll %r0,0x80 - jnz .Lkmc_truncated_dec - lghi %r1,0 - stg %r1,128($sp) - stg %r1,136($sp) - bras %r1,1f - mvc 128(1,$sp),0($inp) 
-1: ex $key,0(%r1) - la %r1,16($sp) # restore parameter block - la $inp,128($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - j .Lkmc_done -.align 16 -.Lkmc_truncated_dec: - stg $out,64($sp) - la $out,128($sp) - lghi $len,16 - .long 0xb92f0042 # kmc %r4,%r2 - lg $out,64($sp) - bras %r1,2f - mvc 0(1,$out),128($sp) -2: ex $key,0(%r1) - j .Lkmc_done -.align 16 -.Lcbc_software: -___ -$code.=<<___; - stmg $key,$ra,40($sp) - lhi %r0,0 - cl %r0,164($sp) - je .Lcbc_decrypt - - larl $tbl,AES_Te - - llgf $s0,0($ivp) - llgf $s1,4($ivp) - llgf $s2,8($ivp) - llgf $s3,12($ivp) - - lghi $t0,16 - slgr $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow -.Lcbc_enc_loop: - stmg $inp,$out,16($sp) - x $s0,0($inp) - x $s1,4($inp) - x $s2,8($inp) - x $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_encrypt - - lmg $inp,$key,16($sp) - st $s0,0($out) - st $s1,4($out) - st $s2,8($out) - st $s3,12($out) - - la $inp,16($inp) - la $out,16($out) - lghi $t0,16 - ltgr $len,$len - jz .Lcbc_enc_done - slgr $len,$t0 - brc 4,.Lcbc_enc_tail # if borrow - j .Lcbc_enc_loop -.align 16 -.Lcbc_enc_done: - lg $ivp,48($sp) - st $s0,0($ivp) - st $s1,4($ivp) - st $s2,8($ivp) - st $s3,12($ivp) - - lmg %r7,$ra,56($sp) - br $ra - -.align 16 -.Lcbc_enc_tail: - aghi $len,15 - lghi $t0,0 - stg $t0,128($sp) - stg $t0,136($sp) - bras $t1,3f - mvc 128(1,$sp),0($inp) -3: ex $len,0($t1) - lghi $len,0 - la $inp,128($sp) - j .Lcbc_enc_loop - -.align 16 -.Lcbc_decrypt: - larl $tbl,AES_Td - - lg $t0,0($ivp) - lg $t1,8($ivp) - stmg $t0,$t1,128($sp) - -.Lcbc_dec_loop: - stmg $inp,$out,16($sp) - llgf $s0,0($inp) - llgf $s1,4($inp) - llgf $s2,8($inp) - llgf $s3,12($inp) - lgr %r4,$key - - bras $ra,_s390x_AES_decrypt - - lmg $inp,$key,16($sp) - sllg $s0,$s0,32 - sllg $s2,$s2,32 - lr $s0,$s1 - lr $s2,$s3 - - lg $t0,0($inp) - lg $t1,8($inp) - xg $s0,128($sp) - xg $s2,136($sp) - lghi $s1,16 - slgr $len,$s1 - brc 4,.Lcbc_dec_tail # if borrow - brc 2,.Lcbc_dec_done # if zero - stg $s0,0($out) - stg $s2,8($out) - stmg $t0,$t1,128($sp) 
- - la $inp,16($inp) - la $out,16($out) - j .Lcbc_dec_loop - -.Lcbc_dec_done: - stg $s0,0($out) - stg $s2,8($out) -.Lcbc_dec_exit: - lmg $ivp,$ra,48($sp) - stmg $t0,$t1,0($ivp) - - br $ra - -.align 16 -.Lcbc_dec_tail: - aghi $len,15 - stg $s0,128($sp) - stg $s2,136($sp) - bras $s1,4f - mvc 0(1,$out),128($sp) -4: ex $len,0($s1) - j .Lcbc_dec_exit -.size AES_cbc_encrypt,.-AES_cbc_encrypt -___ -} -$code.=<<___; -.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" -___ - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; diff --git a/crypto/aes/asm/aes-sparcv9.pl b/crypto/aes/asm/aes-sparcv9.pl deleted file mode 100755 index c57b3a2..0000000 --- a/crypto/aes/asm/aes-sparcv9.pl +++ /dev/null @@ -1,1181 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# Version 1.1 -# -# The major reason for undertaken effort was to mitigate the hazard of -# cache-timing attack. This is [currently and initially!] addressed in -# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each. -# 2. References to them are scheduled for L2 cache latency, meaning -# that the tables don't have to reside in L1 cache. Once again, this -# is an initial draft and one should expect more countermeasures to -# be implemented... -# -# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last -# round. -# -# Even though performance was not the primary goal [on the contrary, -# extra shifts "induced" by compressed S-box and longer loop epilogue -# "induced" by scheduling for L2 have negative effect on performance], -# the code turned out to run in ~23 cycles per processed byte en-/ -# decrypted with 128-bit key. 
This is pretty good result for code -# with mentioned qualities and UltraSPARC core. Compared to Sun C -# generated code my encrypt procedure runs just few percents faster, -# while decrypt one - whole 50% faster [yes, Sun C failed to generate -# optimal decrypt procedure]. Compared to GNU C generated code both -# procedures are more than 60% faster:-) - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } -$locals=16; - -$acc0="%l0"; -$acc1="%o0"; -$acc2="%o1"; -$acc3="%o2"; - -$acc4="%l1"; -$acc5="%o3"; -$acc6="%o4"; -$acc7="%o5"; - -$acc8="%l2"; -$acc9="%o7"; -$acc10="%g1"; -$acc11="%g2"; - -$acc12="%l3"; -$acc13="%g3"; -$acc14="%g4"; -$acc15="%g5"; - -$t0="%l4"; -$t1="%l5"; -$t2="%l6"; -$t3="%l7"; - -$s0="%i0"; -$s1="%i1"; -$s2="%i2"; -$s3="%i3"; -$tbl="%i4"; -$key="%i5"; -$rounds="%i7"; # aliases with return address, which is off-loaded to stack - -sub _data_word() -{ my $i; - while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } -} - -$code.=<<___ if ($bits==64); -.register %g2,#scratch -.register %g3,#scratch -___ -$code.=<<___; -.section ".text",#alloc,#execinstr - -.align 256 -AES_Te: -___ -&_data_word( - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 
0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 
0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); -$code.=<<___; - .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 
0x9e - .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -.type AES_Te,#object -.size AES_Te,(.-AES_Te) - -.align 64 -.skip 16 -_sparcv9_AES_encrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+12],$t3 - srl $s0,21,$acc0 - xor $t1,$s1,$s1 - ld [$key+16],$t0 - srl $s1,13,$acc1 ! - xor $t2,$s2,$s2 - ld [$key+20],$t1 - xor $t3,$s3,$s3 - ld [$key+24],$t2 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - nop -.Lenc_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! - srl $s2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s0,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s3,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! 
- ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Lenc_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - srl $t1,13,$acc1 - xor $acc15,$t3,$t3 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t3,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t2,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t0,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t3,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t1,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t0,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t2,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! 
- - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch te4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch te4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch te4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch te4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch te4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch te4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch te4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch te4 - srl $s1,13,$acc1 ! - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Lenc_loop - and $acc0,2040,$acc0 - -.align 32 -.Lenc_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - srl $t1,16,$acc1 ! 
- xor $acc15,$t3,$t3 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t2,16,$acc5 ! - and $t3,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t3,16,$acc9 - and $t0,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t0,16,$acc13 - and $t1,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t2,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! - xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! 
- xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_encrypt,#function -.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt) - -.align 32 -.globl AES_encrypt -AES_encrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_enc - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_enc: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Te-1b,%o4 - call _sparcv9_AES_encrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl %o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type 
AES_encrypt,#function -.size AES_encrypt,(.-AES_encrypt) - -___ - -$code.=<<___; -.align 256 -AES_Td: -___ -&_data_word( - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 
0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); -$code.=<<___; - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 
- .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -.type AES_Td,#object -.size AES_Td,(.-AES_Td) - -.align 64 -.skip 16 -_sparcv9_AES_decrypt: - save %sp,-$frame-$locals,%sp - stx %i7,[%sp+$bias+$frame+0] ! off-load return address - ld [$key+240],$rounds - ld [$key+0],$t0 - ld [$key+4],$t1 ! - ld [$key+8],$t2 - ld [$key+12],$t3 - srl $rounds,1,$rounds - xor $t0,$s0,$s0 - ld [$key+16],$t0 - xor $t1,$s1,$s1 - ld [$key+20],$t1 - srl $s0,21,$acc0 ! - xor $t2,$s2,$s2 - ld [$key+24],$t2 - xor $t3,$s3,$s3 - and $acc0,2040,$acc0 - ld [$key+28],$t3 - srl $s3,13,$acc1 - nop -.Ldec_loop: - srl $s2,5,$acc2 ! - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $s1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - srl $s1,21,$acc4 - and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 ! 
- srl $s0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $s3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 - fmovs %f0,%f0 - sll $s2,3,$acc7 ! - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $s2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - srl $s1,13,$acc9 - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 ! - srl $s0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $s3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 - fmovs %f0,%f0 - srl $s3,21,$acc12 ! - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $s2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - srl $s1,5,$acc14 - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 ! - sll $s0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - and $acc15,2040,$acc15 - add $key,32,$key - ldx [$tbl+$acc14],$acc14 - fmovs %f0,%f0 - subcc $rounds,1,$rounds ! - ldx [$tbl+$acc15],$acc15 - bz,a,pn %icc,.Ldec_last - add $tbl,2048,$rounds - - srlx $acc1,8,$acc1 - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - fmovs %f0,%f0 - srlx $acc2,16,$acc2 ! - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 ! - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - fmovs %f0,%f0 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 ! - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 ! - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,21,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 - srl $t3,13,$acc1 - - and $acc0,2040,$acc0 ! - srl $t2,5,$acc2 - and $acc1,2040,$acc1 - ldx [$tbl+$acc0],$acc0 - sll $t1,3,$acc3 - and $acc2,2040,$acc2 - ldx [$tbl+$acc1],$acc1 - fmovs %f0,%f0 - srl $t1,21,$acc4 ! 
- and $acc3,2040,$acc3 - ldx [$tbl+$acc2],$acc2 - srl $t0,13,$acc5 - and $acc4,2040,$acc4 - ldx [$tbl+$acc3],$acc3 - srl $t3,5,$acc6 - and $acc5,2040,$acc5 - ldx [$tbl+$acc4],$acc4 ! - sll $t2,3,$acc7 - and $acc6,2040,$acc6 - ldx [$tbl+$acc5],$acc5 - srl $t2,21,$acc8 - and $acc7,2040,$acc7 - ldx [$tbl+$acc6],$acc6 - fmovs %f0,%f0 - srl $t1,13,$acc9 ! - and $acc8,2040,$acc8 - ldx [$tbl+$acc7],$acc7 - srl $t0,5,$acc10 - and $acc9,2040,$acc9 - ldx [$tbl+$acc8],$acc8 - sll $t3,3,$acc11 - and $acc10,2040,$acc10 - ldx [$tbl+$acc9],$acc9 ! - srl $t3,21,$acc12 - and $acc11,2040,$acc11 - ldx [$tbl+$acc10],$acc10 - srl $t2,13,$acc13 - and $acc12,2040,$acc12 - ldx [$tbl+$acc11],$acc11 - fmovs %f0,%f0 - srl $t1,5,$acc14 ! - and $acc13,2040,$acc13 - ldx [$tbl+$acc12],$acc12 - sll $t0,3,$acc15 - and $acc14,2040,$acc14 - ldx [$tbl+$acc13],$acc13 - srlx $acc1,8,$acc1 - and $acc15,2040,$acc15 - ldx [$tbl+$acc14],$acc14 ! - - srlx $acc2,16,$acc2 - xor $acc0,$s0,$s0 - ldx [$tbl+$acc15],$acc15 - srlx $acc3,24,$acc3 - xor $acc1,$s0,$s0 - ld [$key+16],$t0 - fmovs %f0,%f0 - srlx $acc5,8,$acc5 ! - xor $acc2,$s0,$s0 - ld [$key+20],$t1 - srlx $acc6,16,$acc6 - xor $acc3,$s0,$s0 - ld [$key+24],$t2 - srlx $acc7,24,$acc7 - xor $acc4,$s1,$s1 - ld [$key+28],$t3 ! - srlx $acc9,8,$acc9 - xor $acc5,$s1,$s1 - ldx [$tbl+2048+0],%g0 ! prefetch td4 - srlx $acc10,16,$acc10 - xor $acc6,$s1,$s1 - ldx [$tbl+2048+32],%g0 ! prefetch td4 - srlx $acc11,24,$acc11 - xor $acc7,$s1,$s1 - ldx [$tbl+2048+64],%g0 ! prefetch td4 - srlx $acc13,8,$acc13 - xor $acc8,$s2,$s2 - ldx [$tbl+2048+96],%g0 ! prefetch td4 - srlx $acc14,16,$acc14 ! - xor $acc9,$s2,$s2 - ldx [$tbl+2048+128],%g0 ! prefetch td4 - srlx $acc15,24,$acc15 - xor $acc10,$s2,$s2 - ldx [$tbl+2048+160],%g0 ! prefetch td4 - srl $s0,21,$acc0 - xor $acc11,$s2,$s2 - ldx [$tbl+2048+192],%g0 ! prefetch td4 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - ldx [$tbl+2048+224],%g0 ! prefetch td4 - and $acc0,2040,$acc0 ! 
- xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - ba .Ldec_loop - srl $s3,13,$acc1 - -.align 32 -.Ldec_last: - srlx $acc1,8,$acc1 ! - xor $acc0,$t0,$t0 - ld [$key+0],$s0 - srlx $acc2,16,$acc2 - xor $acc1,$t0,$t0 - ld [$key+4],$s1 - srlx $acc3,24,$acc3 - xor $acc2,$t0,$t0 - ld [$key+8],$s2 ! - srlx $acc5,8,$acc5 - xor $acc3,$t0,$t0 - ld [$key+12],$s3 - srlx $acc6,16,$acc6 - xor $acc4,$t1,$t1 - srlx $acc7,24,$acc7 - xor $acc5,$t1,$t1 - srlx $acc9,8,$acc9 ! - xor $acc6,$t1,$t1 - srlx $acc10,16,$acc10 - xor $acc7,$t1,$t1 - srlx $acc11,24,$acc11 - xor $acc8,$t2,$t2 - srlx $acc13,8,$acc13 - xor $acc9,$t2,$t2 - srlx $acc14,16,$acc14 ! - xor $acc10,$t2,$t2 - srlx $acc15,24,$acc15 - xor $acc11,$t2,$t2 - xor $acc12,$acc14,$acc14 - xor $acc13,$t3,$t3 - srl $t0,24,$acc0 - xor $acc14,$t3,$t3 - xor $acc15,$t3,$t3 ! - srl $t3,16,$acc1 - - srl $t2,8,$acc2 - and $acc1,255,$acc1 - ldub [$rounds+$acc0],$acc0 - srl $t1,24,$acc4 - and $acc2,255,$acc2 - ldub [$rounds+$acc1],$acc1 - srl $t0,16,$acc5 ! - and $t1,255,$acc3 - ldub [$rounds+$acc2],$acc2 - ldub [$rounds+$acc3],$acc3 - srl $t3,8,$acc6 - and $acc5,255,$acc5 - ldub [$rounds+$acc4],$acc4 - fmovs %f0,%f0 - srl $t2,24,$acc8 ! - and $acc6,255,$acc6 - ldub [$rounds+$acc5],$acc5 - srl $t1,16,$acc9 - and $t2,255,$acc7 - ldub [$rounds+$acc6],$acc6 - ldub [$rounds+$acc7],$acc7 - fmovs %f0,%f0 - srl $t0,8,$acc10 ! - and $acc9,255,$acc9 - ldub [$rounds+$acc8],$acc8 - srl $t3,24,$acc12 - and $acc10,255,$acc10 - ldub [$rounds+$acc9],$acc9 - srl $t2,16,$acc13 - and $t3,255,$acc11 - ldub [$rounds+$acc10],$acc10 ! - srl $t1,8,$acc14 - and $acc13,255,$acc13 - ldub [$rounds+$acc11],$acc11 - ldub [$rounds+$acc12],$acc12 - and $acc14,255,$acc14 - ldub [$rounds+$acc13],$acc13 - and $t0,255,$acc15 - ldub [$rounds+$acc14],$acc14 ! - - sll $acc0,24,$acc0 - xor $acc3,$s0,$s0 - ldub [$rounds+$acc15],$acc15 - sll $acc1,16,$acc1 - xor $acc0,$s0,$s0 - ldx [%sp+$bias+$frame+0],%i7 ! restore return address - fmovs %f0,%f0 - sll $acc2,8,$acc2 ! 
- xor $acc1,$s0,$s0 - sll $acc4,24,$acc4 - xor $acc2,$s0,$s0 - sll $acc5,16,$acc5 - xor $acc7,$s1,$s1 - sll $acc6,8,$acc6 - xor $acc4,$s1,$s1 - sll $acc8,24,$acc8 ! - xor $acc5,$s1,$s1 - sll $acc9,16,$acc9 - xor $acc11,$s2,$s2 - sll $acc10,8,$acc10 - xor $acc6,$s1,$s1 - sll $acc12,24,$acc12 - xor $acc8,$s2,$s2 - sll $acc13,16,$acc13 ! - xor $acc9,$s2,$s2 - sll $acc14,8,$acc14 - xor $acc10,$s2,$s2 - xor $acc12,$acc14,$acc14 - xor $acc13,$s3,$s3 - xor $acc14,$s3,$s3 - xor $acc15,$s3,$s3 - - ret - restore -.type _sparcv9_AES_decrypt,#function -.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt) - -.align 32 -.globl AES_decrypt -AES_decrypt: - or %o0,%o1,%g1 - andcc %g1,3,%g0 - bnz,pn %xcc,.Lunaligned_dec - save %sp,-$frame,%sp - - ld [%i0+0],%o0 - ld [%i0+4],%o1 - ld [%i0+8],%o2 - ld [%i0+12],%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - st %o0,[%i1+0] - st %o1,[%i1+4] - st %o2,[%i1+8] - st %o3,[%i1+12] - - ret - restore - -.align 32 -.Lunaligned_dec: - ldub [%i0+0],%l0 - ldub [%i0+1],%l1 - ldub [%i0+2],%l2 - - sll %l0,24,%l0 - ldub [%i0+3],%l3 - sll %l1,16,%l1 - ldub [%i0+4],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+5],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+6],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o0 - ldub [%i0+7],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - ldub [%i0+8],%l0 - or %l7,%l6,%l6 - ldub [%i0+9],%l1 - or %l4,%l6,%o1 - ldub [%i0+10],%l2 - - sll %l0,24,%l0 - ldub [%i0+11],%l3 - sll %l1,16,%l1 - ldub [%i0+12],%l4 - sll %l2,8,%l2 - or %l1,%l0,%l0 - ldub [%i0+13],%l5 - sll %l4,24,%l4 - or %l3,%l2,%l2 - ldub [%i0+14],%l6 - sll %l5,16,%l5 - or %l0,%l2,%o2 - ldub [%i0+15],%l7 - - sll %l6,8,%l6 - or %l5,%l4,%l4 - or %l7,%l6,%l6 - or %l4,%l6,%o3 - -1: call .+8 - add %o7,AES_Td-1b,%o4 - call _sparcv9_AES_decrypt - mov %i2,%o5 - - srl %o0,24,%l0 - srl %o0,16,%l1 - stb %l0,[%i1+0] - srl %o0,8,%l2 - stb %l1,[%i1+1] - stb %l2,[%i1+2] - srl %o1,24,%l4 - stb %o0,[%i1+3] - - srl %o1,16,%l5 - stb %l4,[%i1+4] - srl 
%o1,8,%l6 - stb %l5,[%i1+5] - stb %l6,[%i1+6] - srl %o2,24,%l0 - stb %o1,[%i1+7] - - srl %o2,16,%l1 - stb %l0,[%i1+8] - srl %o2,8,%l2 - stb %l1,[%i1+9] - stb %l2,[%i1+10] - srl %o3,24,%l4 - stb %o2,[%i1+11] - - srl %o3,16,%l5 - stb %l4,[%i1+12] - srl %o3,8,%l6 - stb %l5,[%i1+13] - stb %l6,[%i1+14] - stb %o3,[%i1+15] - - ret - restore -.type AES_decrypt,#function -.size AES_decrypt,(.-AES_decrypt) -___ - -# fmovs instructions substituting for FP nops were originally added -# to meet specific instruction alignment requirements to maximize ILP. -# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have -# undesired effect, so just omit them and sacrifice some portion of -# percent in performance... -$code =~ s/fmovs.*$//gem; - -print $code; diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl index f616f17..b008ab5 100755 --- a/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/aes/asm/aes-x86_64.pl @@ -1181,12 +1181,12 @@ AES_cbc_encrypt: .Lcbc_cleanup: cmpl \$0,$mark # was the key schedule copied? lea $aes_key,%rdi - mov $_rsp,%rsp je .Lcbc_exit mov \$240/8,%ecx xor %rax,%rax .long 0x90AB48F3 # rep stosq .Lcbc_exit: + mov $_rsp,%rsp popfq pop %r15 pop %r14 |