diff options
Diffstat (limited to 'src/crypto/cpu-x86-asm.pl')
-rw-r--r-- | src/crypto/cpu-x86-asm.pl | 334 |
1 files changed, 0 insertions, 334 deletions
diff --git a/src/crypto/cpu-x86-asm.pl b/src/crypto/cpu-x86-asm.pl deleted file mode 100644 index 319c436..0000000 --- a/src/crypto/cpu-x86-asm.pl +++ /dev/null @@ -1,334 +0,0 @@ -#!/usr/bin/env perl - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -push(@INC, "${dir}perlasm", "perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],"crypto/cpu-x86-asm"); - -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&function_begin("OPENSSL_ia32_cpuid"); - &xor ("edx","edx"); - &pushf (); - &pop ("eax"); - &mov ("ecx","eax"); - &xor ("eax",1<<21); - &push ("eax"); - &popf (); - &pushf (); - &pop ("eax"); - &xor ("ecx","eax"); - &xor ("eax","eax"); - &bt ("ecx",21); - &jnc (&label("nocpuid")); - &mov ("esi",&wparam(0)); - &mov (&DWP(8,"esi"),"eax"); # clear 3rd word - &cpuid (); - &mov ("edi","eax"); # max value for standard query level - - &xor ("eax","eax"); - &cmp ("ebx",0x756e6547); # "Genu" - &setne (&LB("eax")); - &mov ("ebp","eax"); - &cmp ("edx",0x49656e69); # "ineI" - &setne (&LB("eax")); - &or ("ebp","eax"); - &cmp ("ecx",0x6c65746e); # "ntel" - &setne (&LB("eax")); - &or ("ebp","eax"); # 0 indicates Intel CPU - &jz (&label("intel")); - - &cmp ("ebx",0x68747541); # "Auth" - &setne (&LB("eax")); - &mov ("esi","eax"); - &cmp ("edx",0x69746E65); # "enti" - &setne (&LB("eax")); - &or ("esi","eax"); - &cmp ("ecx",0x444D4163); # "cAMD" - &setne (&LB("eax")); - &or ("esi","eax"); # 0 indicates AMD CPU - &jnz (&label("intel")); - - # AMD specific - &mov ("eax",0x80000000); - &cpuid (); - &cmp ("eax",0x80000001); - &jb (&label("intel")); - &mov ("esi","eax"); - &mov ("eax",0x80000001); - &cpuid (); - &or ("ebp","ecx"); - &and ("ebp",1<<11|1); # isolate XOP bit - &cmp ("esi",0x80000008); - &jb (&label("intel")); - - &mov ("eax",0x80000008); - &cpuid (); - &movz ("esi",&LB("ecx")); # number of cores - 1 - &inc ("esi"); # number of cores - - &mov ("eax",1); - &xor ("ecx","ecx"); - &cpuid (); - &bt ("edx",28); - &jnc (&label("generic")); - &shr ("ebx",16); - &and ("ebx",0xff); - &cmp ("ebx","esi"); - &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit - &jmp (&label("generic")); - -&set_label("intel"); - &cmp ("edi",7); - &jb (&label("cacheinfo")); - - &mov ("esi",&wparam(0)); - &mov ("eax",7); - &xor ("ecx","ecx"); - &cpuid (); - &mov (&DWP(8,"esi"),"ebx"); - -&set_label("cacheinfo"); - &cmp ("edi",4); - &mov ("edi",-1); - &jb (&label("nocacheinfo")); - - &mov ("eax",4); - &mov ("ecx",0); # query L1D - &cpuid (); - &mov ("edi","eax"); - &shr ("edi",14); - &and ("edi",0xfff); # number of cores -1 per L1D - -&set_label("nocacheinfo"); - &mov ("eax",1); - &xor ("ecx","ecx"); - &cpuid (); - &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 - &cmp ("ebp",0); - &jne (&label("notintel")); - &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs -&set_label("notintel"); - &bt ("edx",28); # test hyper-threading bit - &jnc (&label("generic")); - &and ("edx",0xefffffff); - &cmp ("edi",0); - &je (&label("generic")); - - &or ("edx",0x10000000); - &shr ("ebx",16); - &cmp (&LB("ebx"),1); - &ja (&label("generic")); - &and ("edx",0xefffffff); # clear hyper-threading bit if not - -&set_label("generic"); - &and ("ebp",1<<11); # isolate AMD XOP flag - &and ("ecx",0xfffff7ff); # force 11th bit to 0 - &mov ("esi","edx"); - &or ("ebp","ecx"); # merge AMD XOP flag - - &bt ("ecx",27); # check OSXSAVE bit - &jnc (&label("clear_avx")); - &xor ("ecx","ecx"); - &data_byte(0x0f,0x01,0xd0); # xgetbv - &and ("eax",6); - &cmp ("eax",6); - &je (&label("done")); - &cmp ("eax",2); - &je (&label("clear_avx")); -&set_label("clear_xmm"); - &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits - &and ("esi",0xfeffffff); # clear FXSR -&set_label("clear_avx"); - &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits - &mov ("edi",&wparam(0)); - &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2 -&set_label("done"); - &mov ("eax","esi"); - &mov ("edx","ebp"); -&set_label("nocpuid"); -&function_end("OPENSSL_ia32_cpuid"); - -&external_label("OPENSSL_ia32cap_P"); - -&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"ecx"),4); - &jnc (&label("notsc")); - &rdtsc (); -&set_label("notsc"); - &ret (); -&function_end_B("OPENSSL_rdtsc"); - -# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host], -# but it's safe to call it on any [supported] 32-bit platform... -# Just check for [non-]zero return value... -&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &bt (&DWP(0,"ecx"),4); - &jnc (&label("nohalt")); # no TSC - - &data_word(0x9058900e); # push %cs; pop %eax - &and ("eax",3); - &jnz (&label("nohalt")); # not enough privileges - - &pushf (); - &pop ("eax"); - &bt ("eax",9); - &jnc (&label("nohalt")); # interrupts are disabled - - &rdtsc (); - &push ("edx"); - &push ("eax"); - &halt (); - &rdtsc (); - - &sub ("eax",&DWP(0,"esp")); - &sbb ("edx",&DWP(4,"esp")); - &add ("esp",8); - &ret (); - -&set_label("nohalt"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &ret (); -&function_end_B("OPENSSL_instrument_halt"); - -# Essentially there is only one use for this function. Under DJGPP: -# -# #include <go32.h> -# ... -# i=OPENSSL_far_spin(_dos_ds,0x46c); -# ... -# to obtain the number of spins till closest timer interrupt. - -&function_begin_B("OPENSSL_far_spin"); - &pushf (); - &pop ("eax"); - &bt ("eax",9); - &jnc (&label("nospin")); # interrupts are disabled - - &mov ("eax",&DWP(4,"esp")); - &mov ("ecx",&DWP(8,"esp")); - &data_word (0x90d88e1e); # push %ds, mov %eax,%ds - &xor ("eax","eax"); - &mov ("edx",&DWP(0,"ecx")); - &jmp (&label("spin")); - - &align (16); -&set_label("spin"); - &inc ("eax"); - &cmp ("edx",&DWP(0,"ecx")); - &je (&label("spin")); - - &data_word (0x1f909090); # pop %ds - &ret (); - -&set_label("nospin"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &ret (); -&function_end_B("OPENSSL_far_spin"); - -&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); - &xor ("eax","eax"); - &xor ("edx","edx"); - &picmeup("ecx","OPENSSL_ia32cap_P"); - &mov ("ecx",&DWP(0,"ecx")); - &bt (&DWP(0,"ecx"),1); - &jnc (&label("no_x87")); - if ($sse2) { - &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits - &cmp ("ecx",1<<26|1<<24); - &jne (&label("no_sse2")); - &pxor ("xmm0","xmm0"); - &pxor ("xmm1","xmm1"); - &pxor ("xmm2","xmm2"); - &pxor ("xmm3","xmm3"); - &pxor ("xmm4","xmm4"); - &pxor ("xmm5","xmm5"); - &pxor ("xmm6","xmm6"); - &pxor ("xmm7","xmm7"); - &set_label("no_sse2"); - } - # just a bunch of fldz to zap the fp/mm bank followed by finit... - &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b); -&set_label("no_x87"); - &lea ("eax",&DWP(4,"esp")); - &ret (); -&function_end_B("OPENSSL_wipe_cpu"); - -&function_begin_B("OPENSSL_atomic_add"); - &mov ("edx",&DWP(4,"esp")); # fetch the pointer, 1st arg - &mov ("ecx",&DWP(8,"esp")); # fetch the increment, 2nd arg - &push ("ebx"); - &nop (); - &mov ("eax",&DWP(0,"edx")); -&set_label("spin"); - &lea ("ebx",&DWP(0,"eax","ecx")); - &nop (); - &data_word(0x1ab10ff0); # lock; cmpxchg %ebx,(%edx) # %eax is envolved and is always reloaded - &jne (&label("spin")); - &mov ("eax","ebx"); # OpenSSL expects the new value - &pop ("ebx"); - &ret (); -&function_end_B("OPENSSL_atomic_add"); - -# This function can become handy under Win32 in situations when -# we don't know which calling convention, __stdcall or __cdecl(*), -# indirect callee is using. In C it can be deployed as -# -#ifdef OPENSSL_CPUID_OBJ -# type OPENSSL_indirect_call(void *f,...); -# ... -# OPENSSL_indirect_call(func,[up to $max arguments]); -#endif -# -# (*) it's designed to work even for __fastcall if number of -# arguments is 1 or 2! -&function_begin_B("OPENSSL_indirect_call"); - { - my ($max,$i)=(7,); # $max has to be chosen as 4*n-1 - # in order to preserve eventual - # stack alignment - &push ("ebp"); - &mov ("ebp","esp"); - &sub ("esp",$max*4); - &mov ("ecx",&DWP(12,"ebp")); - &mov (&DWP(0,"esp"),"ecx"); - &mov ("edx",&DWP(16,"ebp")); - &mov (&DWP(4,"esp"),"edx"); - for($i=2;$i<$max;$i++) - { - # Some copies will be redundant/bogus... - &mov ("eax",&DWP(12+$i*4,"ebp")); - &mov (&DWP(0+$i*4,"esp"),"eax"); - } - &call_ptr (&DWP(8,"ebp"));# make the call... - &mov ("esp","ebp"); # ... and just restore the stack pointer - # without paying attention to what we called, - # (__cdecl *func) or (__stdcall *one). - &pop ("ebp"); - &ret (); - } -&function_end_B("OPENSSL_indirect_call"); - -&function_begin_B("OPENSSL_ia32_rdrand"); - &mov ("ecx",8); -&set_label("loop"); - &rdrand ("eax"); - &jc (&label("break")); - &loop (&label("loop")); -&set_label("break"); - &cmp ("eax",0); - &cmove ("eax","ecx"); - &ret (); -&function_end_B("OPENSSL_ia32_rdrand"); - -&hidden("OPENSSL_ia32cap_P"); - -&asm_finish(); |