diff options
Diffstat (limited to 'src/crypto/cpu-intel.c')
-rw-r--r-- | src/crypto/cpu-intel.c | 157 |
1 files changed, 12 insertions, 145 deletions
diff --git a/src/crypto/cpu-intel.c b/src/crypto/cpu-intel.c index 924bab0..df0e127 100644 --- a/src/crypto/cpu-intel.c +++ b/src/crypto/cpu-intel.c @@ -68,58 +68,8 @@ #include <stdio.h> #include <string.h> -#if defined(OPENSSL_WINDOWS) -#pragma warning(push, 3) -#include <immintrin.h> -#include <intrin.h> -#pragma warning(pop) -#endif - - -/* OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX - * is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through - * |*out_edx|. */ -static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx, - uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) { -#if defined(OPENSSL_WINDOWS) - int tmp[4]; - __cpuid(tmp, (int)leaf); - *out_eax = (uint32_t)tmp[0]; - *out_ebx = (uint32_t)tmp[1]; - *out_ecx = (uint32_t)tmp[2]; - *out_edx = (uint32_t)tmp[3]; -#elif defined(__pic__) && defined(OPENSSL_32_BIT) - /* Inline assembly may not clobber the PIC register. For 32-bit, this is EBX. - * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602. */ - __asm__ volatile ( - "xor %%ecx, %%ecx\n" - "mov %%ebx, %%edi\n" - "cpuid\n" - "xchg %%edi, %%ebx\n" - : "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx) - : "a"(leaf) - ); -#else - __asm__ volatile ( - "xor %%ecx, %%ecx\n" - "cpuid\n" - : "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx) - : "a"(leaf) - ); -#endif -} - -/* OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR). - * Currently only XCR0 is defined by Intel so |xcr| should always be zero. */ -static uint64_t OPENSSL_xgetbv(uint32_t xcr) { -#if defined(OPENSSL_WINDOWS) - return (uint64_t)_xgetbv(xcr); -#else - uint32_t eax, edx; - __asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); - return (((uint64_t)edx) << 32) | eax; -#endif -} +/* OPENSSL_ia32_cpuid is defined in cpu-x86_64-asm.pl. */ +extern uint64_t OPENSSL_ia32_cpuid(uint32_t*); /* handle_cpu_env applies the value from |in| to the CPUID values in |out[0]| * and |out[1]|. See the comment in |OPENSSL_cpuid_setup| about this. */ @@ -141,101 +91,18 @@ static void handle_cpu_env(uint32_t *out, const char *in) { } void OPENSSL_cpuid_setup(void) { - /* Determine the vendor and maximum input value. */ - uint32_t eax, ebx, ecx, edx; - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0); - - uint32_t num_ids = eax; - - int is_intel = ebx == 0x756e6547 /* Genu */ && - edx == 0x49656e69 /* ineI */ && - ecx == 0x6c65746e /* ntel */; - int is_amd = ebx == 0x68747541 /* Auth */ && - edx == 0x69746e65 /* enti */ && - ecx == 0x444d4163 /* cAMD */; - - int has_amd_xop = 0; - if (is_amd) { - /* AMD-specific logic. - * See http://developer.amd.com/wordpress/media/2012/10/254811.pdf */ - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000); - uint32_t num_extended_ids = eax; - if (num_extended_ids >= 0x80000001) { - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001); - if (ecx & (1 << 11)) { - has_amd_xop = 1; - } - } - } - - uint32_t extended_features = 0; - if (num_ids >= 7) { - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7); - extended_features = ebx; - } - - /* Determine the number of cores sharing an L1 data cache to adjust the - * hyper-threading bit. */ - uint32_t cores_per_cache = 0; - if (is_amd) { - /* AMD CPUs never share an L1 data cache between threads but do set the HTT - * bit on multi-core CPUs. */ - cores_per_cache = 1; - } else if (num_ids >= 4) { - /* TODO(davidben): The Intel manual says this CPUID leaf enumerates all - * caches using ECX and doesn't say which is first. Does this matter? */ - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4); - cores_per_cache = 1 + ((eax >> 14) & 0xfff); - } - - OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1); - - /* Adjust the hyper-threading bit. */ - if (edx & (1 << 28)) { - uint32_t num_logical_cores = (ebx >> 16) & 0xff; - if (cores_per_cache == 1 || num_logical_cores <= 1) { - edx &= ~(1 << 28); - } - } - - /* Reserved bit #20 was historically repurposed to control the in-memory - * representation of RC4 state. Always set it to zero. */ - edx &= ~(1 << 20); - - /* Reserved bit #30 is repurposed to signal an Intel CPU. */ - if (is_intel) { - edx |= (1 << 30); - } else { - edx &= ~(1 << 30); - } - - /* The SDBG bit is repurposed to denote AMD XOP support. */ - if (has_amd_xop) { - ecx |= (1 << 11); - } else { - ecx &= ~(1 << 11); - } - - uint64_t xcr0 = 0; - if (ecx & (1 << 27)) { - /* XCR0 may only be queried if the OSXSAVE bit is set. */ - xcr0 = OPENSSL_xgetbv(0); - } - /* See Intel manual, section 14.3. */ - if ((xcr0 & 6) != 6) { - /* YMM registers cannot be used. */ - ecx &= ~(1 << 28); /* AVX */ - ecx &= ~(1 << 12); /* FMA */ - ecx &= ~(1 << 11); /* AMD XOP */ - extended_features &= ~(1 << 5); /* AVX2 */ - } + const char *env1, *env2; - OPENSSL_ia32cap_P[0] = edx; - OPENSSL_ia32cap_P[1] = ecx; - OPENSSL_ia32cap_P[2] = extended_features; - OPENSSL_ia32cap_P[3] = 0; +#if defined(OPENSSL_X86_64) + OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); +#else + uint64_t vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); + /* 1<<10 sets a reserved bit to indicate that the variable + * was already initialised. */ + OPENSSL_ia32cap_P[0] = ((uint32_t)vec) | (1 << 10); + OPENSSL_ia32cap_P[1] = vec >> 32; +#endif - const char *env1, *env2; env1 = getenv("OPENSSL_ia32cap"); if (env1 == NULL) { return; |