diff options
Diffstat (limited to 'libm/i387/fenv.c')
-rw-r--r-- | libm/i387/fenv.c | 486 |
1 files changed, 244 insertions, 242 deletions
diff --git a/libm/i387/fenv.c b/libm/i387/fenv.c index 89ddc55..f64f8dc 100644 --- a/libm/i387/fenv.c +++ b/libm/i387/fenv.c @@ -31,29 +31,31 @@ #include "npx.h" #include "fenv.h" +#define ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO) + /* * As compared to the x87 control word, the SSE unit's control word * has the rounding control bits offset by 3 and the exception mask * bits offset by 7. */ -#define _SSE_ROUND_SHIFT 3 -#define _SSE_EMASK_SHIFT 7 +#define _SSE_ROUND_SHIFT 3 +#define _SSE_EMASK_SHIFT 7 const fenv_t __fe_dfl_env = { - __INITIAL_NPXCW__, /*__control*/ - 0x0000, /*__mxcsr_hi*/ - 0x0000, /*__status*/ - 0x1f80, /*__mxcsr_lo*/ - 0xffffffff, /*__tag*/ - { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ + __INITIAL_NPXCW__, /*__control*/ + 0x0000, /*__mxcsr_hi*/ + 0x0000, /*__status*/ + 0x1f80, /*__mxcsr_lo*/ + 0xffffffff, /*__tag*/ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ }; #define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) #define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) -#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ - : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ - "st(5)", "st(6)", "st(7)") +#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ + : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ + "st(5)", "st(6)", "st(7)") #define __fnclex() __asm __volatile("fnclex") #define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) #define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) @@ -68,22 +70,22 @@ enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; #define __HAS_SSE() 1 #else #define __HAS_SSE() (__has_sse == __SSE_YES || \ - (__has_sse == __SSE_UNK && __test_sse())) + (__has_sse == __SSE_UNK && __test_sse())) #endif enum __sse_support __has_sse = #ifdef __SSE__ - __SSE_YES; + __SSE_YES; #else - __SSE_UNK; + __SSE_UNK; #endif #ifndef __SSE__ -#define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) -#define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x)) -#define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \ - "cpuid\n\tpopl %%ebx" \ - : "=d" (*(x)) : : "eax", "ecx") +#define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) +#define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x)) +#define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \ + "cpuid\n\tpopl %%ebx" \ + : "=d" (*(x)) : : "eax", "ecx") /* * Test for SSE support on this processor. We need to do this because @@ -94,298 +96,298 @@ enum __sse_support __has_sse = int __test_sse(void) { - int flag, nflag; - int dx_features; - - /* Am I a 486? */ - getfl(&flag); - nflag = flag ^ 0x200000; - setfl(nflag); - getfl(&nflag); - if (flag != nflag) { - /* Not a 486, so CPUID should work. */ - cpuid_dx(&dx_features); - if (dx_features & 0x2000000) { - __has_sse = __SSE_YES; - return (1); - } - } - __has_sse = __SSE_NO; - return (0); + int flag, nflag; + int dx_features; + + /* Am I a 486? */ + getfl(&flag); + nflag = flag ^ 0x200000; + setfl(nflag); + getfl(&nflag); + if (flag != nflag) { + /* Not a 486, so CPUID should work. */ + cpuid_dx(&dx_features); + if (dx_features & 0x2000000) { + __has_sse = __SSE_YES; + return (1); + } + } + __has_sse = __SSE_NO; + return (0); } #endif /* __SSE__ */ int fesetexceptflag(const fexcept_t *flagp, int excepts) { - fenv_t env; - __uint32_t mxcsr; - - excepts &= FE_ALL_EXCEPT; - if (excepts) { /* Do nothing if excepts is 0 */ - __fnstenv(&env); - env.__status &= ~excepts; - env.__status |= *flagp & excepts; - __fnclex(); - __fldenv(env); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - mxcsr &= ~excepts; - mxcsr |= *flagp & excepts; - __ldmxcsr(mxcsr); - } - } - - return (0); + fenv_t env; + __uint32_t mxcsr; + + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstenv(&env); + env.__status &= ~excepts; + env.__status |= *flagp & excepts; + __fnclex(); + __fldenv(env); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~excepts; + mxcsr |= *flagp & excepts; + __ldmxcsr(mxcsr); + } + } + + return (0); } int feraiseexcept(int excepts) { - fexcept_t ex = excepts; + fexcept_t ex = excepts; - fesetexceptflag(&ex, excepts); - __fwait(); - return (0); + fesetexceptflag(&ex, excepts); + __fwait(); + return (0); } int fegetenv(fenv_t *envp) { - __uint32_t mxcsr; - - __fnstenv(envp); - /* - * fnstenv masks all exceptions, so we need to restore - * the old control word to avoid this side effect. - */ - __fldcw(envp->__control); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - envp->__mxcsr_hi = mxcsr >> 16; - envp->__mxcsr_lo = mxcsr & 0xffff; - } - return (0); + __uint32_t mxcsr; + + __fnstenv(envp); + /* + * fnstenv masks all exceptions, so we need to restore + * the old control word to avoid this side effect. + */ + __fldcw(envp->__control); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + envp->__mxcsr_hi = mxcsr >> 16; + envp->__mxcsr_lo = mxcsr & 0xffff; + } + return (0); } int feholdexcept(fenv_t *envp) { - __uint32_t mxcsr; - fenv_t env; - - __fnstenv(&env); - *envp = env; - env.__status &= ~FE_ALL_EXCEPT; - env.__control |= FE_ALL_EXCEPT; - __fnclex(); - __fldenv(env); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - envp->__mxcsr_hi = mxcsr >> 16; - envp->__mxcsr_lo = mxcsr & 0xffff; - mxcsr &= ~FE_ALL_EXCEPT; - mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; - __ldmxcsr(mxcsr); - } - return (0); + __uint32_t mxcsr; + fenv_t env; + + __fnstenv(&env); + *envp = env; + env.__status &= ~FE_ALL_EXCEPT; + env.__control |= FE_ALL_EXCEPT; + __fnclex(); + __fldenv(env); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + envp->__mxcsr_hi = mxcsr >> 16; + envp->__mxcsr_lo = mxcsr & 0xffff; + mxcsr &= ~FE_ALL_EXCEPT; + mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; + __ldmxcsr(mxcsr); + } + return (0); } int feupdateenv(const fenv_t *envp) { - __uint32_t mxcsr; - __uint16_t status; - - __fnstsw(&status); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - } else { - mxcsr = 0; - } - fesetenv(envp); - feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); - return (0); + __uint32_t mxcsr; + __uint16_t status; + + __fnstsw(&status); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + fesetenv(envp); + feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); + return (0); } int feenableexcept(int mask) { - __uint32_t mxcsr; - __uint16_t control, omask; - - mask &= FE_ALL_EXCEPT; - __fnstcw(&control); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - } else { - mxcsr = 0; - } - omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; - if (mask) { - control &= ~mask; - __fldcw(control); - if (__HAS_SSE()) { - mxcsr &= ~(mask << _SSE_EMASK_SHIFT); - __ldmxcsr(mxcsr); - } - } - return (omask); + __uint32_t mxcsr; + __uint16_t control, omask; + + mask &= FE_ALL_EXCEPT; + __fnstcw(&control); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + if (mask) { + control &= ~mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr &= ~(mask << _SSE_EMASK_SHIFT); + __ldmxcsr(mxcsr); + } + } + return (omask); } int fedisableexcept(int mask) { - __uint32_t mxcsr; - __uint16_t control, omask; - - mask &= FE_ALL_EXCEPT; - __fnstcw(&control); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - } else { - mxcsr = 0; - } - omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; - if (mask) { - control |= mask; - __fldcw(control); - if (__HAS_SSE()) { - mxcsr |= mask << _SSE_EMASK_SHIFT; - __ldmxcsr(mxcsr); - } - } - return (omask); + __uint32_t mxcsr; + __uint16_t control, omask; + + mask &= FE_ALL_EXCEPT; + __fnstcw(&control); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + if (mask) { + control |= mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr |= mask << _SSE_EMASK_SHIFT; + __ldmxcsr(mxcsr); + } + } + return (omask); } int feclearexcept(int excepts) { - fenv_t env; - __uint32_t mxcsr; - - excepts &= FE_ALL_EXCEPT; - if (excepts) { /* Do nothing if excepts is 0 */ - __fnstenv(&env); - env.__status &= ~excepts; - __fnclex(); - __fldenv(env); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - mxcsr &= ~excepts; - __ldmxcsr(mxcsr); - } - } - return (0); + fenv_t env; + __uint32_t mxcsr; + + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstenv(&env); + env.__status &= ~excepts; + __fnclex(); + __fldenv(env); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~excepts; + __ldmxcsr(mxcsr); + } + } + return (0); } int fegetexceptflag(fexcept_t *flagp, int excepts) { - __uint32_t mxcsr; - __uint16_t status; - - excepts &= FE_ALL_EXCEPT; - __fnstsw(&status); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - } else { - mxcsr = 0; - } - *flagp = (status | mxcsr) & excepts; - return (0); + __uint32_t mxcsr; + __uint16_t status; + + excepts &= FE_ALL_EXCEPT; + __fnstsw(&status); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + *flagp = (status | mxcsr) & excepts; + return (0); } int fetestexcept(int excepts) { - __uint32_t mxcsr; - __uint16_t status; - - excepts &= FE_ALL_EXCEPT; - if (excepts) { /* Do nothing if excepts is 0 */ - __fnstsw(&status); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - } else { - mxcsr = 0; - } - return ((status | mxcsr) & excepts); - } - return (0); + __uint32_t mxcsr; + __uint16_t status; + + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstsw(&status); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + return ((status | mxcsr) & excepts); + } + return (0); } int fegetround(void) { - __uint16_t control; - - /* - * We assume that the x87 and the SSE unit agree on the - * rounding mode. Reading the control word on the x87 turns - * out to be about 5 times faster than reading it on the SSE - * unit on an Opteron 244. - */ - __fnstcw(&control); - return (control & _ROUND_MASK); + __uint16_t control; + + /* + * We assume that the x87 and the SSE unit agree on the + * rounding mode. Reading the control word on the x87 turns + * out to be about 5 times faster than reading it on the SSE + * unit on an Opteron 244. + */ + __fnstcw(&control); + return (control & ROUND_MASK); } int fesetround(int round) { - __uint32_t mxcsr; - __uint16_t control; - - if (round & ~_ROUND_MASK) { - return (-1); - } else { - __fnstcw(&control); - control &= ~_ROUND_MASK; - control |= round; - __fldcw(control); - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); - mxcsr |= round << _SSE_ROUND_SHIFT; - __ldmxcsr(mxcsr); - } - return (0); - } + __uint32_t mxcsr; + __uint16_t control; + + if (round & ~ROUND_MASK) { + return (-1); + } else { + __fnstcw(&control); + control &= ~ROUND_MASK; + control |= round; + __fldcw(control); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT); + mxcsr |= round << _SSE_ROUND_SHIFT; + __ldmxcsr(mxcsr); + } + return (0); + } } int fesetenv(const fenv_t *envp) { - fenv_t env = *envp; - __uint32_t mxcsr; - - mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); - env.__mxcsr_hi = 0xffff; - env.__mxcsr_lo = 0xffff; - /* - * XXX Using fldenvx() instead of fldenv() tells the compiler that this - * instruction clobbers the i387 register stack. This happens because - * we restore the tag word from the saved environment. Normally, this - * would happen anyway and we wouldn't care, because the ABI allows - * function calls to clobber the i387 regs. However, fesetenv() is - * inlined, so we need to be more careful. - */ - __fldenvx(env); - if (__HAS_SSE()) { - __ldmxcsr(mxcsr); - } - return (0); + fenv_t env = *envp; + __uint32_t mxcsr; + + mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); + env.__mxcsr_hi = 0xffff; + env.__mxcsr_lo = 0xffff; + /* + * XXX Using fldenvx() instead of fldenv() tells the compiler that this + * instruction clobbers the i387 register stack. This happens because + * we restore the tag word from the saved environment. Normally, this + * would happen anyway and we wouldn't care, because the ABI allows + * function calls to clobber the i387 regs. However, fesetenv() is + * inlined, so we need to be more careful. + */ + __fldenvx(env); + if (__HAS_SSE()) { + __ldmxcsr(mxcsr); + } + return (0); } int fegetexcept(void) { - __uint16_t control; - - /* - * We assume that the masks for the x87 and the SSE unit are - * the same. - */ - __fnstcw(&control); - return (~control & FE_ALL_EXCEPT); + __uint16_t control; + + /* + * We assume that the masks for the x87 and the SSE unit are + * the same. + */ + __fnstcw(&control); + return (~control & FE_ALL_EXCEPT); } |