diff options
author | Bruce Beare <bruce.j.beare@intel.com> | 2011-09-15 10:59:01 -0700 |
---|---|---|
committer | David 'Digit' Turner <digit@google.com> | 2011-09-29 17:35:33 +0200 |
commit | 6cda7b6249c05ebcaebeb86badf957ec0f04e4ad (patch) | |
tree | 14578aaa3b23795029c105c6e1af50e8a05b8891 /libm | |
parent | 50a83255d80f98b857c3f72dd2225d4bbc720ca3 (diff) | |
download | bionic-6cda7b6249c05ebcaebeb86badf957ec0f04e4ad.zip bionic-6cda7b6249c05ebcaebeb86badf957ec0f04e4ad.tar.gz bionic-6cda7b6249c05ebcaebeb86badf957ec0f04e4ad.tar.bz2 |
Bionic: x86: Fix libm macro definitions
http://code.google.com/p/android/issues/detail?id=19276
GCC could optimize away the inline asm because the asm statements
did not declare which FPU registers they change.
Change-Id: I9f9e8623fa6580843b7cd8178439ace8c2db2d51
Signed-off-by: Mark D Horn <mark.d.horn@intel.com>
Signed-off-by: Bruce Beare <bruce.j.beare@intel.com>
Author: Jingwei Zhang <jingwei.zhang@intel.com>
Diffstat (limited to 'libm')
-rw-r--r-- | libm/i387/fenv.c | 285 | ||||
-rw-r--r-- | libm/include/i387/fenv.h | 167 |
2 files changed, 246 insertions, 206 deletions
diff --git a/libm/i387/fenv.c b/libm/i387/fenv.c index aabe270..89ddc55 100644 --- a/libm/i387/fenv.c +++ b/libm/i387/fenv.c @@ -31,16 +31,46 @@ #include "npx.h" #include "fenv.h" +/* + * As compared to the x87 control word, the SSE unit's control word + * has the rounding control bits offset by 3 and the exception mask + * bits offset by 7. + */ +#define _SSE_ROUND_SHIFT 3 +#define _SSE_EMASK_SHIFT 7 + const fenv_t __fe_dfl_env = { - __INITIAL_NPXCW__, - 0x0000, - 0x0000, - 0x1f80, - 0xffffffff, + __INITIAL_NPXCW__, /*__control*/ + 0x0000, /*__mxcsr_hi*/ + 0x0000, /*__status*/ + 0x1f80, /*__mxcsr_lo*/ + 0xffffffff, /*__tag*/ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ }; +#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) +#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) +#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ + : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ + "st(5)", "st(6)", "st(7)") +#define __fnclex() __asm __volatile("fnclex") +#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) +#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) +#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw))) +#define __fwait() __asm __volatile("fwait") +#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) +#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) + +/* After testing for SSE support once, we cache the result in __has_sse. 
*/ +enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; +#ifdef __SSE__ +#define __HAS_SSE() 1 +#else +#define __HAS_SSE() (__has_sse == __SSE_YES || \ + (__has_sse == __SSE_UNK && __test_sse())) +#endif + enum __sse_support __has_sse = #ifdef __SSE__ __SSE_YES; @@ -48,6 +78,7 @@ enum __sse_support __has_sse = __SSE_UNK; #endif +#ifndef __SSE__ #define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) #define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x)) #define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \ @@ -82,23 +113,27 @@ __test_sse(void) __has_sse = __SSE_NO; return (0); } +#endif /* __SSE__ */ int fesetexceptflag(const fexcept_t *flagp, int excepts) { fenv_t env; - int mxcsr; - - __fnstenv(&env); - env.__status &= ~excepts; - env.__status |= *flagp & excepts; - __fldenv(env); + __uint32_t mxcsr; - if (__HAS_SSE()) { - __stmxcsr(&mxcsr); - mxcsr &= ~excepts; - mxcsr |= *flagp & excepts; - __ldmxcsr(mxcsr); + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstenv(&env); + env.__status &= ~excepts; + env.__status |= *flagp & excepts; + __fnclex(); + __fldenv(env); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~excepts; + mxcsr |= *flagp & excepts; + __ldmxcsr(mxcsr); + } } return (0); @@ -117,32 +152,38 @@ feraiseexcept(int excepts) int fegetenv(fenv_t *envp) { - int control, mxcsr; + __uint32_t mxcsr; + __fnstenv(envp); /* - * fnstenv masks all exceptions, so we need to save and - * restore the control word to avoid this side effect. + * fnstenv masks all exceptions, so we need to restore + * the old control word to avoid this side effect. 
*/ - __fnstcw(&control); - __fnstenv(envp); + __fldcw(envp->__control); if (__HAS_SSE()) { __stmxcsr(&mxcsr); - __set_mxcsr(*envp, mxcsr); + envp->__mxcsr_hi = mxcsr >> 16; + envp->__mxcsr_lo = mxcsr & 0xffff; } - __fldcw(control); return (0); } int feholdexcept(fenv_t *envp) { - int mxcsr; + __uint32_t mxcsr; + fenv_t env; - __fnstenv(envp); + __fnstenv(&env); + *envp = env; + env.__status &= ~FE_ALL_EXCEPT; + env.__control |= FE_ALL_EXCEPT; __fnclex(); + __fldenv(env); if (__HAS_SSE()) { __stmxcsr(&mxcsr); - __set_mxcsr(*envp, mxcsr); + envp->__mxcsr_hi = mxcsr >> 16; + envp->__mxcsr_lo = mxcsr & 0xffff; mxcsr &= ~FE_ALL_EXCEPT; mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; __ldmxcsr(mxcsr); @@ -153,60 +194,198 @@ feholdexcept(fenv_t *envp) int feupdateenv(const fenv_t *envp) { - int mxcsr; - short status; + __uint32_t mxcsr; + __uint16_t status; __fnstsw(&status); - if (__HAS_SSE()) + if (__HAS_SSE()) { __stmxcsr(&mxcsr); - else + } else { mxcsr = 0; + } fesetenv(envp); feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); return (0); } int -__feenableexcept(int mask) +feenableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr; + __uint16_t control, omask; mask &= FE_ALL_EXCEPT; __fnstcw(&control); - if (__HAS_SSE()) + if (__HAS_SSE()) { __stmxcsr(&mxcsr); - else + } else { mxcsr = 0; - omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; - control &= ~mask; - __fldcw(control); - if (__HAS_SSE()) { - mxcsr &= ~(mask << _SSE_EMASK_SHIFT); - __ldmxcsr(mxcsr); } - return (~omask); + omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + if (mask) { + control &= ~mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr &= ~(mask << _SSE_EMASK_SHIFT); + __ldmxcsr(mxcsr); + } + } + return (omask); } int -__fedisableexcept(int mask) +fedisableexcept(int mask) { - int mxcsr, control, omask; + __uint32_t mxcsr; + __uint16_t control, omask; mask &= FE_ALL_EXCEPT; __fnstcw(&control); - if (__HAS_SSE()) + if (__HAS_SSE()) { __stmxcsr(&mxcsr); - 
else + } else { mxcsr = 0; - omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; - control |= mask; - __fldcw(control); + } + omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; + if (mask) { + control |= mask; + __fldcw(control); + if (__HAS_SSE()) { + mxcsr |= mask << _SSE_EMASK_SHIFT; + __ldmxcsr(mxcsr); + } + } + return (omask); +} + +int +feclearexcept(int excepts) +{ + fenv_t env; + __uint32_t mxcsr; + + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstenv(&env); + env.__status &= ~excepts; + __fnclex(); + __fldenv(env); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~excepts; + __ldmxcsr(mxcsr); + } + } + return (0); +} + +int +fegetexceptflag(fexcept_t *flagp, int excepts) +{ + __uint32_t mxcsr; + __uint16_t status; + + excepts &= FE_ALL_EXCEPT; + __fnstsw(&status); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + *flagp = (status | mxcsr) & excepts; + return (0); +} + +int +fetestexcept(int excepts) +{ + __uint32_t mxcsr; + __uint16_t status; + + excepts &= FE_ALL_EXCEPT; + if (excepts) { /* Do nothing if excepts is 0 */ + __fnstsw(&status); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + } else { + mxcsr = 0; + } + return ((status | mxcsr) & excepts); + } + return (0); +} + +int +fegetround(void) +{ + __uint16_t control; + + /* + * We assume that the x87 and the SSE unit agree on the + * rounding mode. Reading the control word on the x87 turns + * out to be about 5 times faster than reading it on the SSE + * unit on an Opteron 244. 
+ */ + __fnstcw(&control); + return (control & _ROUND_MASK); +} + +int +fesetround(int round) +{ + __uint32_t mxcsr; + __uint16_t control; + + if (round & ~_ROUND_MASK) { + return (-1); + } else { + __fnstcw(&control); + control &= ~_ROUND_MASK; + control |= round; + __fldcw(control); + if (__HAS_SSE()) { + __stmxcsr(&mxcsr); + mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); + mxcsr |= round << _SSE_ROUND_SHIFT; + __ldmxcsr(mxcsr); + } + return (0); + } +} + +int +fesetenv(const fenv_t *envp) +{ + fenv_t env = *envp; + __uint32_t mxcsr; + + mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); + env.__mxcsr_hi = 0xffff; + env.__mxcsr_lo = 0xffff; + /* + * XXX Using fldenvx() instead of fldenv() tells the compiler that this + * instruction clobbers the i387 register stack. This happens because + * we restore the tag word from the saved environment. Normally, this + * would happen anyway and we wouldn't care, because the ABI allows + * function calls to clobber the i387 regs. However, fesetenv() is + * inlined, so we need to be more careful. + */ + __fldenvx(env); if (__HAS_SSE()) { - mxcsr |= mask << _SSE_EMASK_SHIFT; __ldmxcsr(mxcsr); } - return (~omask); + return (0); } -__weak_reference(__feenableexcept, feenableexcept); -__weak_reference(__fedisableexcept, fedisableexcept); +int +fegetexcept(void) +{ + __uint16_t control; + + /* + * We assume that the masks for the x87 and the SSE unit are + * the same. 
+ */ + __fnstcw(&control); + return (~control & FE_ALL_EXCEPT); +} diff --git a/libm/include/i387/fenv.h b/libm/include/i387/fenv.h index 4281f10..710494c 100644 --- a/libm/include/i387/fenv.h +++ b/libm/include/i387/fenv.h @@ -45,13 +45,6 @@ typedef struct { char __other[16]; } fenv_t; -#define __get_mxcsr(env) (((env).__mxcsr_hi << 16) | \ - ((env).__mxcsr_lo)) -#define __set_mxcsr(env, x) do { \ - (env).__mxcsr_hi = (__uint32_t)(x) >> 16; \ - (env).__mxcsr_lo = (__uint16_t)(x); \ -} while (0) - typedef __uint16_t fexcept_t; /* Exception flags */ @@ -72,167 +65,35 @@ typedef __uint16_t fexcept_t; #define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \ FE_UPWARD | FE_TOWARDZERO) -/* - * As compared to the x87 control word, the SSE unit's control word - * has the rounding control bits offset by 3 and the exception mask - * bits offset by 7. - */ -#define _SSE_ROUND_SHIFT 3 -#define _SSE_EMASK_SHIFT 7 - -/* After testing for SSE support once, we cache the result in __has_sse. */ -enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; -extern enum __sse_support __has_sse; -int __test_sse(void); -#ifdef __SSE__ -#define __HAS_SSE() 1 -#else -#define __HAS_SSE() (__has_sse == __SSE_YES || \ - (__has_sse == __SSE_UNK && __test_sse())) -#endif - __BEGIN_DECLS /* Default floating-point environment */ extern const fenv_t __fe_dfl_env; #define FE_DFL_ENV (&__fe_dfl_env) -#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) -#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) -#define __fnclex() __asm __volatile("fnclex") -#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) -#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) -#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=a" (*(__sw))) -#define __fwait() __asm __volatile("fwait") -#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) -#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) - -static __inline int 
-feclearexcept(int __excepts) -{ - fenv_t __env; - int __mxcsr; - - if (__excepts == FE_ALL_EXCEPT) { - __fnclex(); - } else { - __fnstenv(&__env); - __env.__status &= ~__excepts; - __fldenv(__env); - } - if (__HAS_SSE()) { - __stmxcsr(&__mxcsr); - __mxcsr &= ~__excepts; - __ldmxcsr(__mxcsr); - } - return (0); -} - -static __inline int -fegetexceptflag(fexcept_t *__flagp, int __excepts) -{ - int __mxcsr, __status; - - __fnstsw(&__status); - if (__HAS_SSE()) - __stmxcsr(&__mxcsr); - else - __mxcsr = 0; - *__flagp = (__mxcsr | __status) & __excepts; - return (0); -} - -int fesetexceptflag(const fexcept_t *__flagp, int __excepts); -int feraiseexcept(int __excepts); - -static __inline int -fetestexcept(int __excepts) -{ - int __mxcsr; - short __status; - - __fnstsw(&__status); - if (__HAS_SSE()) - __stmxcsr(&__mxcsr); - else - __mxcsr = 0; - return ((__status | __mxcsr) & __excepts); -} - -static __inline int -fegetround(void) -{ - int __control; +/* C99 floating-point exception functions */ +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +/* feraiseexcept does not set the inexact flag on overflow/underflow */ +int feraiseexcept(int excepts); +int fetestexcept(int excepts); - /* - * We assume that the x87 and the SSE unit agree on the - * rounding mode. Reading the control word on the x87 turns - * out to be about 5 times faster than reading it on the SSE - * unit on an Opteron 244. 
- */ - __fnstcw(&__control); - return (__control & _ROUND_MASK); -} - -static __inline int -fesetround(int __round) -{ - int __mxcsr, __control; - - if (__round & ~_ROUND_MASK) - return (-1); - - __fnstcw(&__control); - __control &= ~_ROUND_MASK; - __control |= __round; - __fldcw(__control); - - if (__HAS_SSE()) { - __stmxcsr(&__mxcsr); - __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); - __mxcsr |= __round << _SSE_ROUND_SHIFT; - __ldmxcsr(__mxcsr); - } - - return (0); -} +/* C99 rounding control functions */ +int fegetround(void); +int fesetround(int round); +/* C99 floating-point environment functions */ int fegetenv(fenv_t *__envp); int feholdexcept(fenv_t *__envp); - -static __inline int -fesetenv(const fenv_t *__envp) -{ - fenv_t __env = *__envp; - int __mxcsr; - - __mxcsr = __get_mxcsr(__env); - __set_mxcsr(__env, 0xffffffff); - __fldenv(__env); - if (__HAS_SSE()) - __ldmxcsr(__mxcsr); - return (0); -} - +int fesetenv(const fenv_t *envp); int feupdateenv(const fenv_t *__envp); #if __BSD_VISIBLE - +/* Additional support functions to set/query floating point traps */ int feenableexcept(int __mask); int fedisableexcept(int __mask); - -static __inline int -fegetexcept(void) -{ - int __control; - - /* - * We assume that the masks for the x87 and the SSE unit are - * the same. - */ - __fnstcw(&__control); - return (~__control & FE_ALL_EXCEPT); -} +int fegetexcept(void); #endif /* __BSD_VISIBLE */ |