summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2014-06-09 20:54:22 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2014-06-09 20:54:22 +0000
commit87472963c7c50240002fd83efec69c267c0d6ec7 (patch)
tree57aaa17251e634a0930926a61e7bf5a3cefdeaa6
parent36ed5971c45425bf258c72e7baaa68ebdd70c7bb (diff)
parenta147a1da5c268e9d556c207be0d3da0a519b2d54 (diff)
downloadbionic-87472963c7c50240002fd83efec69c267c0d6ec7.zip
bionic-87472963c7c50240002fd83efec69c267c0d6ec7.tar.gz
bionic-87472963c7c50240002fd83efec69c267c0d6ec7.tar.bz2
Merge "AArch64: libm: Fix ARM64 fenv_t and refactor ARM64 libm implementation."
-rw-r--r--benchmarks/math_benchmark.cpp40
-rw-r--r--libm/arm64/fenv.c198
-rw-r--r--libm/include/arm64/machine/fenv.h107
3 files changed, 210 insertions, 135 deletions
diff --git a/benchmarks/math_benchmark.cpp b/benchmarks/math_benchmark.cpp
index 3602de4..a9748cd 100644
--- a/benchmarks/math_benchmark.cpp
+++ b/benchmarks/math_benchmark.cpp
@@ -16,6 +16,7 @@
#include "benchmark.h"
+#include <fenv.h>
#include <math.h>
// Avoid optimization.
@@ -113,10 +114,49 @@ static void BM_math_isinf_ZERO(int iters) {
}
BENCHMARK(BM_math_isinf_ZERO);
+static void BM_math_sin_fast(int iters) {
+ StartBenchmarkTiming();
+ d = 1.0;
+ for (int i = 0; i < iters; ++i) {
+ d += sin(d);
+ }
+
+ StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_fast);
+static void BM_math_sin_feupdateenv(int iters) {
+ StartBenchmarkTiming();
+ d = 1.0;
+ for (int i = 0; i < iters; ++i) {
+ fenv_t __libc_save_rm;
+ feholdexcept(&__libc_save_rm);
+ fesetround(FE_TONEAREST);
+ d += sin(d);
+ feupdateenv(&__libc_save_rm);
+ }
+ StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_feupdateenv);
+
+static void BM_math_sin_fesetenv(int iters) {
+ StartBenchmarkTiming();
+
+ d = 1.0;
+ for (int i = 0; i < iters; ++i) {
+ fenv_t __libc_save_rm;
+ feholdexcept(&__libc_save_rm);
+ fesetround(FE_TONEAREST);
+ d += sin(d);
+ fesetenv(&__libc_save_rm);
+ }
+
+ StopBenchmarkTiming();
+}
+BENCHMARK(BM_math_sin_fesetenv);
static void BM_math_fpclassify_NORMAL(int iters) {
StartBenchmarkTiming();
diff --git a/libm/arm64/fenv.c b/libm/arm64/fenv.c
index 9db21ef..ce560a7 100644
--- a/libm/arm64/fenv.c
+++ b/libm/arm64/fenv.c
@@ -28,114 +28,168 @@
#include <fenv.h>
-/*
- * Hopefully the system ID byte is immutable, so it's valid to use
- * this as a default environment.
- */
-const fenv_t __fe_dfl_env = 0;
+#define FPCR_EXCEPT_SHIFT 8
+#define FPCR_EXCEPT_MASK (FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT)
+
+#define FPCR_RMODE_SHIFT 22
+
+const fenv_t __fe_dfl_env = { 0 /* control */, 0 /* status */};
+
+typedef __uint32_t fpu_control_t; // FPCR, Floating-point Control Register.
+typedef __uint32_t fpu_status_t; // FPSR, Floating-point Status Register.
+
+#define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r" (__fpcr))
+#define __get_fpsr(__fpsr) __asm__ __volatile__("mrs %0,fpsr" : "=r" (__fpsr))
+#define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : :"ri" (__fpcr))
+#define __set_fpsr(__fpsr) __asm__ __volatile__("msr fpsr,%0" : :"ri" (__fpsr))
-int fegetenv(fenv_t* __envp) {
- fenv_t _fpcr, _fpsr;
- __asm__ __volatile__("mrs %0,fpcr" : "=r" (_fpcr));
- __asm__ __volatile__("mrs %0,fpsr" : "=r" (_fpsr));
- *__envp = (_fpcr | _fpsr);
+int fegetenv(fenv_t* envp) {
+ __get_fpcr(envp->__control);
+ __get_fpsr(envp->__status);
return 0;
}
-int fesetenv(const fenv_t* __envp) {
- fenv_t _fpcr = (*__envp & FPCR_MASK);
- fenv_t _fpsr = (*__envp & FPSR_MASK);
- __asm__ __volatile__("msr fpcr,%0" : :"ri" (_fpcr));
- __asm__ __volatile__("msr fpsr,%0" : :"ri" (_fpsr));
+int fesetenv(const fenv_t* envp) {
+ fpu_control_t fpcr;
+
+ __get_fpcr(fpcr);
+ if (envp->__control != fpcr) {
+ __set_fpcr(envp->__control);
+ }
+ __set_fpsr(envp->__status);
return 0;
}
-int feclearexcept(int __excepts) {
- fexcept_t __fpscr;
- fegetenv(&__fpscr);
- __fpscr &= ~__excepts;
- fesetenv(&__fpscr);
+int feclearexcept(int excepts) {
+ fpu_status_t fpsr;
+
+ excepts &= FE_ALL_EXCEPT;
+ __get_fpsr(fpsr);
+ fpsr &= ~excepts;
+ __set_fpsr(fpsr);
return 0;
}
-int fegetexceptflag(fexcept_t* __flagp, int __excepts) {
- fexcept_t __fpscr;
- fegetenv(&__fpscr);
- *__flagp = __fpscr & __excepts;
+int fegetexceptflag(fexcept_t* flagp, int excepts) {
+ fpu_status_t fpsr;
+
+ excepts &= FE_ALL_EXCEPT;
+ __get_fpsr(fpsr);
+ *flagp = fpsr & excepts;
return 0;
}
-int fesetexceptflag(const fexcept_t* __flagp, int __excepts) {
- fexcept_t __fpscr;
- fegetenv(&__fpscr);
- __fpscr &= ~__excepts;
- __fpscr |= *__flagp & __excepts;
- fesetenv(&__fpscr);
+int fesetexceptflag(const fexcept_t* flagp, int excepts) {
+ fpu_status_t fpsr;
+
+ excepts &= FE_ALL_EXCEPT;
+ __get_fpsr(fpsr);
+ fpsr &= ~excepts;
+ fpsr |= *flagp & excepts;
+ __set_fpsr(fpsr);
return 0;
}
-int feraiseexcept(int __excepts) {
- fexcept_t __ex = __excepts;
- fesetexceptflag(&__ex, __excepts);
+int feraiseexcept(int excepts) {
+ fexcept_t ex = excepts;
+
+ fesetexceptflag(&ex, excepts);
return 0;
}
-int fetestexcept(int __excepts) {
- fexcept_t __fpscr;
- fegetenv(&__fpscr);
- return (__fpscr & __excepts);
+int fetestexcept(int excepts) {
+ fpu_status_t fpsr;
+
+ excepts &= FE_ALL_EXCEPT;
+ __get_fpsr(fpsr);
+ return (fpsr & excepts);
}
int fegetround(void) {
- fenv_t _fpscr;
- fegetenv(&_fpscr);
- return ((_fpscr >> _FPSCR_RMODE_SHIFT) & 0x3);
+ fpu_control_t fpcr;
+
+ __get_fpcr(fpcr);
+ return ((fpcr >> FPCR_RMODE_SHIFT) & FE_TOWARDZERO);
}
-int fesetround(int __round) {
- fenv_t _fpscr;
- fegetenv(&_fpscr);
- _fpscr &= ~(0x3 << _FPSCR_RMODE_SHIFT);
- _fpscr |= (__round << _FPSCR_RMODE_SHIFT);
- fesetenv(&_fpscr);
+int fesetround(int round) {
+ fpu_control_t fpcr, new_fpcr;
+
+ round &= FE_TOWARDZERO;
+ __get_fpcr(fpcr);
+ new_fpcr = fpcr & ~(FE_TOWARDZERO << FPCR_RMODE_SHIFT);
+ new_fpcr |= (round << FPCR_RMODE_SHIFT);
+ if (new_fpcr != fpcr) {
+ __set_fpcr(new_fpcr);
+ }
return 0;
}
-int feholdexcept(fenv_t* __envp) {
- fenv_t __env;
- fegetenv(&__env);
- *__envp = __env;
- __env &= ~(FE_ALL_EXCEPT | _FPSCR_ENABLE_MASK);
- fesetenv(&__env);
+int feholdexcept(fenv_t* envp) {
+ fenv_t env;
+ fpu_status_t fpsr;
+ fpu_control_t fpcr, new_fpcr;
+
+ __get_fpsr(fpsr);
+ __get_fpcr(fpcr);
+ env.__status = fpsr;
+ env.__control = fpcr;
+ *envp = env;
+
+ // Set exceptions to untrapped.
+ new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FPCR_EXCEPT_SHIFT);
+ if (new_fpcr != fpcr) {
+ __set_fpcr(new_fpcr);
+ }
+
+ // Clear all exceptions.
+ fpsr &= ~FE_ALL_EXCEPT;
+ __set_fpsr(fpsr);
return 0;
}
-int feupdateenv(const fenv_t* __envp) {
- fexcept_t __fpscr;
- fegetenv(&__fpscr);
- fesetenv(__envp);
- feraiseexcept(__fpscr & FE_ALL_EXCEPT);
+int feupdateenv(const fenv_t* envp) {
+ fpu_status_t fpsr;
+ fpu_control_t fpcr;
+
+ // Set FPU Control register.
+ __get_fpcr(fpcr);
+ if (envp->__control != fpcr) {
+ __set_fpcr(envp->__control);
+ }
+
+ // Set FPU Status register to status | currently raised exceptions.
+ __get_fpsr(fpsr);
+ fpsr = envp->__status | (fpsr & FE_ALL_EXCEPT);
+ __set_fpsr(fpsr);
return 0;
}
-int feenableexcept(int __mask) {
- fenv_t __old_fpscr, __new_fpscr;
- fegetenv(&__old_fpscr);
- __new_fpscr = __old_fpscr | (__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT;
- fesetenv(&__new_fpscr);
- return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
+int feenableexcept(int mask) {
+ fpu_control_t old_fpcr, new_fpcr;
+
+ __get_fpcr(old_fpcr);
+ new_fpcr = old_fpcr | ((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
+ if (new_fpcr != old_fpcr) {
+ __set_fpcr(new_fpcr);
+ }
+ return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
}
-int fedisableexcept(int __mask) {
- fenv_t __old_fpscr, __new_fpscr;
- fegetenv(&__old_fpscr);
- __new_fpscr = __old_fpscr & ~((__mask & FE_ALL_EXCEPT) << _FPSCR_ENABLE_SHIFT);
- fesetenv(&__new_fpscr);
- return ((__old_fpscr >> _FPSCR_ENABLE_SHIFT) & FE_ALL_EXCEPT);
+int fedisableexcept(int mask) {
+ fpu_control_t old_fpcr, new_fpcr;
+
+ __get_fpcr(old_fpcr);
+ new_fpcr = old_fpcr & ~((mask & FE_ALL_EXCEPT) << FPCR_EXCEPT_SHIFT);
+ if (new_fpcr != old_fpcr) {
+ __set_fpcr(new_fpcr);
+ }
+ return ((old_fpcr >> FPCR_EXCEPT_SHIFT) & FE_ALL_EXCEPT);
}
int fegetexcept(void) {
- fenv_t __fpscr;
- fegetenv(&__fpscr);
- return ((__fpscr & _FPSCR_ENABLE_MASK) >> _FPSCR_ENABLE_SHIFT);
+ fpu_control_t fpcr;
+
+ __get_fpcr(fpcr);
+ return ((fpcr & FPCR_EXCEPT_MASK) >> FPCR_EXCEPT_SHIFT);
}
diff --git a/libm/include/arm64/machine/fenv.h b/libm/include/arm64/machine/fenv.h
index 2efeee3..a8568b8 100644
--- a/libm/include/arm64/machine/fenv.h
+++ b/libm/include/arm64/machine/fenv.h
@@ -27,15 +27,44 @@
*/
/*
- * Rewritten for Android.
+ * In ARMv8, AArch64 state, floating-point operation is controlled by:
*
- * The ARM FPSCR (Floating-point Status and Control Register) described here:
- * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0344b/Chdfafia.html
- * has been split into the FPCR (Floating-point Control Register) and FPSR
- * (Floating-point Status Register) on the ARMv8. These are described briefly in
- * "Procedure Call Standard for the ARM 64-bit Architecture"
- * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf
- * section 5.1.2 SIMD and Floating-Point Registers
+ * * FPCR - 32-bit Floating-Point Control Register:
+ * * [31:27] - Reserved, Res0;
+ * * [26] - AHP, Alternative half-precision control bit;
+ * * [25] - DN, Default NaN mode control bit;
+ * * [24] - FZ, Flush-to-zero mode control bit;
+ * * [23:22] - RMode, Rounding Mode control field:
+ * * 00 - Round to Nearest (RN) mode;
+ * * 01 - Round towards Plus Infinity (RP) mode;
+ * * 10 - Round towards Minus Infinity (RM) mode;
+ * * 11 - Round towards Zero (RZ) mode.
+ * * [21:20] - Stride, ignored during AArch64 execution;
+ * * [19] - Reserved, Res0;
+ * * [18:16] - Len, ignored during AArch64 execution;
+ * * [15] - IDE, Input Denormal exception trap;
+ * * [14:13] - Reserved, Res0;
+ * * [12] - IXE, Inexact exception trap;
+ * * [11] - UFE, Underflow exception trap;
+ * * [10] - OFE, Overflow exception trap;
+ * * [9] - DZE, Division by Zero exception trap;
+ * * [8] - IOE, Invalid Operation exception trap;
+ * * [7:0] - Reserved, Res0.
+ *
+ * * FPSR - 32-bit Floating-Point Status Register:
+ * * [31] - N, Negative condition flag for AArch32 (AArch64 sets PSTATE.N);
+ * * [30] - Z, Zero condition flag for AArch32 (AArch64 sets PSTATE.Z);
+ * * [29] - C, Carry condition flag for AArch32 (AArch64 sets PSTATE.C);
+ * * [28] - V, Overflow condition flag for AArch32 (AArch64 sets PSTATE.V);
+ * * [27] - QC, Cumulative saturation bit, Advanced SIMD only;
+ * * [26:8] - Reserved, Res0;
+ * * [7] - IDC, Input Denormal cumulative exception;
+ * * [6:5] - Reserved, Res0;
+ * * [4] - IXC, Inexact cumulative exception;
+ * * [3] - UFC, Underflow cumulative exception;
+ * * [2] - OFC, Overflow cumulative exception;
+ * * [1] - DZC, Division by Zero cumulative exception;
+ * * [0] - IOC, Invalid Operation cumulative exception.
*/
#ifndef _ARM64_FENV_H_
@@ -45,7 +74,11 @@
__BEGIN_DECLS
-typedef __uint32_t fenv_t;
+typedef struct {
+ __uint32_t __control; /* FPCR, Floating-point Control Register */
+ __uint32_t __status; /* FPSR, Floating-point Status Register */
+} fenv_t;
+
typedef __uint32_t fexcept_t;
/* Exception flags. */
@@ -54,11 +87,9 @@ typedef __uint32_t fexcept_t;
#define FE_OVERFLOW 0x04
#define FE_UNDERFLOW 0x08
#define FE_INEXACT 0x10
+#define FE_DENORMAL 0x80
#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_INEXACT | FE_INVALID | \
- FE_OVERFLOW | FE_UNDERFLOW)
-
-#define _FPSCR_ENABLE_SHIFT 8
-#define _FPSCR_ENABLE_MASK (FE_ALL_EXCEPT << _FPSCR_ENABLE_SHIFT)
+ FE_OVERFLOW | FE_UNDERFLOW | FE_DENORMAL)
/* Rounding modes. */
#define FE_TONEAREST 0x0
@@ -66,56 +97,6 @@ typedef __uint32_t fexcept_t;
#define FE_DOWNWARD 0x2
#define FE_TOWARDZERO 0x3
-#define _FPSCR_RMODE_SHIFT 22
-
-#define FPCR_IOE (1 << 8)
-#define FPCR_DZE (1 << 9)
-#define FPCR_OFE (1 << 10)
-#define FPCR_UFE (1 << 11)
-#define FPCR_IXE (1 << 12)
-#define FPCR_IDE (1 << 15)
-#define FPCR_LEN (7 << 16)
-#define FPCR_STRIDE (3 << 20)
-#define FPCR_RMODE (3 << 22)
-#define FPCR_FZ (1 << 24)
-#define FPCR_DN (1 << 25)
-#define FPCR_AHP (1 << 26)
-#define FPCR_MASK (FPCR_IOE | \
- FPCR_DZE | \
- FPCR_OFE | \
- FPCR_UFE | \
- FPCR_IXE | \
- FPCR_IDE | \
- FPCR_LEN | \
- FPCR_STRIDE | \
- FPCR_RMODE | \
- FPCR_FZ | \
- FPCR_DN | \
- FPCR_AHP )
-
-#define FPSR_IOC (1 << 0)
-#define FPSR_DZC (1 << 1)
-#define FPSR_OFC (1 << 2)
-#define FPSR_UFC (1 << 3)
-#define FPSR_IXC (1 << 4)
-#define FPSR_IDC (1 << 7)
-#define FPSR_QC (1 << 27)
-#define FPSR_V (1 << 28)
-#define FPSR_C (1 << 29)
-#define FPSR_Z (1 << 30)
-#define FPSR_N (1 << 31)
-#define FPSR_MASK (FPSR_IOC | \
- FPSR_DZC | \
- FPSR_OFC | \
- FPSR_UFC | \
- FPSR_IXC | \
- FPSR_IDC | \
- FPSR_QC | \
- FPSR_V | \
- FPSR_C | \
- FPSR_Z | \
- FPSR_N )
-
__END_DECLS
#endif /* !_ARM64_FENV_H_ */