summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Lucas Crowthers <lucasc@codeaurora.org> 2013-04-25 15:56:16 -0400
committer Steve Kondik <shade@chemlab.org> 2013-05-25 18:48:06 -0700
commit efe2f93bbdcdfd64e0bda0544e7f73405585cc1c (patch)
tree 6663818806d067dfc337555596a22c28218040f5
parent ed26d701c4955bc3a4720bc2c7d9b6654a6956f1 (diff)
download bionic-efe2f93bbdcdfd64e0bda0544e7f73405585cc1c.zip
bionic-efe2f93bbdcdfd64e0bda0544e7f73405585cc1c.tar.gz
bionic-efe2f93bbdcdfd64e0bda0544e7f73405585cc1c.tar.bz2
libm: optimistic pow calling
Call the optimized pow optimistically and fall back to the full-range implementation if an out-of-range input is detected.

Change-Id: I02657c2a254e9796e795a8fcc6ae43e24c9d37b9
-rw-r--r-- libm/arm/e_pow.S 23
-rw-r--r-- libm/src/e_pow.c 20
-rw-r--r-- libm/src/math_private.h 3
3 files changed, 28 insertions, 18 deletions
diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S
index 8ca9c79..b057308 100644
--- a/libm/arm/e_pow.S
+++ b/libm/arm/e_pow.S
@@ -34,6 +34,7 @@
#define int_1 d29
#define double_1 d28
@ sign and 2^int_n fixup:
+#define maxrange r12
#define expadjustment d7
#define literals r10
@ Values which exist within both polynomial implementations:
@@ -100,13 +101,20 @@
#define vmov_f32 fconsts
#define vmovne_f64 fconstdne
-ENTRY(pow_neon)
+#if defined(KRAIT_NEON_OPTIMIZATION)
+#define KRAIT_NO_AAPCS_VFP_MODE
+#endif
+
+ENTRY(pow)
#if defined(KRAIT_NO_AAPCS_VFP_MODE)
@ ARM ABI has inputs coming in via r registers, let's move to a d register
vmov x, x_lw, x_hw
#endif
push {r4, r5, r6, r7, r8, r9, r10, lr}
+ movw maxrange, #0x0000
+ movt maxrange, #0x4010
+
@ pre-staged bp values
vldr bpa, .LbpA
vldr bpb, .LbpB
@@ -176,6 +184,11 @@ ENTRY(pow_neon)
.Lxle2to3over5:
+ cmp x_hw, maxrange
+ cmpls y_hw, maxrange
+ movt maxrange, #0x3f00
+ cmpls maxrange, x_hw
+
@ load log2 polynomial series constants
vldm literals!, {k4, k3, k2, k1}
@@ -187,6 +200,8 @@ ENTRY(pow_neon)
vsub.f64 u, ss, bp
vadd.f64 v, ss, bp
+ bhi .LuseFullImpl
+
@ s = (x-1)/(x+1)
vdiv.f64 ss, u, v
@@ -393,6 +408,10 @@ ENTRY(pow_neon)
@ leave directly returning whatever is in Return_lw and Return_hw
pop {r4, r5, r6, r7, r8, r9, r10, pc}
+.LuseFullImpl:
+ pop {r4, r5, r6, r7, r8, r9, r10, lr}
+ b __full_ieee754_pow
+
.align 6
.LliteralTable:
@ Least-squares tuned constants for 11th order (log2((1+s)/(1-s))):
@@ -440,4 +459,4 @@ ENTRY(pow_neon)
.Ltwoto1o4: @ 2^1/4
.long 0x0a31b715, 0x3ff306fe
-END(pow_neon)
+END(pow)
diff --git a/libm/src/e_pow.c b/libm/src/e_pow.c
index 67171e6..5bb7f7b 100644
--- a/libm/src/e_pow.c
+++ b/libm/src/e_pow.c
@@ -61,14 +61,6 @@ static char rcsid[] = "$FreeBSD: src/lib/msun/src/e_pow.c,v 1.11 2005/02/04 18:2
#include "math.h"
#include "math_private.h"
-#if defined(__ARM_NEON__)
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
-double pow_neon(double x, double y);
-#else
-double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp")));
-#endif
-#endif
-
static const double
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
@@ -103,7 +95,11 @@ ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
double
+#if defined(__ARM_NEON__)
+__full_ieee754_pow(double x, double y)
+#else
__ieee754_pow(double x, double y)
+#endif
{
double z,ax,z_h,z_l,p_h,p_l;
double y1,t1,t2,r,s,t,u,v,w;
@@ -229,14 +225,6 @@ __ieee754_pow(double x, double y)
t1 = u+v;
SET_LOW_WORD(t1,0);
t2 = v-(t1-u);
-#if defined(__ARM_NEON__)
- } else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) {
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
- return pow_neon(x,y);
-#else
- return pow_neon(x,y,lx,hx);
-#endif
-#endif
} else {
double ss,s2,s_h,s_l,t_h,t_l;
n = 0;
diff --git a/libm/src/math_private.h b/libm/src/math_private.h
index 7cda2e9..888272d 100644
--- a/libm/src/math_private.h
+++ b/libm/src/math_private.h
@@ -257,6 +257,9 @@ cpackl(long double x, long double y)
#define __ieee754_ldexpf ldexpf
/* fdlibm kernel function */
+#if defined(__ARM_NEON__)
+double __full_ieee754_pow(double,double);
+#endif
#if defined(KRAIT_NEON_OPTIMIZATION)
int __ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp")));
double __kernel_sin(double,double,int) __attribute__((pcs("aapcs-vfp")));