From efe2f93bbdcdfd64e0bda0544e7f73405585cc1c Mon Sep 17 00:00:00 2001 From: Lucas Crowthers Date: Thu, 25 Apr 2013 15:56:16 -0400 Subject: libm: optimistic pow calling Call optimized pow optimistically and revert to full range implementation if we detect an out-of-range input. Change-Id: I02657c2a254e9796e795a8fcc6ae43e24c9d37b9 --- libm/arm/e_pow.S | 23 +++++++++++++++++++++-- libm/src/e_pow.c | 20 ++++---------------- libm/src/math_private.h | 3 +++ 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S index 8ca9c79..b057308 100644 --- a/libm/arm/e_pow.S +++ b/libm/arm/e_pow.S @@ -34,6 +34,7 @@ #define int_1 d29 #define double_1 d28 @ sign and 2^int_n fixup: +#define maxrange r12 #define expadjustment d7 #define literals r10 @ Values which exist within both polynomial implementations: @@ -100,13 +101,20 @@ #define vmov_f32 fconsts #define vmovne_f64 fconstdne -ENTRY(pow_neon) +#if defined(KRAIT_NEON_OPTIMIZATION) +#define KRAIT_NO_AAPCS_VFP_MODE +#endif + +ENTRY(pow) #if defined(KRAIT_NO_AAPCS_VFP_MODE) @ ARM ABI has inputs coming in via r registers, lets move to a d register vmov x, x_lw, x_hw #endif push {r4, r5, r6, r7, r8, r9, r10, lr} + movw maxrange, #0x0000 + movt maxrange, #0x4010 + @ pre-staged bp values vldr bpa, .LbpA vldr bpb, .LbpB @@ -176,6 +184,11 @@ ENTRY(pow_neon) .Lxle2to3over5: + cmp x_hw, maxrange + cmpls y_hw, maxrange + movt maxrange, #0x3f00 + cmpls maxrange, x_hw + @ load log2 polynomial series constants vldm literals!, {k4, k3, k2, k1} @@ -187,6 +200,8 @@ ENTRY(pow_neon) vsub.f64 u, ss, bp vadd.f64 v, ss, bp + bhi .LuseFullImpl + @ s = (x-1)/(x+1) vdiv.f64 ss, u, v @@ -393,6 +408,10 @@ ENTRY(pow_neon) @ leave directly returning whatever is in Return_lw and Return_hw pop {r4, r5, r6, r7, r8, r9, r10, pc} +.LuseFullImpl: + pop {r4, r5, r6, r7, r8, r9, r10, lr} + b __full_ieee754_pow + .align 6 .LliteralTable: @ Least-sqares tuned constants for 11th order (log2((1+s)/(1-s)): @@ -440,4 +459,4 @@ ENTRY(pow_neon) .Ltwoto1o4: @ 2^1/4 .long 0x0a31b715, 0x3ff306fe -END(pow_neon) +END(pow) diff --git a/libm/src/e_pow.c b/libm/src/e_pow.c index 67171e6..5bb7f7b 100644 --- a/libm/src/e_pow.c +++ b/libm/src/e_pow.c @@ -61,14 +61,6 @@ static char rcsid[] = "$FreeBSD: src/lib/msun/src/e_pow.c,v 1.11 2005/02/04 18:2 #include "math.h" #include "math_private.h" -#if defined(__ARM_NEON__) -#if defined(KRAIT_NO_AAPCS_VFP_MODE) -double pow_neon(double x, double y); -#else -double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp"))); -#endif -#endif - static const double bp[] = {1.0, 1.5,}, dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ @@ -103,7 +95,11 @@ ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ double +#if defined(__ARM_NEON__) +__full_ieee754_pow(double x, double y) +#else __ieee754_pow(double x, double y) +#endif { double z,ax,z_h,z_l,p_h,p_l; double y1,t1,t2,r,s,t,u,v,w; @@ -229,14 +225,6 @@ __ieee754_pow(double x, double y) t1 = u+v; SET_LOW_WORD(t1,0); t2 = v-(t1-u); -#if defined(__ARM_NEON__) - } else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) { -#if defined(KRAIT_NO_AAPCS_VFP_MODE) - return pow_neon(x,y); -#else - return pow_neon(x,y,lx,hx); -#endif -#endif } else { double ss,s2,s_h,s_l,t_h,t_l; n = 0; diff --git a/libm/src/math_private.h b/libm/src/math_private.h index 7cda2e9..888272d 100644 --- a/libm/src/math_private.h +++ b/libm/src/math_private.h @@ -257,6 +257,9 @@ cpackl(long double x, long double y) #define __ieee754_ldexpf ldexpf /* fdlibm kernel function */ +#if defined(__ARM_NEON__) +double __full_ieee754_pow(double,double); +#endif #if defined(KRAIT_NEON_OPTIMIZATION) int __ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp"))); double __kernel_sin(double,double,int) __attribute__((pcs("aapcs-vfp"))); -- cgit v1.1