summaryrefslogtreecommitdiffstats
path: root/libm/x86/e_hypot.S
diff options
context:
space:
mode:
authorJingwei Zhang <jingwei.zhang@intel.com>2014-10-31 18:29:18 +0800
committerChristopher Ferris <cferris@google.com>2015-03-09 13:19:08 -0700
commit5d4f0e6a26b66f1dab8d20a65af4469c6dd7370d (patch)
treea110442bd4fa9b13f610fd29f5b6dd42fdbf32fc /libm/x86/e_hypot.S
parentedc1d3e3c6266eadcd05a9aa7bcdec701ba060aa (diff)
downloadbionic-5d4f0e6a26b66f1dab8d20a65af4469c6dd7370d.zip
bionic-5d4f0e6a26b66f1dab8d20a65af4469c6dd7370d.tar.gz
bionic-5d4f0e6a26b66f1dab8d20a65af4469c6dd7370d.tar.bz2
Add the optimized implementation of 18 math functions for x86 and x86_64 respectively
Change-Id: I31bf601448a9427f825517f3a0ff24de47f49bfa Signed-off-by: Jingwei Zhang <jingwei.zhang@intel.com> Signed-off-by: Mingwei Shi <mingwei.shi@intel.com>
Diffstat (limited to 'libm/x86/e_hypot.S')
-rw-r--r--libm/x86/e_hypot.S221
1 files changed, 221 insertions, 0 deletions
diff --git a/libm/x86/e_hypot.S b/libm/x86/e_hypot.S
new file mode 100644
index 0000000..aa6ab64
--- /dev/null
+++ b/libm/x86/e_hypot.S
@@ -0,0 +1,221 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/******************************************************************************/
+// ALGORITHM DESCRIPTION
+// ---------------------
+//
+// X87 version (the variant implemented in this file):
+// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
+//
+// SSE version:
+// Swap x, y if |x|<|y|
+// For x=2^k*x, get y=y*2^(-k)
+// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits)
+//
+// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) )
+// + ( y^2 - (RN(x^2+y^2) - RN(x^2)) )
+//
+// Result is 2^k*(S + Se), where Se = S*e
+// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
+//
+// Return 2^k*(S+Se)
+//
+// For |y/x|<2^(-64), return x
+//
+// For cases where maximum biased exponent is either greater than 7fdh or
+// below 32, take a special path to check for special cases (0, NaN, Inf),
+// possible overflow, and more accurate computation for denormal results
+//
+// Special cases:
+// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
+// hypot(x,+-0) is equivalent to fabs(x)
+// hypot(x,y) = y if (x==NaN or x==INF) and y==INF
+// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
+// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
+//
+/******************************************************************************/
+
+#include <private/bionic_asm.h>
+# -- Begin static_func (PIC helper: returns the address of static_const_table in %eax)
+ .text
+ .align __bionic_asm_align
+ .type static_func, @function
+static_func:
+..B1.entry:
+ call ..Lgetpc # pushes the address of ..Lgetpc as the return address
+..Lgetpc:
+ popl %eax # %eax = run-time address of ..Lgetpc
+ leal _GLOBAL_OFFSET_TABLE_+[. - ..Lgetpc](%eax), %eax # %eax = address of _GLOBAL_OFFSET_TABLE_ (lea leaves flags untouched)
+ leal static_const_table@GOTOFF(%eax), %eax # %eax = GOT address + GOT-relative offset of the table
+ ret
+ .size static_func, . - static_func
+# -- End static_func
+
+# -- Begin hypot: takes two doubles on the stack, leaves sqrt(x*x + y*y) in x87 st(0)
+ENTRY(hypot)
+# parameter 1: 8 + %ebp (x, 8 bytes)
+# parameter 2: 16 + %ebp (y, 8 bytes)
+..B2.1:
+..B2.2:
+ pushl %ebp
+ movl %esp, %ebp # frame pointer, used by the epilogue to unwind %esp
+ subl $152, %esp # local area; x is now at 160(%esp) and y at 168(%esp)
+ movl %ebx, 96(%esp) # save %ebx, reloaded from this slot before ret
+ call static_func # %eax = address of static_const_table
+ movl %eax, %ebx # %ebx = table base for the rest of the function
+ movapd (%ebx), %xmm3 # table bytes 0-15: 0x7fffffffffffffff in both lanes (clears the sign bit)
+ movsd 160(%esp), %xmm0 # xmm0 = x
+ movsd 168(%esp), %xmm1 # xmm1 = y
+ andpd %xmm3, %xmm0 # xmm0 = |x|
+ andpd %xmm3, %xmm1 # xmm1 = |y|
+ pextrw $3, %xmm0, %eax # %eax = top 16 bits of |x|: biased exponent plus 4 leading mantissa bits
+ pextrw $3, %xmm1, %edx # %edx = same field of |y|
+ cmpl $24528, %eax # 24528 = 0x5fd0
+ ja .L_2TAG_PACKET_0.0.2 # |x| above the threshold: run the range checks
+ cmpl $24528, %edx
+ ja .L_2TAG_PACKET_0.0.2 # |y| above the threshold: run the range checks
+.L_2TAG_PACKET_1.0.2: # plain path: evaluate directly on the x87 stack
+ fldl 160(%esp) # st0 = x
+ fldl 168(%esp) # st0 = y, st1 = x
+ fxch %st(1) # st0 = x, st1 = y
+ fmul %st(0), %st # st0 = x*x
+ fxch %st(1) # st0 = y, st1 = x*x
+ nop
+ fmul %st(0), %st # st0 = y*y
+ faddp %st, %st(1) # st0 = x*x + y*y, one entry popped
+ fsqrt # st0 = sqrt(x*x + y*y)
+ jmp .L_2TAG_PACKET_2.0.2 # return with the result in st0
+.L_2TAG_PACKET_0.0.2: # at least one top word is above 0x5fd0
+ cmpl $32752, %eax # 32752 = 0x7ff0: exponent field all ones (Inf or NaN)
+ movl %eax, %ecx # mov does not change the flags set by the cmpl above
+ jae .L_2TAG_PACKET_3.0.2 # x is Inf or NaN
+ subl %edx, %ecx # %ecx = top(|x|) - top(|y|)
+ cmpl $32752, %edx
+ jae .L_2TAG_PACKET_3.0.2 # y is Inf or NaN
+ addl $928, %ecx # 928 = 0x3a0 = 58 exponent steps; shifts the window [-928, 928] to [0, 1856]
+ addl %edx, %eax # %eax = top(|x|) + top(|y|)
+ cmpl $1856, %ecx # unsigned compare, true when the difference is outside [-928, 928]
+ ja .L_2TAG_PACKET_4.0.2 # magnitudes too far apart: use |x| + |y|
+ cmpl $49056, %eax # 49056 = 0xbfa0
+ jb .L_2TAG_PACKET_1.0.2 # combined top words below 0xbfa0: plain path, no range check on the result
+ fldl 160(%esp) # same evaluation as the plain path: st0 = x
+ fldl 168(%esp) # st0 = y, st1 = x
+ fxch %st(1) # st0 = x, st1 = y
+ fmul %st(0), %st # st0 = x*x
+ fxch %st(1) # st0 = y, st1 = x*x
+ nop
+ fmul %st(0), %st # st0 = y*y
+ faddp %st, %st(1) # st0 = x*x + y*y
+ fsqrt # st0 = sqrt(x*x + y*y); falls through to the range check
+.L_2TAG_PACKET_5.0.2: # st0 holds the candidate result; check it against the double range
+ fstl (%esp) # store st0 as a double at (%esp), no pop
+ fstpt 16(%esp) # store the 80-bit value at 16(%esp) and pop
+ xorl %eax, %eax # clear %eax so the 16-bit load below is zero-extended
+ movw 24(%esp), %ax # bytes 8-9 of the 80-bit value: sign bit and 15-bit exponent
+ cmpl $17407, %eax # 17407 = 0x43ff = 0x3fff + 0x400, i.e. magnitude >= 2^1024
+ jae .L_2TAG_PACKET_6.0.2 # result does not fit in a double
+ fldl (%esp) # reload the value that was rounded to double
+ jmp .L_2TAG_PACKET_7.0.2
+.L_2TAG_PACKET_4.0.2: # top words of |x| and |y| differ by more than 928
+ movsd %xmm0, 32(%esp) # spill |x|
+ movsd %xmm1, 40(%esp) # spill |y|
+ fldl 32(%esp) # st0 = |x|
+ faddl 40(%esp) # st0 = |x| + |y|
+ jmp .L_2TAG_PACKET_5.0.2 # range-check the sum
+.L_2TAG_PACKET_6.0.2: # result exponent is beyond the double range
+ movl $46, %edx # NOTE(review): %edx is not read again in the visible code; looks like a leftover error code, confirm
+.L_2TAG_PACKET_8.0.2: # NOTE(review): no jump to this label is visible in this file
+ movsd 160(%esp), %xmm0 # NOTE(review): xmm0 is not used after this reload in the visible code
+ movsd 168(%esp), %xmm1 # NOTE(review): xmm1 is not used after this reload in the visible code
+ fldl (%esp) # return the value stored as a double by the fstl above
+ jmp .L_2TAG_PACKET_7.0.2
+.L_2TAG_PACKET_3.0.2: # x or y has an all-ones exponent (Inf or NaN)
+ shufpd $0, %xmm1, %xmm0 # xmm0 = { |x|, |y| }
+ movdqa %xmm0, %xmm2 # copy for the NaN test
+ movdqa 16(%ebx), %xmm3 # table bytes 16-31: 0x7ff0000000000000 (+Inf) in both lanes
+ movsd %xmm0, 32(%esp) # spill |x|
+ movsd %xmm1, 40(%esp) # spill |y|
+ cmppd $3, %xmm0, %xmm2 # predicate 3 (unordered) against itself: lane mask of NaN operands
+ cmppd $0, %xmm0, %xmm3 # predicate 0 (equal) against +Inf: lane mask of Inf operands
+ movmskpd %xmm2, %edx # bit 0: x is NaN, bit 1: y is NaN
+ movmskpd %xmm3, %eax # bit 0: x is Inf, bit 1: y is Inf
+ testl %edx, %edx
+ je .L_2TAG_PACKET_9.0.2 # no NaN present, so at least one operand is Inf
+ fldl 32(%esp) # st0 = |x|
+ fmull 40(%esp) # st0 = |x| * |y| (a NaN operand is present on this path)
+ testl $1, %eax
+ jne .L_2TAG_PACKET_10.0.2 # x is Inf: return |x| instead of the product
+ testl $2, %eax
+ jne .L_2TAG_PACKET_11.0.2 # y is Inf: return |y| instead of the product
+ jmp .L_2TAG_PACKET_2.0.2 # neither is Inf: return the product
+.L_2TAG_PACKET_9.0.2: # Inf without NaN
+ fldl 32(%esp) # st0 = |x|
+ faddl 40(%esp) # st0 = |x| + |y|
+ jmp .L_2TAG_PACKET_2.0.2
+.L_2TAG_PACKET_10.0.2: # x is Inf, y is NaN
+ fstpl 40(%esp) # pop the product off the x87 stack (written over the |y| slot)
+ fldl 32(%esp) # st0 = |x|
+ jmp .L_2TAG_PACKET_7.0.2
+.L_2TAG_PACKET_11.0.2: # y is Inf, x is NaN
+ fstpl 32(%esp) # pop the product off the x87 stack (written over the |x| slot)
+ fldl 40(%esp) # st0 = |y|
+ jmp .L_2TAG_PACKET_7.0.2
+.L_2TAG_PACKET_2.0.2: # common exit, result in st0
+.L_2TAG_PACKET_7.0.2: # common exit, result in st0
+ movl 96(%esp), %ebx # restore the saved %ebx
+ movl %ebp, %esp # release the local area
+ popl %ebp
+ ret
+..B2.3:
+END(hypot)
+# -- End hypot
+
+# Start file scope ASM: hypotl shares the hypot entry point
+.weak hypotl # exported as a weak symbol
+.equ hypotl, hypot # same address as hypot; presumably long double equals double on this target -- TODO confirm
+# End file scope ASM
+ .section .rodata, "a"
+ .align 16
+ .align 16
+static_const_table:
+ .long 0xffffffff # bytes 0-15: 0x7fffffffffffffff twice, loaded by hypot with movapd
+ .long 0x7fffffff
+ .long 0xffffffff
+ .long 0x7fffffff
+ .long 0x00000000 # bytes 16-31: 0x7ff0000000000000 (+Inf) twice, loaded by hypot with movdqa
+ .long 0x7ff00000
+ .long 0x00000000
+ .long 0x7ff00000
+ .type static_const_table,@object
+ .size static_const_table,32
+ .data
+ .section .note.GNU-stack, ""
+# End