summaryrefslogtreecommitdiffstats
path: root/libm/x86_64/e_hypot.S
diff options
context:
space:
mode:
Diffstat (limited to 'libm/x86_64/e_hypot.S')
-rw-r--r--libm/x86_64/e_hypot.S210
1 files changed, 210 insertions, 0 deletions
diff --git a/libm/x86_64/e_hypot.S b/libm/x86_64/e_hypot.S
new file mode 100644
index 0000000..089b2b4
--- /dev/null
+++ b/libm/x86_64/e_hypot.S
@@ -0,0 +1,210 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/******************************************************************************/
+// ALGORITHM DESCRIPTION
+// ---------------------
+//
+// X87 version:
+// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
+//
+// SSE version:
+// Swap x, y if |x|<|y|
+// For x=2^k*x, get y=y*2^(-k)
+// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits)
+//
+// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
+// + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) )
+//
+// Result is 2^k*(S + Se), where Se = S*e
+// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
+//
+// Return 2^k*(S+Se)
+//
+// For |y/x|<2^(-64), return x
+//
+// For cases where maximum biased exponent is either greater than 7fdh or
+// below 32, take a special path to check for special cases (0, NaN, Inf),
+// possible overflow, and more accurate computation for denormal results
+//
+// Special cases:
+// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
+// hypot(x,+-0) is equivalent to fabs(x)
+// hypot(x,y) = y if (x==NaN or x==INF) and y==INF
+// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
+// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
+//
+/******************************************************************************/
+
+#include <private/bionic_asm.h>
+# -- Begin hypot
+ENTRY(hypot)
+# parameter 1: %xmm0
+# parameter 2: %xmm1
+..B1.1:
+..___tag_value_hypot.1:
+..___tag_value_hypot.3:
+..B1.2:
+ subq $64, %rsp
+ movapd static_const_table(%rip), %xmm3
+ movsd %xmm0, 48(%rsp)
+ movsd %xmm1, 56(%rsp)
+ andpd %xmm3, %xmm0
+ andpd %xmm3, %xmm1
+ pextrw $3, %xmm0, %eax
+ pextrw $3, %xmm1, %edx
+ cmpl $24528, %eax
+ ja .L_2TAG_PACKET_0.0.1
+ cmpl $24528, %edx
+ ja .L_2TAG_PACKET_0.0.1
+.L_2TAG_PACKET_1.0.1:
+ fldl 48(%rsp)
+ fldl 56(%rsp)
+ fxch %st(1)
+ fmul %st(0), %st
+ fxch %st(1)
+ nop
+ fmul %st(0), %st
+ faddp %st, %st(1)
+ fsqrt
+ jmp .L_2TAG_PACKET_2.0.1
+.L_2TAG_PACKET_0.0.1:
+ cmpl $32752, %eax
+ movl %eax, %ecx
+ jae .L_2TAG_PACKET_3.0.1
+ subl %edx, %ecx
+ cmpl $32752, %edx
+ jae .L_2TAG_PACKET_3.0.1
+ addl $928, %ecx
+ addl %edx, %eax
+ cmpl $1856, %ecx
+ ja .L_2TAG_PACKET_4.0.1
+ cmpl $49056, %eax
+ jb .L_2TAG_PACKET_1.0.1
+ fldl 48(%rsp)
+ fldl 56(%rsp)
+ fxch %st(1)
+ fmul %st(0), %st
+ fxch %st(1)
+ nop
+ fmul %st(0), %st
+ faddp %st, %st(1)
+ fsqrt
+.L_2TAG_PACKET_5.0.1:
+ fstl (%rsp)
+ fstpt 16(%rsp)
+ xorl %eax, %eax
+ movw 24(%rsp), %ax
+ cmpl $17407, %eax
+ jae .L_2TAG_PACKET_6.0.1
+ fldl (%rsp)
+ jmp .L_2TAG_PACKET_7.0.1
+.L_2TAG_PACKET_4.0.1:
+ movsd %xmm0, 32(%rsp)
+ movsd %xmm1, 40(%rsp)
+ fldl 32(%rsp)
+ faddl 40(%rsp)
+ jmp .L_2TAG_PACKET_5.0.1
+.L_2TAG_PACKET_6.0.1:
+ fldl (%rsp)
+ jmp .L_2TAG_PACKET_7.0.1
+.L_2TAG_PACKET_3.0.1:
+ shufpd $0, %xmm1, %xmm0
+ movdqa %xmm0, %xmm2
+ movdqa 16+static_const_table(%rip), %xmm3
+ movsd %xmm0, 32(%rsp)
+ movsd %xmm1, 40(%rsp)
+ cmppd $3, %xmm0, %xmm2
+ cmppd $0, %xmm0, %xmm3
+ movmskpd %xmm2, %edx
+ movmskpd %xmm3, %rax
+ testl %edx, %edx
+ je .L_2TAG_PACKET_8.0.1
+ fldl 32(%rsp)
+ fmull 40(%rsp)
+ testq $1, %rax
+ jne .L_2TAG_PACKET_9.0.1
+ testq $2, %rax
+ jne .L_2TAG_PACKET_10.0.1
+ jmp .L_2TAG_PACKET_2.0.1
+.L_2TAG_PACKET_8.0.1:
+ fldl 32(%rsp)
+ faddl 40(%rsp)
+ jmp .L_2TAG_PACKET_2.0.1
+.L_2TAG_PACKET_9.0.1:
+ fstpl 40(%rsp)
+ fldl 32(%rsp)
+ jmp .L_2TAG_PACKET_7.0.1
+.L_2TAG_PACKET_10.0.1:
+ fstpl 32(%rsp)
+ fldl 40(%rsp)
+ jmp .L_2TAG_PACKET_7.0.1
+.L_2TAG_PACKET_2.0.1:
+.L_2TAG_PACKET_7.0.1:
+ fstpl 16(%rsp)
+ movq 16(%rsp), %xmm0
+ addq $64, %rsp
+ ret
+..B1.3:
+..___tag_value_hypot.4:
+END(hypot)
+# -- End hypot
+ .section .rodata, "a"
+ .align 16
+ .align 16
+static_const_table:
+ .long 4294967295
+ .long 2147483647
+ .long 4294967295
+ .long 2147483647
+ .long 0
+ .long 2146435072
+ .long 0
+ .long 2146435072
+ .type static_const_table,@object
+ .size static_const_table,32
+ .data
+ .section .note.GNU-stack, ""
+// -- Begin DWARF2 SEGMENT .eh_frame
+ .section .eh_frame,"a",@progbits
+.eh_frame_seg:
+ .align 1
+ .4byte 0x00000014
+ .8byte 0x00527a0100000000
+ .8byte 0x08070c1b01107801
+ .4byte 0x00000190
+ .4byte 0x00000014
+ .4byte 0x0000001c
+ .4byte ..___tag_value_hypot.1-.
+ .4byte ..___tag_value_hypot.4-..___tag_value_hypot.1
+ .2byte 0x0400
+ .4byte ..___tag_value_hypot.3-..___tag_value_hypot.1
+ .2byte 0x100e
+# End