diff options
Diffstat (limited to 'libm/x86_64/e_hypot.S')
-rw-r--r-- | libm/x86_64/e_hypot.S | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/libm/x86_64/e_hypot.S b/libm/x86_64/e_hypot.S new file mode 100644 index 0000000..089b2b4 --- /dev/null +++ b/libm/x86_64/e_hypot.S @@ -0,0 +1,210 @@ +/* +Copyright (c) 2014, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// X87 version: +// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt. +// +// SSE version: +// Swap x, y if |x|<|y| +// For x=2^k*x, get y=y*2^(-k) +// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits) +// +// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) + +// + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) ) +// +// Result is 2^k*(S + Se), where Se = S*e +// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S ) +// +// Return 2^k*(S+Se) +// +// For |y/x|<2^(-64), return x +// +// For cases where maximum biased exponent is either greater than 7fdh or +// below 32, take a special path to check for special cases (0, NaN, Inf), +// possible overflow, and more accurate computation for denormal results +// +// Special cases: +// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent +// hypot(x,+-0) is equivalent to fabs(x) +// hypot(x,y) = y if (x==NaN or x==INF) and y==INF +// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!) +// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF) +// +/******************************************************************************/ + +#include <private/bionic_asm.h> +# -- Begin hypot +ENTRY(hypot) +# parameter 1: %xmm0 +# parameter 2: %xmm1 +..B1.1: +..___tag_value_hypot.1: +..___tag_value_hypot.3: +..B1.2: + subq $64, %rsp + movapd static_const_table(%rip), %xmm3 + movsd %xmm0, 48(%rsp) + movsd %xmm1, 56(%rsp) + andpd %xmm3, %xmm0 + andpd %xmm3, %xmm1 + pextrw $3, %xmm0, %eax + pextrw $3, %xmm1, %edx + cmpl $24528, %eax + ja .L_2TAG_PACKET_0.0.1 + cmpl $24528, %edx + ja .L_2TAG_PACKET_0.0.1 +.L_2TAG_PACKET_1.0.1: + fldl 48(%rsp) + fldl 56(%rsp) + fxch %st(1) + fmul %st(0), %st + fxch %st(1) + nop + fmul %st(0), %st + faddp %st, %st(1) + fsqrt + jmp .L_2TAG_PACKET_2.0.1 +.L_2TAG_PACKET_0.0.1: + cmpl $32752, %eax + movl %eax, %ecx + jae .L_2TAG_PACKET_3.0.1 + subl %edx, %ecx + cmpl $32752, %edx + jae .L_2TAG_PACKET_3.0.1 + addl $928, %ecx + addl %edx, %eax + cmpl $1856, %ecx + ja .L_2TAG_PACKET_4.0.1 + cmpl $49056, %eax + jb .L_2TAG_PACKET_1.0.1 + fldl 48(%rsp) + fldl 56(%rsp) + fxch %st(1) + fmul %st(0), %st + fxch %st(1) + nop + fmul %st(0), %st + faddp %st, %st(1) + fsqrt +.L_2TAG_PACKET_5.0.1: + fstl (%rsp) + fstpt 16(%rsp) + xorl %eax, %eax + movw 24(%rsp), %ax + cmpl $17407, %eax + jae .L_2TAG_PACKET_6.0.1 + fldl (%rsp) + jmp .L_2TAG_PACKET_7.0.1 +.L_2TAG_PACKET_4.0.1: + movsd %xmm0, 32(%rsp) + movsd %xmm1, 40(%rsp) + fldl 32(%rsp) + faddl 40(%rsp) + jmp .L_2TAG_PACKET_5.0.1 +.L_2TAG_PACKET_6.0.1: + fldl (%rsp) + jmp .L_2TAG_PACKET_7.0.1 +.L_2TAG_PACKET_3.0.1: + shufpd $0, %xmm1, %xmm0 + movdqa %xmm0, %xmm2 + movdqa 16+static_const_table(%rip), %xmm3 + movsd %xmm0, 32(%rsp) + movsd %xmm1, 40(%rsp) + cmppd $3, %xmm0, %xmm2 + cmppd $0, %xmm0, %xmm3 + movmskpd %xmm2, %edx + movmskpd %xmm3, %rax + testl %edx, %edx + je .L_2TAG_PACKET_8.0.1 + fldl 32(%rsp) + fmull 40(%rsp) + testq $1, %rax + jne .L_2TAG_PACKET_9.0.1 + testq $2, %rax + jne .L_2TAG_PACKET_10.0.1 + jmp .L_2TAG_PACKET_2.0.1 +.L_2TAG_PACKET_8.0.1: + fldl 32(%rsp) + faddl 40(%rsp) + jmp .L_2TAG_PACKET_2.0.1 +.L_2TAG_PACKET_9.0.1: + fstpl 40(%rsp) + fldl 32(%rsp) + jmp .L_2TAG_PACKET_7.0.1 +.L_2TAG_PACKET_10.0.1: + fstpl 32(%rsp) + fldl 40(%rsp) + jmp .L_2TAG_PACKET_7.0.1 +.L_2TAG_PACKET_2.0.1: +.L_2TAG_PACKET_7.0.1: + fstpl 16(%rsp) + movq 16(%rsp), %xmm0 + addq $64, %rsp + ret +..B1.3: +..___tag_value_hypot.4: +END(hypot) +# -- End hypot + .section .rodata, "a" + .align 16 + .align 16 +static_const_table: + .long 4294967295 + .long 2147483647 + .long 4294967295 + .long 2147483647 + .long 0 + .long 2146435072 + .long 0 + .long 2146435072 + .type static_const_table,@object + .size static_const_table,32 + .data + .section .note.GNU-stack, "" +// -- Begin DWARF2 SEGMENT .eh_frame + .section .eh_frame,"a",@progbits +.eh_frame_seg: + .align 1 + .4byte 0x00000014 + .8byte 0x00527a0100000000 + .8byte 0x08070c1b01107801 + .4byte 0x00000190 + .4byte 0x00000014 + .4byte 0x0000001c + .4byte ..___tag_value_hypot.1-. + .4byte ..___tag_value_hypot.4-..___tag_value_hypot.1 + .2byte 0x0400 + .4byte ..___tag_value_hypot.3-..___tag_value_hypot.1 + .2byte 0x100e +# End |