diff options
Diffstat (limited to 'libm/x86_64/s_expm1.S')
-rw-r--r-- | libm/x86_64/s_expm1.S | 727 |
1 files changed, 727 insertions, 0 deletions
diff --git a/libm/x86_64/s_expm1.S b/libm/x86_64/s_expm1.S new file mode 100644 index 0000000..9da1d9d --- /dev/null +++ b/libm/x86_64/s_expm1.S @@ -0,0 +1,727 @@ +/* +Copyright (c) 2014, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// Description: +// Let K = 64 (table size). +// +// Four sub-domains: +// 1. |x| < 1/(2*K) +// expm1(x) ~ P(x) +// 2. 1/(2*K) <= |x| <= 56*log(2) +// x x/log(2) n +// e - 1 = 2 = 2 * T[j] * (1 + P(y)) - 1 +// 3. 56*log(2) < x < MAX_LOG +// x x x/log(2) n +// e - 1 ~ e = 2 = 2 * T[j] * (1 + P(y)) +// 4. x < -56*log(2) +// x x +// e - 1 = -1 + e ~ -1 +// where +// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] +// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] +// j/K +// values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). +// +// P(y) is a minimax polynomial approximation of exp(x)-1 +// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). +// +// In case 3, to avoid problems with arithmetic overflow and underflow, +// n n1 n2 +// value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] +// and BIAS is a value of exponent bias. +// +// Special cases: +// expm1(NaN) is NaN +// expm1(+INF) is +INF +// expm1(-INF) is -1 +// expm1(x) is x for subnormals +// for finite argument, only expm1(0)=0 is exact. +// For IEEE double +// if x > 709.782712893383973096 then expm1(x) overflow +// +/******************************************************************************/ + +#include <private/bionic_asm.h> +# -- Begin expm1 +ENTRY(expm1) +# parameter 1: %xmm0 +..B1.1: +..___tag_value_expm1.1: + subq $56, %rsp +..___tag_value_expm1.3: + movsd %xmm0, 32(%rsp) +..B1.2: + unpcklpd %xmm0, %xmm0 + movapd cv(%rip), %xmm1 + movapd Shifter(%rip), %xmm6 + movapd 16+cv(%rip), %xmm2 + movapd 32+cv(%rip), %xmm3 + pextrw $3, %xmm0, %eax + andl $32767, %eax + movl $16527, %edx + subl %eax, %edx + subl $16304, %eax + orl %eax, %edx + cmpl $-2147483648, %edx + jae .L_2TAG_PACKET_0.0.2 + mulpd %xmm0, %xmm1 + addpd %xmm6, %xmm1 + movapd %xmm1, %xmm7 + subpd %xmm6, %xmm1 + mulpd %xmm1, %xmm2 + movapd 48+cv(%rip), %xmm4 + mulpd %xmm1, %xmm3 + movapd 64+cv(%rip), %xmm5 + subpd %xmm2, %xmm0 + movd %xmm7, %eax + movl %eax, %ecx + andl $63, %ecx + shll $4, %ecx + sarl $6, %eax + movl %eax, %edx + subpd %xmm3, %xmm0 + lea Tbl_addr(%rip), %r11 + movapd (%rcx,%r11), %xmm2 + movq 80+cv(%rip), %xmm3 + mulpd %xmm0, %xmm4 + movapd %xmm0, %xmm1 + mulpd %xmm0, %xmm0 + mulsd %xmm0, %xmm3 + addpd %xmm4, %xmm5 + mulsd %xmm0, %xmm0 + movq %xmm2, %xmm4 + unpckhpd %xmm2, %xmm2 + movdqa mmask(%rip), %xmm6 + pand %xmm6, %xmm7 + movdqa bias(%rip), %xmm6 + paddq %xmm6, %xmm7 + psllq $46, %xmm7 + mulsd %xmm0, %xmm3 + mulpd %xmm5, %xmm0 + addl $894, %edx + cmpl $1916, %edx + ja .L_2TAG_PACKET_1.0.2 + addsd %xmm3, %xmm0 + xorpd %xmm3, %xmm3 + movl $16368, %eax + pinsrw $3, %eax, %xmm3 + orpd %xmm7, %xmm2 + mulsd %xmm4, %xmm7 + movq %xmm3, %xmm6 + addsd %xmm1, %xmm3 + pextrw $3, %xmm2, %edx + pshufd $238, %xmm0, %xmm5 + psrlq $38, %xmm3 + psllq $38, %xmm3 + movq %xmm2, %xmm4 + subsd %xmm3, %xmm6 + addsd %xmm5, %xmm0 + addsd %xmm6, %xmm1 + addsd %xmm7, %xmm4 + mulsd %xmm3, %xmm7 + mulsd %xmm2, %xmm3 + xorpd %xmm5, %xmm5 + movl $16368, %eax + pinsrw $3, %eax, %xmm5 + addsd %xmm1, %xmm0 + movl $17184, %ecx + subl %edx, %ecx + subl $16256, %edx + orl %edx, %ecx + jl .L_2TAG_PACKET_2.0.2 + mulsd %xmm4, %xmm0 + subsd %xmm5, %xmm3 + addsd %xmm7, %xmm0 + addsd %xmm3, %xmm0 +.L_2TAG_PACKET_3.0.2: + jmp ..B1.5 +.L_2TAG_PACKET_2.0.2: + cmpl $0, %edx + jl .L_2TAG_PACKET_4.0.2 + mulsd %xmm4, %xmm0 + subsd %xmm5, %xmm7 + addsd %xmm7, %xmm0 + addsd %xmm3, %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_4.0.2: + mulsd %xmm4, %xmm0 + addsd %xmm7, %xmm0 + addsd %xmm3, %xmm0 + subsd %xmm5, %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_1.0.2: + movl 36(%rsp), %ecx + addsd %xmm0, %xmm1 + unpckhpd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cmpl $0, %ecx + jl .L_2TAG_PACKET_5.0.2 + fstcw (%rsp) + movw (%rsp), %dx + orw $768, %dx + movw %dx, 4(%rsp) + fldcw 4(%rsp) + movl %eax, %edx + sarl $1, %eax + subl %eax, %edx + movdqa emask(%rip), %xmm6 + pandn %xmm2, %xmm6 + addl $1023, %eax + movd %eax, %xmm3 + psllq $52, %xmm3 + orpd %xmm3, %xmm6 + mulsd %xmm3, %xmm4 + movsd %xmm0, 16(%rsp) + fldl 16(%rsp) + movsd %xmm6, 24(%rsp) + fldl 24(%rsp) + movsd %xmm4, 16(%rsp) + fldl 16(%rsp) + addl $1023, %edx + movd %edx, %xmm4 + psllq $52, %xmm4 + faddp %st, %st(1) + fmul %st, %st(1) + faddp %st, %st(1) + movsd %xmm4, 24(%rsp) + fldl 24(%rsp) + fmulp %st, %st(1) + fstpl 16(%rsp) + movsd 16(%rsp), %xmm0 + fldcw (%rsp) + pextrw $3, %xmm0, %ecx + andl $32752, %ecx + cmpl $32752, %ecx + jae .L_2TAG_PACKET_6.0.2 + jmp ..B1.5 + cmpl $-2147483648, %ecx + jb .L_2TAG_PACKET_6.0.2 + jmp ..B1.5 +.L_2TAG_PACKET_6.0.2: + movl $41, 8(%rsp) + jmp .L_2TAG_PACKET_7.0.2 +.L_2TAG_PACKET_8.0.2: + cmpl $2146435072, %eax + jae .L_2TAG_PACKET_9.0.2 + movsd XMAX(%rip), %xmm0 + mulsd %xmm0, %xmm0 + movl $41, 8(%rsp) + jmp .L_2TAG_PACKET_7.0.2 +.L_2TAG_PACKET_9.0.2: + movl 36(%rsp), %eax + movl 32(%rsp), %edx + movl %eax, %ecx + andl $2147483647, %eax + cmpl $2146435072, %eax + ja .L_2TAG_PACKET_10.0.2 + cmpl $0, %edx + jne .L_2TAG_PACKET_10.0.2 + cmpl $0, %ecx + jl .L_2TAG_PACKET_11.0.2 + movq INF(%rip), %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_11.0.2: + jmp .L_2TAG_PACKET_5.0.2 +.L_2TAG_PACKET_10.0.2: + movsd 32(%rsp), %xmm0 + addsd %xmm0, %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_12.0.2: + addl $16304, %eax + cmpl $15504, %eax + jb .L_2TAG_PACKET_13.0.2 + movapd cvl(%rip), %xmm2 + pshufd $68, %xmm0, %xmm1 + movapd 16+cvl(%rip), %xmm3 + movapd 32+cvl(%rip), %xmm4 + movq 48+cvl(%rip), %xmm5 + mulsd %xmm1, %xmm1 + xorpd %xmm6, %xmm6 + movl $16352, %eax + pinsrw $3, %eax, %xmm6 + mulpd %xmm0, %xmm2 + xorpd %xmm7, %xmm7 + movl $16368, %edx + pinsrw $3, %edx, %xmm7 + addpd %xmm3, %xmm2 + mulsd %xmm1, %xmm5 + pshufd $228, %xmm1, %xmm3 + mulpd %xmm1, %xmm1 + mulsd %xmm0, %xmm6 + mulpd %xmm0, %xmm2 + addpd %xmm4, %xmm2 + movq %xmm7, %xmm4 + addsd %xmm6, %xmm7 + mulpd %xmm3, %xmm1 + psrlq $27, %xmm7 + psllq $27, %xmm7 + movq HIGHMASK(%rip), %xmm3 + subsd %xmm7, %xmm4 + mulpd %xmm1, %xmm2 + addsd %xmm4, %xmm6 + pshufd $238, %xmm2, %xmm1 + addsd %xmm2, %xmm6 + andpd %xmm0, %xmm3 + movq %xmm0, %xmm4 + addsd %xmm6, %xmm1 + subsd %xmm3, %xmm0 + addsd %xmm5, %xmm1 + mulsd %xmm7, %xmm3 + mulsd %xmm7, %xmm0 + mulsd %xmm1, %xmm4 + addsd %xmm4, %xmm0 + addsd %xmm3, %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_13.0.2: + cmpl $16, %eax + jae .L_2TAG_PACKET_3.0.2 + movq %xmm0, %xmm2 + movd %xmm0, %eax + psrlq $31, %xmm2 + movd %xmm2, %ecx + orl %ecx, %eax + je .L_2TAG_PACKET_3.0.2 + movl $16, %edx + xorpd %xmm1, %xmm1 + pinsrw $3, %edx, %xmm1 + mulsd %xmm1, %xmm1 + movl $42, 8(%rsp) + jmp .L_2TAG_PACKET_7.0.2 +.L_2TAG_PACKET_0.0.2: + cmpl $0, %eax + jl .L_2TAG_PACKET_12.0.2 + movl 36(%rsp), %eax + cmpl $1083179008, %eax + jge .L_2TAG_PACKET_8.0.2 + cmpl $-1048576, %eax + jae .L_2TAG_PACKET_9.0.2 +.L_2TAG_PACKET_5.0.2: + xorpd %xmm0, %xmm0 + movl $49136, %eax + pinsrw $3, %eax, %xmm0 + jmp ..B1.5 +.L_2TAG_PACKET_7.0.2: + movq %xmm0, 40(%rsp) +..B1.3: + movq 40(%rsp), %xmm0 +.L_2TAG_PACKET_14.0.2: +..B1.5: + addq $56, %rsp +..___tag_value_expm1.4: + ret +..___tag_value_expm1.5: +END(expm1) +# -- End expm1 + .section .rodata, "a" + .align 16 + .align 16 +cv: + .long 1697350398 + .long 1079448903 + .long 1697350398 + .long 1079448903 + .long 4277796864 + .long 1065758274 + .long 4277796864 + .long 1065758274 + .long 3164486458 + .long 1025308570 + .long 3164486458 + .long 1025308570 + .long 1963358694 + .long 1065423121 + .long 1431655765 + .long 1069897045 + .long 1431655765 + .long 1067799893 + .long 0 + .long 1071644672 + .long 381774871 + .long 1062650220 + .long 381774871 + .long 1062650220 + .type cv,@object + .size cv,96 + .align 16 +Shifter: + .long 0 + .long 1127743488 + .long 0 + .long 1127743488 + .type Shifter,@object + .size Shifter,16 + .align 16 +Tbl_addr: + .long 0 + .long 0 + .long 0 + .long 0 + .long 1000070955 + .long 1042145304 + .long 1040187392 + .long 11418 + .long 988267849 + .long 1039500660 + .long 3539992576 + .long 22960 + .long 36755401 + .long 1042114290 + .long 402653184 + .long 34629 + .long 3634769483 + .long 1042178627 + .long 1820327936 + .long 46424 + .long 2155991225 + .long 1041560680 + .long 847249408 + .long 58348 + .long 2766913307 + .long 1039293264 + .long 3489660928 + .long 70401 + .long 3651174602 + .long 1040488175 + .long 2927624192 + .long 82586 + .long 3073892131 + .long 1042240606 + .long 1006632960 + .long 94904 + .long 1328391742 + .long 1042019037 + .long 3942645760 + .long 107355 + .long 2650893825 + .long 1041903210 + .long 822083584 + .long 119943 + .long 2397289153 + .long 1041802037 + .long 2281701376 + .long 132667 + .long 430997175 + .long 1042110606 + .long 1845493760 + .long 145530 + .long 1230936525 + .long 1041801015 + .long 1702887424 + .long 158533 + .long 740675935 + .long 1040178913 + .long 4110417920 + .long 171677 + .long 3489810261 + .long 1041825986 + .long 2793406464 + .long 184965 + .long 2532600530 + .long 1040767882 + .long 167772160 + .long 198398 + .long 3542557060 + .long 1041827263 + .long 2986344448 + .long 211976 + .long 1401563777 + .long 1041061093 + .long 922746880 + .long 225703 + .long 3129406026 + .long 1041852413 + .long 880803840 + .long 239579 + .long 900993572 + .long 1039283234 + .long 1275068416 + .long 253606 + .long 2115029358 + .long 1042140042 + .long 562036736 + .long 267786 + .long 1086643152 + .long 1041785419 + .long 1610612736 + .long 282120 + .long 82864366 + .long 1041256244 + .long 3045064704 + .long 296610 + .long 2392968152 + .long 1040913683 + .long 3573547008 + .long 311258 + .long 2905856183 + .long 1040002214 + .long 1988100096 + .long 326066 + .long 3742008261 + .long 1040011137 + .long 1451229184 + .long 341035 + .long 863393794 + .long 1040880621 + .long 914358272 + .long 356167 + .long 1446136837 + .long 1041372426 + .long 3707764736 + .long 371463 + .long 927855201 + .long 1040617636 + .long 360710144 + .long 386927 + .long 1492679939 + .long 1041050306 + .long 2952790016 + .long 402558 + .long 608827001 + .long 1041582217 + .long 2181038080 + .long 418360 + .long 606260204 + .long 1042271987 + .long 1711276032 + .long 434334 + .long 3163044019 + .long 1041843851 + .long 1006632960 + .long 450482 + .long 4148747325 + .long 1041962972 + .long 3900702720 + .long 466805 + .long 802924201 + .long 1041275378 + .long 1442840576 + .long 483307 + .long 3052749833 + .long 1041940577 + .long 1937768448 + .long 499988 + .long 2216116399 + .long 1041486744 + .long 914358272 + .long 516851 + .long 2729697836 + .long 1041445764 + .long 2566914048 + .long 533897 + .long 540608356 + .long 1041310907 + .long 2600468480 + .long 551129 + .long 2916344493 + .long 1040535661 + .long 1107296256 + .long 568549 + .long 731391814 + .long 1039497014 + .long 2566914048 + .long 586158 + .long 1024722704 + .long 1041461625 + .long 2961178624 + .long 603959 + .long 3806831748 + .long 1041732499 + .long 2675965952 + .long 621954 + .long 238953304 + .long 1040316488 + .long 2189426688 + .long 640145 + .long 749123235 + .long 1041725785 + .long 2063597568 + .long 658534 + .long 1168187977 + .long 1041175214 + .long 2986344448 + .long 677123 + .long 3506096399 + .long 1042186095 + .long 1426063360 + .long 695915 + .long 1470221620 + .long 1041675499 + .long 2566914048 + .long 714911 + .long 3182425146 + .long 1041483134 + .long 3087007744 + .long 734114 + .long 3131698208 + .long 1042208657 + .long 4068474880 + .long 753526 + .long 2300504125 + .long 1041428596 + .long 2415919104 + .long 773150 + .long 2290297931 + .long 1037388400 + .long 3716153344 + .long 792987 + .long 3532148223 + .long 1041626194 + .long 771751936 + .long 813041 + .long 1161884404 + .long 1042015258 + .long 3699376128 + .long 833312 + .long 876383176 + .long 1037968878 + .long 1241513984 + .long 853805 + .long 3379986796 + .long 1042213153 + .long 3699376128 + .long 874520 + .long 1545797737 + .long 1041681569 + .long 58720256 + .long 895462 + .long 2925146801 + .long 1042212567 + .long 855638016 + .long 916631 + .long 1316627971 + .long 1038516204 + .long 3883925504 + .long 938030 + .long 3267869137 + .long 1040337004 + .long 2726297600 + .long 959663 + .long 3720868999 + .long 1041782409 + .long 3992977408 + .long 981531 + .long 433316142 + .long 1041994064 + .long 1526726656 + .long 1003638 + .long 781232103 + .long 1040093400 + .long 2172649472 + .long 1025985 + .type Tbl_addr,@object + .size Tbl_addr,1024 + .align 16 +mmask: + .long 4294967232 + .long 0 + .long 4294967232 + .long 0 + .type mmask,@object + .size mmask,16 + .align 16 +bias: + .long 65472 + .long 0 + .long 65472 + .long 0 + .type bias,@object + .size bias,16 + .align 16 +emask: + .long 0 + .long 4293918720 + .long 0 + .long 4293918720 + .type emask,@object + .size emask,16 + .align 16 +cvl: + .long 2773927732 + .long 1053236707 + .long 381774871 + .long 1062650220 + .long 379653899 + .long 1056571845 + .long 286331153 + .long 1065423121 + .long 436314138 + .long 1059717536 + .long 1431655765 + .long 1067799893 + .long 1431655765 + .long 1069897045 + .long 0 + .long 1071644672 + .type cvl,@object + .size cvl,64 + .align 8 +XMAX: + .long 4294967295 + .long 2146435071 + .type XMAX,@object + .size XMAX,8 + .align 8 +INF: + .long 0 + .long 2146435072 + .type INF,@object + .size INF,8 + .align 8 +HIGHMASK: + .long 4227858432 + .long 4294967295 + .type HIGHMASK,@object + .size HIGHMASK,8 + .data + .section .note.GNU-stack, "" +// -- Begin DWARF2 SEGMENT .eh_frame + .section .eh_frame,"a",@progbits +.eh_frame_seg: + .align 1 + .4byte 0x00000014 + .8byte 0x00527a0100000000 + .8byte 0x08070c1b01107801 + .4byte 0x00000190 + .4byte 0x0000001c + .4byte 0x0000001c + .4byte ..___tag_value_expm1.1-. + .4byte ..___tag_value_expm1.5-..___tag_value_expm1.1 + .2byte 0x0400 + .4byte ..___tag_value_expm1.3-..___tag_value_expm1.1 + .2byte 0x400e + .byte 0x04 + .4byte ..___tag_value_expm1.4-..___tag_value_expm1.3 + .2byte 0x080e + .byte 0x00 +# End |