diff options
Diffstat (limited to 'libm/x86_64/e_exp.S')
-rw-r--r-- | libm/x86_64/e_exp.S | 636 |
1 files changed, 636 insertions, 0 deletions
diff --git a/libm/x86_64/e_exp.S b/libm/x86_64/e_exp.S new file mode 100644 index 0000000..6882dfc --- /dev/null +++ b/libm/x86_64/e_exp.S @@ -0,0 +1,636 @@ +/* +Copyright (c) 2014, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// Description: +// Let K = 64 (table size). 
//   (continuation of the algorithm description; K = 64 is the table size)
//
//      e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//   where
//      x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//      m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//      values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//      P(y) is a minimax polynomial approximation of exp(y)-1
//      on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//      calculated with Maple V).
//
//   NOTE(review): the code below takes j = m & 63 and n = m >> 6
//   (arithmetic shift), i.e. j in [0..K-1] rather than [-K/2..K/2].
//
//   To avoid problems with arithmetic overflow and underflow, the value of
//   2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2]
//   and BIAS is the value of the exponent bias.
//
//   Special cases:
//    exp(NaN) = NaN
//    exp(+INF) = +INF
//    exp(-INF) = 0
//    exp(x) = 1 for subnormals
//    for finite argument, only exp(0)=1 is exact
//    For IEEE double
//      if x >  709.782712893383973096 then exp(x) overflows
//      if x < -745.133219101941108420 then exp(x) underflows
//
/******************************************************************************/

#include <private/bionic_asm.h>

//------------------------------------------------------------------------------
// double exp(double x)
//
// In:      %xmm0 = x
// Out:     %xmm0 = exp(x) in the low quadword (upper quadword unspecified)
// Writes:  %rax, %rcx, %rdx, %r8, %xmm0-%xmm7, flags
// Stack:   24-byte frame; x is spilled to 8(%rsp) so that the special-case
//          paths can reload its low dword (8(%rsp)) and high dword (12(%rsp))
// Calls:   nothing (leaf function)
//
// NOTE(review): only caller-saved registers of the System V AMD64 calling
// convention are written - confirm that this is the ABI of the build target.
//
// The stack frame layout is described by the hand-written .eh_frame record at
// the end of this file, which refers to the ..___tag_value_exp.N labels; keep
// those labels attached to the instructions that move %rsp.
//------------------------------------------------------------------------------
# -- Begin exp
ENTRY(exp)
# parameter 1: %xmm0
..B1.1:
..___tag_value_exp.1:
        subq      $24, %rsp
..___tag_value_exp.3:
        // Keep a copy of x in memory for the special-case paths.
        movsd     %xmm0, 8(%rsp)
..B1.2:
        // Duplicate x into both lanes and preload the reduction constants.
        unpcklpd  %xmm0, %xmm0
        movapd    cv(%rip), %xmm1
        movapd    Shifter(%rip), %xmm6
        movapd    16+cv(%rip), %xmm2
        movapd    32+cv(%rip), %xmm3
        // eax = bits 48..62 of x (biased exponent plus top 4 mantissa bits).
        // edx gets its sign bit set when eax > 16527 (0x408F) or
        // eax < 15504 (0x3C90), i.e. |x| >= 1024 or |x| < 2^-54 (this also
        // catches INF and NaN). Those inputs go to the special-case code.
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        movl      $16527, %edx
        subl      %eax, %edx
        subl      $15504, %eax
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2
        // Argument reduction: m = round(x * K/log(2)), obtained by adding and
        // subtracting the Shifter constant (1.5 * 2^52). xmm7 keeps the
        // shifted sum, whose low dword holds m as an integer.
        mulpd     %xmm0, %xmm1
        addpd     %xmm6, %xmm1
        movapd    %xmm1, %xmm7
        subpd     %xmm6, %xmm1
        // y = x - m*(log(2)/K), with log(2)/K split into a high part
        // (16+cv) and a low part (32+cv).
        mulpd     %xmm1, %xmm2
        movapd    64+cv(%rip), %xmm4
        mulpd     %xmm1, %xmm3
        movapd    80+cv(%rip), %xmm5
        subpd     %xmm2, %xmm0
        // eax = m; ecx = 16*(m & 63) = byte offset of table entry j;
        // eax = edx = n = m >> 6.
        movd      %xmm7, %eax
        movl      %eax, %ecx
        andl      $63, %ecx
        shll      $4, %ecx
        sarl      $6, %eax
        movl      %eax, %edx
        // Build the exponent field of 2^n in xmm7:
        // ((m & ~63) + 1023*64) << 46 == (n + 1023) << 52.
        movdqa    mmask(%rip), %xmm6
        pand      %xmm6, %xmm7
        movdqa    bias(%rip), %xmm6
        paddq     %xmm6, %xmm7
        psllq     $46, %xmm7
        subpd     %xmm3, %xmm0
        // xmm2 = table entry j: low quadword is the correction term that is
        // added to y below, high quadword is the main table value.
        lea       Tbl_addr(%rip), %r8
        movapd    (%rcx,%r8), %xmm2
        // Polynomial evaluation, two lanes in parallel. With the
        // coefficients c6, c4 (64+cv), c5, c3 (80+cv) and c2 (48+cv):
        //   low lane : y^5 * (c5 + c6*y)
        //   high lane: y^3 * (c3 + c4*y)
        // and the scalar sum accumulated in xmm0 is
        //   y + table_lo + c2*y^2 + (high lane) + (low lane).
        mulpd     %xmm0, %xmm4
        movapd    %xmm0, %xmm6
        movapd    %xmm0, %xmm1
        mulpd     %xmm6, %xmm6
        mulpd     %xmm6, %xmm0
        addpd     %xmm4, %xmm5
        mulsd     %xmm6, %xmm0
        mulpd     48+cv(%rip), %xmm6
        addsd     %xmm2, %xmm1
        unpckhpd  %xmm2, %xmm2
        mulpd     %xmm5, %xmm0
        addsd     %xmm0, %xmm1
        // xmm2 = main table value with the exponent of 2^n ORed in.
        orpd      %xmm7, %xmm2
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0
        addsd     %xmm6, %xmm0
        // Unsigned test of n + 894 > 1916, i.e. n > 1022 or n < -894:
        // the result exponent may leave the normal range, take the slow path.
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2
        // Fast path: result = T*p + T, where T = xmm2 and p = xmm0.
        mulsd     %xmm2, %xmm0
        addsd     %xmm2, %xmm0
        jmp       ..B1.5

        // Slow path: scale in two steps. The exponent of T is lowered by
        // n >> 1 here and the result is multiplied by 2^(n >> 1) (built in
        // xmm3 by adding the exponent of 1.0 from ebias) at the end.
        // xmm4 is an all-ones mask shifted left by (-1022 - n); it is used
        // to split the rescaled T into a masked part (xmm4) and a remainder
        // (xmm2).
.L_2TAG_PACKET_1.0.2:
        xorpd     %xmm3, %xmm3
        movapd    ALLONES(%rip), %xmm4
        movl      $-1022, %edx
        subl      %eax, %edx
        movd      %edx, %xmm5
        psllq     %xmm5, %xmm4
        movl      %eax, %ecx
        sarl      $1, %eax
        pinsrw    $3, %eax, %xmm3
        movapd    ebias(%rip), %xmm6
        psllq     $4, %xmm3
        psubd     %xmm3, %xmm2
        mulsd     %xmm2, %xmm0
        // -1022 - n > 52, i.e. n < -1074.
        cmpl      $52, %edx
        jg        .L_2TAG_PACKET_2.0.2
        andpd     %xmm2, %xmm4
        paddd     %xmm6, %xmm3
        subsd     %xmm4, %xmm2
        addsd     %xmm2, %xmm0
        // n >= 1023: possible overflow.
        cmpl      $1023, %ecx
        jge       .L_2TAG_PACKET_3.0.2
        // Branch when (-1022 - n) is zero and the sign bit of the partial
        // sum is clear.
        pextrw    $3, %xmm0, %ecx
        andl      $32768, %ecx
        orl       %ecx, %edx
        cmpl      $0, %edx
        je        .L_2TAG_PACKET_4.0.2
        movapd    %xmm0, %xmm6
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        // A zero exponent field (mask 0x7FF0) means the scaled result is
        // subnormal or zero.
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $0, %ecx
        je        .L_2TAG_PACKET_5.0.2
        jmp       ..B1.5

        // Subnormal result: the two scaled parts (xmm6 and xmm4) are combined
        // with integer operations on their bit patterns.
        // NOTE(review): the exact rounding intent of this sequence is not
        // documented in this file - confirm before modifying it.
.L_2TAG_PACKET_5.0.2:
        mulsd     %xmm3, %xmm6
        mulsd     %xmm3, %xmm4
        movq      %xmm6, %xmm0
        pxor      %xmm4, %xmm6
        psrad     $31, %xmm6
        pshufd    $85, %xmm6, %xmm6
        psllq     $1, %xmm0
        psrlq     $1, %xmm0
        pxor      %xmm6, %xmm0
        psrlq     $63, %xmm6
        paddq     %xmm6, %xmm0
        paddq     %xmm4, %xmm0
        jmp       .L_2TAG_PACKET_6.0.2

.L_2TAG_PACKET_4.0.2:
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        jmp       ..B1.5

        // n >= 1023: finish the scaling; an all-ones exponent field (0x7FF0)
        // in the result means it overflowed.
.L_2TAG_PACKET_3.0.2:
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $32752, %ecx
        jnb       .L_2TAG_PACKET_7.0.2
        jmp       ..B1.5

        // n < -1074: finish the scaling without the high/low split.
.L_2TAG_PACKET_2.0.2:
        paddd     %xmm6, %xmm3
        addpd     %xmm2, %xmm0
        mulsd     %xmm3, %xmm0
        jmp       .L_2TAG_PACKET_6.0.2

        // |x| >= 1024. eax = high dword of |x|.
.L_2TAG_PACKET_8.0.2:
        // 0x7FF00000 and above: INF or NaN.
        cmpl      $2146435072, %eax
        jae       .L_2TAG_PACKET_9.0.2
        // Finite: the sign bit of x selects underflow or overflow.
        movl      12(%rsp), %eax
        cmpl      $-2147483648, %eax
        jae       .L_2TAG_PACKET_10.0.2
        // Large positive x: XMAX * XMAX overflows to +INF.
        movsd     XMAX(%rip), %xmm0
        mulsd     %xmm0, %xmm0
.L_2TAG_PACKET_7.0.2:
        jmp       .L_2TAG_PACKET_6.0.2

        // Large negative x: XMIN * XMIN underflows to zero.
.L_2TAG_PACKET_10.0.2:
        movsd     XMIN(%rip), %xmm0
        mulsd     %xmm0, %xmm0
        jmp       .L_2TAG_PACKET_6.0.2

        // INF or NaN. eax = high dword of |x|, edx = low dword of x.
.L_2TAG_PACKET_9.0.2:
        movl      8(%rsp), %edx
        // A non-zero mantissa (high or low part) means NaN.
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_11.0.2
        cmpl      $0, %edx
        jne       .L_2TAG_PACKET_11.0.2
        // Infinity: high dword 0x7FF00000 is +INF, anything else is -INF.
        movl      12(%rsp), %eax
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_12.0.2
        movsd     INF(%rip), %xmm0
        jmp       ..B1.5

        // exp(-INF) = 0
.L_2TAG_PACKET_12.0.2:
        movsd     ZERO(%rip), %xmm0
        jmp       ..B1.5

        // NaN: return x + x.
.L_2TAG_PACKET_11.0.2:
        movsd     8(%rsp), %xmm0
        addsd     %xmm0, %xmm0
        jmp       ..B1.5

        // Entry of the special-case code: |x| >= 1024 or |x| < 2^-54.
.L_2TAG_PACKET_0.0.2:
        movl      12(%rsp), %eax
        andl      $2147483647, %eax
        // 0x40900000 is the high dword of 1024.0.
        cmpl      $1083179008, %eax
        jae       .L_2TAG_PACKET_8.0.2
        // Tiny |x|: return x + 1.0.
        movsd     8(%rsp), %xmm0
        addsd     ONE_val(%rip), %xmm0
        jmp       ..B1.5

        // Common exit of the overflow/underflow paths. The stores of the
        // values 14 and 15 to (%rsp) and the store/reload of the result
        // through 16(%rsp) that used to be on these paths were removed:
        // nothing in this function read them back.
.L_2TAG_PACKET_6.0.2:
..B1.3:
.L_2TAG_PACKET_13.0.2:
..B1.5:
        addq      $24, %rsp
..___tag_value_exp.4:
        ret
..___tag_value_exp.5:
END(exp)
# -- End exp
        .section .rodata, "a"
        .align 16
        .align 16
// Reduction constants and polynomial coefficients. Each double is stored as
// two .long values, low dword first.
cv:
        // +0:  K/log(2) = 64/log(2), duplicated in both lanes
        .long   1697350398, 1079448903
        .long   1697350398, 1079448903
        // +16: high part of log(2)/K, duplicated
        .long   4277796864, 1065758274
        .long   4277796864, 1065758274
        // +32: low part of log(2)/K, duplicated
        .long   3164486458, 1025308570
        .long   3164486458, 1025308570
        // +48: coefficient of y^2 (approximately 1/2), duplicated
        .long   4294967294, 1071644671
        .long   4294967294, 1071644671
        // +64: coefficients of y^6 (approximately 1/720) and
        //      y^4 (approximately 1/24)
        .long   3811088480, 1062650204
        .long   1432067621, 1067799893
        // +80: coefficients of y^5 (approximately 1/120) and
        //      y^3 (approximately 1/6)
        .long   3230715663, 1065423125
        .long   1431604129, 1069897045
        .type   cv,@object
        .size   cv,96
        .align 16
// 1.5 * 2^52 in both lanes; adding it rounds a double to an integer that
// lands in the low mantissa bits.
Shifter:
        .long   0, 1127743488
        .long   0, 1127743488
        .type   Shifter,@object
        .size   Shifter,16
        .align 16
// 0xFFFFFFC0 in the low dword of each lane: clears the 6 table-index bits
// of m and the whole high dword.
mmask:
        .long   4294967232, 0
        .long   4294967232, 0
        .type   mmask,@object
        .size   mmask,16
        .align 16
// 65472 = 1023 * 64: the exponent bias, pre-scaled to line up with m.
bias:
        .long   65472, 0
        .long   65472, 0
        .type   bias,@object
        .size   bias,16
        .align 16
// 64 entries of 16 bytes, indexed by j = m & 63. One entry per line:
//   first two .long values : low quadword, added to the reduced argument
//   last two .long values  : high quadword, mantissa bits with a zero
//                            exponent field (the exponent of 2^n is ORed in
//                            by the code); per the algorithm description
//                            this is the tabulated value of 2^(j/K)
Tbl_addr:
        .long   0, 0, 0, 0
        .long   235107661, 1018002367, 1048019040, 11418
        .long   896005651, 1015861842, 3541402996, 22960
        .long   1642514529, 1012987726, 410360776, 34629
        .long   1568897900, 1016568486, 1828292879, 46424
        .long   1882168529, 1010744893, 852742562, 58348
        .long   509852888, 1017336174, 3490863952, 70401
        .long   653277307, 1017431380, 2930322911, 82586
        .long   1649557430, 1017729363, 1014845818, 94904
        .long   1058231231, 1015777676, 3949972341, 107355
        .long   1044000607, 1016786167, 828946858, 119943
        .long   1151779725, 1015705409, 2288159958, 132667
        .long   3819481236, 1016499965, 1853186616, 145530
        .long   2552227826, 1015039787, 1709341917, 158533
        .long   1829350193, 1015216097, 4112506593, 171677
        .long   1913391795, 1015756674, 2799960843, 184965
        .long   1303423926, 1015238005, 171030293, 198398
        .long   1574172746, 1016061241, 2992903935, 211976
        .long   3424156969, 1017196428, 926591434, 225703
        .long   1938513547, 1017631273, 887463926, 239579
        .long   2804567149, 1015390024, 1276261410, 253606
        .long   631083525, 1017690182, 569847337, 267786
        .long   1623370770, 1011049453, 1617004845, 282120
        .long   3667985273, 1013894369, 3049340112, 296610
        .long   3145379760, 1014403278, 3577096743, 311258
        .long   2603100681, 1017152460, 1990012070, 326066
        .long   3249202951, 1017448880, 1453150081, 341035
        .long   419288974, 1016280325, 917841882, 356167
        .long   3793507337, 1016095713, 3712504873, 371463
        .long   728023093, 1016345318, 363667784, 386927
        .long   2582678538, 1017123460, 2956612996, 402558
        .long   7592966, 1016721543, 2186617380, 418360
        .long   228611441, 1016696141, 1719614412, 434334
        .long   2261665670, 1017457593, 1013258798, 450482
        .long   544148907, 1017323666, 3907805043, 466805
        .long   2383914918, 1017143586, 1447192520, 483307
        .long   1176412038, 1017267372, 1944781190, 499988
        .long   2882956373, 1013312481, 919555682, 516851
        .long   3154077648, 1016528543, 2571947538, 533897
        .long   348651999, 1016405780, 2604962540, 551129
        .long   3253791412, 1015920431, 1110089947, 568549
        .long   1509121860, 1014756995, 2568320822, 586158
        .long   2617649212, 1017340090, 2966275556, 603959
        .long   553214634, 1016457425, 2682146383, 621954
        .long   730975783, 1014083580, 2191782032, 640145
        .long   1486499517, 1016818996, 2069751140, 658534
        .long   2595788928, 1016407932, 2990417244, 677123
        .long   1853053619, 1015310724, 1434058175, 695915
        .long   2462790535, 1015814775, 2572866477, 714911
        .long   3693944214, 1017259110, 3092190714, 734114
        .long   2979333550, 1017188654, 4076559942, 753526
        .long   174054861, 1014300631, 2420883922, 773150
        .long   816778419, 1014197934, 3716502172, 792987
        .long   3507050924, 1015341199, 777507147, 813041
        .long   1821514088, 1013410604, 3706687593, 833312
        .long   920623539, 1016295433, 1242007931, 853805
        .long   2789017511, 1014276997, 3707479175, 874520
        .long   3586233004, 1015962192, 64696965, 895462
        .long   474650514, 1016642419, 863738718, 916631
        .long   1614448851, 1014281732, 3884662774, 938030
        .long   2450082086, 1016164135, 2728693977, 959663
        .long   1101668360, 1015989180, 3999357479, 981531
        .long   835814894, 1015702697, 1533953344, 1003638
        .long   1301400989, 1014466875, 2174652632, 1025985
        .type   Tbl_addr,@object
        .size   Tbl_addr,1024
        .align 16
// All bits set; shifted left at run time to form a mantissa mask.
ALLONES:
        .long   4294967295, 4294967295
        .long   4294967295, 4294967295
        .type   ALLONES,@object
        .size   ALLONES,16
        .align 16
// Bit pattern of 1.0 in both lanes; added to a raw exponent offset to turn
// it into a power of two.
ebias:
        .long   0, 1072693248
        .long   0, 1072693248
        .type   ebias,@object
        .size   ebias,16
        .align 4
// 0x7FEFFFFFFFFFFFFF: largest finite double.
XMAX:
        .long   4294967295, 2146435071
        .type   XMAX,@object
        .size   XMAX,8
        .align 4
// 0x0010000000000000: smallest positive normal double.
XMIN:
        .long   0, 1048576
        .type   XMIN,@object
        .size   XMIN,8
        .align 4
// 0x7FF0000000000000: +infinity.
INF:
        .long   0, 2146435072
        .type   INF,@object
        .size   INF,8
        .align 4
ZERO:
        .long   0, 0
        .type   ZERO,@object
        .size   ZERO,8
        .align 4
// 0x3FF0000000000000: 1.0
ONE_val:
        .long   0, 1072693248
        .type   ONE_val,@object
        .size   ONE_val,8
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind information for exp.
// NOTE(review): the byte-level annotations below were decoded by hand from
// the constants - confirm against the .eh_frame format before relying on
// them.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        // CIE: length 0x14, id 0, version 1, augmentation "zR"
        .4byte 0x00000014
        .8byte 0x00527a0100000000
        // code align 1, data align -8, return-address register 16,
        // pc-relative 4-byte pointers, CFA = register 7 + 8
        .8byte 0x08070c1b01107801
        // return address saved at CFA-8, then padding
        .4byte 0x00000190
        // FDE: length 0x1c, CIE pointer, start address, address range
        .4byte 0x0000001c
        .4byte 0x0000001c
        .4byte ..___tag_value_exp.1-.
        .4byte ..___tag_value_exp.5-..___tag_value_exp.1
        // no augmentation data; advance to the point after the subq
        .2byte 0x0400
        .4byte ..___tag_value_exp.3-..___tag_value_exp.1
        // CFA offset becomes 32 (8 + the 24-byte frame)
        .2byte 0x200e
        // advance to the point after the addq
        .byte 0x04
        .4byte ..___tag_value_exp.4-..___tag_value_exp.3
        // CFA offset back to 8
        .2byte 0x080e
        .byte 0x00
# End