diff options
Diffstat (limited to 'libm/x86_64/s_cbrt.S')
-rw-r--r-- | libm/x86_64/s_cbrt.S | 754 |
1 files changed, 754 insertions, 0 deletions
diff --git a/libm/x86_64/s_cbrt.S b/libm/x86_64/s_cbrt.S new file mode 100644 index 0000000..4aa4373 --- /dev/null +++ b/libm/x86_64/s_cbrt.S @@ -0,0 +1,754 @@ +/* +Copyright (c) 2014, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// Assume x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2. +// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5], +// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision +// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5] +// (T stores the high 53 bits, D stores the low order bits) +// Result=2^k*T+(2^k*T*r)*P+2^k*D +// where P=p1+p2*r+..+p8*r^7 +// +// Special cases: +// cbrt(NaN) = quiet NaN, and raise invalid exception +// cbrt(INF) = that INF +// cbrt(+/-0) = +/-0 +// +/******************************************************************************/ + +#include <private/bionic_asm.h> +# -- Begin cbrt +ENTRY(cbrt) +# parameter 1: %xmm0 +..B1.1: +..___tag_value_cbrt.1: + subq $24, %rsp +..___tag_value_cbrt.3: + movsd %xmm0, (%rsp) +..B1.2: + movq %xmm0, %xmm7 + movl $524032, %edx + movsd EXP_MSK3(%rip), %xmm5 + movsd EXP_MSK2(%rip), %xmm3 + psrlq $44, %xmm7 + pextrw $0, %xmm7, %ecx + movd %xmm7, %eax + movsd EXP_MASK(%rip), %xmm1 + movsd SIG_MASK(%rip), %xmm2 + andl $248, %ecx + lea rcp_table(%rip), %r8 + movsd (%rcx,%r8), %xmm4 + movq %rax, %r9 + andl %eax, %edx + cmpl $0, %edx + je .L_2TAG_PACKET_0.0.1 + cmpl $524032, %edx + je .L_2TAG_PACKET_1.0.1 + shrl $8, %edx + shrq $8, %r9 + andpd %xmm0, %xmm2 + andpd %xmm5, %xmm0 + orpd %xmm2, %xmm3 + orpd %xmm0, %xmm1 + movapd coeff_table(%rip), %xmm5 + movl $5462, %eax + movapd 16+coeff_table(%rip), %xmm6 + mull %edx + movq %r9, %rdx + andq $2047, %r9 + shrl $14, %eax + andl $2048, %edx + subq %rax, %r9 + subq %rax, %r9 + subq %rax, %r9 + shlq $8, %r9 + addl $682, %eax + orl %edx, %eax + movd %eax, %xmm7 + addq %r9, %rcx + psllq $52, %xmm7 +.L_2TAG_PACKET_2.0.1: + movapd 32+coeff_table(%rip), %xmm2 + movapd 48+coeff_table(%rip), %xmm0 + subsd %xmm3, %xmm1 + movq %xmm7, %xmm3 + lea cbrt_table(%rip), %r8 + mulsd (%rcx,%r8), %xmm7 + mulsd %xmm4, %xmm1 + lea D_table(%rip), %r8 + mulsd (%rcx,%r8), %xmm3 + movapd %xmm1, %xmm4 + unpcklpd %xmm1, %xmm1 + mulpd %xmm1, %xmm5 + mulpd %xmm1, %xmm6 + mulpd %xmm1, %xmm1 + addpd %xmm5, %xmm2 + addpd %xmm6, %xmm0 + mulpd %xmm1, %xmm2 + mulpd %xmm1, %xmm1 + mulsd %xmm7, %xmm4 + addpd %xmm2, %xmm0 + mulsd %xmm0, %xmm1 + unpckhpd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + mulsd %xmm4, %xmm0 + addsd %xmm3, %xmm0 + addsd %xmm7, %xmm0 + jmp ..B1.4 +.L_2TAG_PACKET_0.0.1: + mulsd SCALE63(%rip), %xmm0 + movq %xmm0, %xmm7 + movl $524032, %edx + psrlq $44, %xmm7 + pextrw $0, %xmm7, %ecx + movd %xmm7, %eax + andl $248, %ecx + lea rcp_table(%rip), %r8 + movsd (%rcx,%r8), %xmm4 + movq %rax, %r9 + andl %eax, %edx + shrl $8, %edx + shrq $8, %r9 + cmpl $0, %edx + je .L_2TAG_PACKET_3.0.1 + andpd %xmm0, %xmm2 + andpd %xmm5, %xmm0 + orpd %xmm2, %xmm3 + orpd %xmm0, %xmm1 + movapd coeff_table(%rip), %xmm5 + movl $5462, %eax + movapd 16+coeff_table(%rip), %xmm6 + mull %edx + movq %r9, %rdx + andq $2047, %r9 + shrl $14, %eax + andl $2048, %edx + subq %rax, %r9 + subq %rax, %r9 + subq %rax, %r9 + shlq $8, %r9 + addl $661, %eax + orl %edx, %eax + movd %eax, %xmm7 + addq %r9, %rcx + psllq $52, %xmm7 + jmp .L_2TAG_PACKET_2.0.1 +.L_2TAG_PACKET_3.0.1: + cmpq $0, %r9 + jne .L_2TAG_PACKET_4.0.1 + xorpd %xmm0, %xmm0 + jmp ..B1.4 +.L_2TAG_PACKET_4.0.1: + movsd ZERON(%rip), %xmm0 + jmp ..B1.4 +.L_2TAG_PACKET_1.0.1: + movl 4(%rsp), %eax + movl (%rsp), %edx + movl %eax, %ecx + andl $2147483647, %ecx + cmpl $2146435072, %ecx + ja .L_2TAG_PACKET_5.0.1 + cmpl $0, %edx + jne .L_2TAG_PACKET_5.0.1 + cmpl $2146435072, %eax + jne .L_2TAG_PACKET_6.0.1 + movsd INF(%rip), %xmm0 + jmp ..B1.4 +.L_2TAG_PACKET_6.0.1: + movsd NEG_INF(%rip), %xmm0 + jmp ..B1.4 +.L_2TAG_PACKET_5.0.1: + movsd (%rsp), %xmm0 + addsd %xmm0, %xmm0 + movq %xmm0, 8(%rsp) +.L_2TAG_PACKET_7.0.1: +..B1.4: + addq $24, %rsp +..___tag_value_cbrt.4: + ret +..___tag_value_cbrt.5: +END(cbrt) +# -- End cbrt + .section .rodata, "a" + .align 16 + .align 16 +coeff_table: + .long 1553778919 + .long 3213899486 + .long 3534952507 + .long 3215266280 + .long 1646371399 + .long 3214412045 + .long 477218588 + .long 3216798151 + .long 3582521621 + .long 1066628362 + .long 1007461464 + .long 1068473053 + .long 889629714 + .long 1067378449 + .long 1431655765 + .long 1070945621 + .type coeff_table,@object + .size coeff_table,64 + .align 4 +EXP_MSK3: + .long 4294967295 + .long 1048575 + .type EXP_MSK3,@object + .size EXP_MSK3,8 + .align 4 +EXP_MSK2: + .long 0 + .long 3220193280 + .type EXP_MSK2,@object + .size EXP_MSK2,8 + .align 4 +EXP_MASK: + .long 0 + .long 3220176896 + .type EXP_MASK,@object + .size EXP_MASK,8 + .align 4 +SIG_MASK: + .long 0 + .long 1032192 + .type SIG_MASK,@object + .size SIG_MASK,8 + .align 4 +rcp_table: + .long 528611360 + .long 3220144632 + .long 2884679527 + .long 3220082993 + .long 1991868891 + .long 3220024928 + .long 2298714891 + .long 3219970134 + .long 58835168 + .long 3219918343 + .long 3035110223 + .long 3219869313 + .long 1617585086 + .long 3219822831 + .long 2500867033 + .long 3219778702 + .long 4241943008 + .long 3219736752 + .long 258732970 + .long 3219696825 + .long 404232216 + .long 3219658776 + .long 2172167368 + .long 3219622476 + .long 1544257904 + .long 3219587808 + .long 377579543 + .long 3219554664 + .long 1616385542 + .long 3219522945 + .long 813783277 + .long 3219492562 + .long 3940743189 + .long 3219463431 + .long 2689777499 + .long 3219435478 + .long 1700977147 + .long 3219408632 + .long 3169102082 + .long 3219382828 + .long 327235604 + .long 3219358008 + .long 1244336319 + .long 3219334115 + .long 1300311200 + .long 3219311099 + .long 3095471925 + .long 3219288912 + .long 2166487928 + .long 3219267511 + .long 2913108253 + .long 3219246854 + .long 293672978 + .long 3219226904 + .long 288737297 + .long 3219207624 + .long 1810275472 + .long 3219188981 + .long 174592167 + .long 3219170945 + .long 3539053052 + .long 3219153485 + .long 2164392968 + .long 3219136576 + .type rcp_table,@object + .size rcp_table,256 + .align 4 +cbrt_table: + .long 572345495 + .long 1072698681 + .long 1998204467 + .long 1072709382 + .long 3861501553 + .long 1072719872 + .long 2268192434 + .long 1072730162 + .long 2981979308 + .long 1072740260 + .long 270859143 + .long 1072750176 + .long 2958651392 + .long 1072759916 + .long 313113243 + .long 1072769490 + .long 919449400 + .long 1072778903 + .long 2809328903 + .long 1072788162 + .long 2222981587 + .long 1072797274 + .long 2352530781 + .long 1072806244 + .long 594152517 + .long 1072815078 + .long 1555767199 + .long 1072823780 + .long 4282421314 + .long 1072832355 + .long 2355578597 + .long 1072840809 + .long 1162590619 + .long 1072849145 + .long 797864051 + .long 1072857367 + .long 431273680 + .long 1072865479 + .long 2669831148 + .long 1072873484 + .long 733477752 + .long 1072881387 + .long 4280220604 + .long 1072889189 + .long 801961634 + .long 1072896896 + .long 2915370760 + .long 1072904508 + .long 1159613482 + .long 1072912030 + .long 2689944798 + .long 1072919463 + .long 1248687822 + .long 1072926811 + .long 2967951030 + .long 1072934075 + .long 630170432 + .long 1072941259 + .long 3760898254 + .long 1072948363 + .long 0 + .long 1072955392 + .long 2370273294 + .long 1072962345 + .long 1261754802 + .long 1072972640 + .long 546334065 + .long 1072986123 + .long 1054893830 + .long 1072999340 + .long 1571187597 + .long 1073012304 + .long 1107975175 + .long 1073025027 + .long 3606909377 + .long 1073037519 + .long 1113616747 + .long 1073049792 + .long 4154744632 + .long 1073061853 + .long 3358931423 + .long 1073073713 + .long 4060702372 + .long 1073085379 + .long 747576176 + .long 1073096860 + .long 3023138255 + .long 1073108161 + .long 1419988548 + .long 1073119291 + .long 1914185305 + .long 1073130255 + .long 294389948 + .long 1073141060 + .long 3761802570 + .long 1073151710 + .long 978281566 + .long 1073162213 + .long 823148820 + .long 1073172572 + .long 2420954441 + .long 1073182792 + .long 3815449908 + .long 1073192878 + .long 2046058587 + .long 1073202835 + .long 1807524753 + .long 1073212666 + .long 2628681401 + .long 1073222375 + .long 3225667357 + .long 1073231966 + .long 1555307421 + .long 1073241443 + .long 3454043099 + .long 1073250808 + .long 1208137896 + .long 1073260066 + .long 3659916772 + .long 1073269218 + .long 1886261264 + .long 1073278269 + .long 3593647839 + .long 1073287220 + .long 3086012205 + .long 1073296075 + .long 2769796922 + .long 1073304836 + .long 888716057 + .long 1073317807 + .long 2201465623 + .long 1073334794 + .long 164369365 + .long 1073351447 + .long 3462666733 + .long 1073367780 + .long 2773905457 + .long 1073383810 + .long 1342879088 + .long 1073399550 + .long 2543933975 + .long 1073415012 + .long 1684477781 + .long 1073430209 + .long 3532178543 + .long 1073445151 + .long 1147747300 + .long 1073459850 + .long 1928031793 + .long 1073474314 + .long 2079717015 + .long 1073488553 + .long 4016765315 + .long 1073502575 + .long 3670431139 + .long 1073516389 + .long 3549227225 + .long 1073530002 + .long 11637607 + .long 1073543422 + .long 588220169 + .long 1073556654 + .long 2635407503 + .long 1073569705 + .long 2042029317 + .long 1073582582 + .long 1925128962 + .long 1073595290 + .long 4136375664 + .long 1073607834 + .long 759964600 + .long 1073620221 + .long 4257606771 + .long 1073632453 + .long 297278907 + .long 1073644538 + .long 3655053093 + .long 1073656477 + .long 2442253172 + .long 1073668277 + .long 1111876799 + .long 1073679941 + .long 3330973139 + .long 1073691472 + .long 3438879452 + .long 1073702875 + .long 3671565478 + .long 1073714153 + .long 1317849547 + .long 1073725310 + .long 1642364115 + .long 1073736348 + .type cbrt_table,@object + .size cbrt_table,768 + .align 4 +D_table: + .long 4050900474 + .long 1014427190 + .long 1157977860 + .long 1016444461 + .long 1374568199 + .long 1017271387 + .long 2809163288 + .long 1016882676 + .long 3742377377 + .long 1013168191 + .long 3101606597 + .long 1017541672 + .long 65224358 + .long 1017217597 + .long 2691591250 + .long 1017266643 + .long 4020758549 + .long 1017689313 + .long 1316310992 + .long 1018030788 + .long 1031537856 + .long 1014090882 + .long 3261395239 + .long 1016413641 + .long 886424999 + .long 1016313335 + .long 3114776834 + .long 1014195875 + .long 1681120620 + .long 1017825416 + .long 1329600273 + .long 1016625740 + .long 465474623 + .long 1017097119 + .long 4251633980 + .long 1017169077 + .long 1986990133 + .long 1017710645 + .long 752958613 + .long 1017159641 + .long 2216216792 + .long 1018020163 + .long 4282860129 + .long 1015924861 + .long 1557627859 + .long 1016039538 + .long 3889219754 + .long 1018086237 + .long 3684996408 + .long 1017353275 + .long 723532103 + .long 1017717141 + .long 2951149676 + .long 1012528470 + .long 831890937 + .long 1017830553 + .long 1031212645 + .long 1017387331 + .long 2741737450 + .long 1017604974 + .long 2863311531 + .long 1003776682 + .long 4276736099 + .long 1013153088 + .long 4111778382 + .long 1015673686 + .long 1728065769 + .long 1016413986 + .long 2708718031 + .long 1018078833 + .long 1069335005 + .long 1015291224 + .long 700037144 + .long 1016482032 + .long 2904566452 + .long 1017226861 + .long 4074156649 + .long 1017622651 + .long 25019565 + .long 1015245366 + .long 3601952608 + .long 1015771755 + .long 3267129373 + .long 1017904664 + .long 503203103 + .long 1014921629 + .long 2122011730 + .long 1018027866 + .long 3927295461 + .long 1014189456 + .long 2790625147 + .long 1016024251 + .long 1330460186 + .long 1016940346 + .long 4033568463 + .long 1015538390 + .long 3695818227 + .long 1017509621 + .long 257573361 + .long 1017208868 + .long 3227697852 + .long 1017337964 + .long 234118548 + .long 1017169577 + .long 4009025803 + .long 1017278524 + .long 1948343394 + .long 1017749310 + .long 678398162 + .long 1018144239 + .long 3083864863 + .long 1016669086 + .long 2415453452 + .long 1017890370 + .long 175467344 + .long 1017330033 + .long 3197359580 + .long 1010339928 + .long 2071276951 + .long 1015941358 + .long 268372543 + .long 1016737773 + .long 938132959 + .long 1017389108 + .long 1816750559 + .long 1017337448 + .long 4119203749 + .long 1017152174 + .long 2578653878 + .long 1013108497 + .long 2470331096 + .long 1014678606 + .long 123855735 + .long 1016553320 + .long 1265650889 + .long 1014782687 + .long 3414398172 + .long 1017182638 + .long 1040773369 + .long 1016158401 + .long 3483628886 + .long 1016886550 + .long 4140499405 + .long 1016191425 + .long 3893477850 + .long 1016964495 + .long 3935319771 + .long 1009634717 + .long 2978982660 + .long 1015027112 + .long 2452709923 + .long 1017990229 + .long 3190365712 + .long 1015835149 + .long 4237588139 + .long 1015832925 + .long 2610678389 + .long 1017962711 + .long 2127316774 + .long 1017405770 + .long 824267502 + .long 1017959463 + .long 2165924042 + .long 1017912225 + .long 2774007076 + .long 1013257418 + .long 4123916326 + .long 1017582284 + .long 1976417958 + .long 1016959909 + .long 4092806412 + .long 1017711279 + .long 119251817 + .long 1015363631 + .long 3475418768 + .long 1017675415 + .long 1972580503 + .long 1015470684 + .long 815541017 + .long 1017517969 + .long 2429917451 + .long 1017397776 + .long 4062888482 + .long 1016749897 + .long 68284153 + .long 1017925678 + .long 2207779246 + .long 1016320298 + .long 1183466520 + .long 1017408657 + .long 143326427 + .long 1017060403 + .type D_table,@object + .size D_table,768 + .align 4 +SCALE63: + .long 0 + .long 1138753536 + .type SCALE63,@object + .size SCALE63,8 + .align 4 +ZERON: + .long 0 + .long 2147483648 + .type ZERON,@object + .size ZERON,8 + .align 4 +INF: + .long 0 + .long 2146435072 + .type INF,@object + .size INF,8 + .align 4 +NEG_INF: + .long 0 + .long 4293918720 + .type NEG_INF,@object + .size NEG_INF,8 + .data + .section .note.GNU-stack, "" +// -- Begin DWARF2 SEGMENT .eh_frame + .section .eh_frame,"a",@progbits +.eh_frame_seg: + .align 1 + .4byte 0x00000014 + .8byte 0x00527a0100000000 + .8byte 0x08070c1b01107801 + .4byte 0x00000190 + .4byte 0x0000001c + .4byte 0x0000001c + .4byte ..___tag_value_cbrt.1-. + .4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1 + .2byte 0x0400 + .4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1 + .2byte 0x200e + .byte 0x04 + .4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3 + .2byte 0x080e + .byte 0x00 +# End |