summaryrefslogtreecommitdiffstats
path: root/libm/x86_64/s_cbrt.S
diff options
context:
space:
mode:
Diffstat (limited to 'libm/x86_64/s_cbrt.S')
-rw-r--r--libm/x86_64/s_cbrt.S754
1 files changed, 754 insertions, 0 deletions
diff --git a/libm/x86_64/s_cbrt.S b/libm/x86_64/s_cbrt.S
new file mode 100644
index 0000000..4aa4373
--- /dev/null
+++ b/libm/x86_64/s_cbrt.S
@@ -0,0 +1,754 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/******************************************************************************/
+// ALGORITHM DESCRIPTION
+// ---------------------
+//
+// Assume x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2.
+// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5],
+// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision
+// cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5]
+// (T stores the high 53 bits, D stores the low order bits)
+// Result=2^k*T+(2^k*T*r)*P+2^k*D
+// where P=p1+p2*r+..+p8*r^7
+//
+// Special cases:
+// cbrt(NaN) = quiet NaN (computed as x+x; invalid is raised only for a signaling NaN)
+// cbrt(INF) = that INF
+// cbrt(+/-0) = +/-0
+//
+/******************************************************************************/
+
+#include <private/bionic_asm.h>
+# -- Begin cbrt: cube root of a double; table lookup on the leading mantissa bits plus a degree-7 polynomial in the reduced argument r
+ENTRY(cbrt)
+# parameter 1: %xmm0 (x, double precision); the result is left in %xmm0. Uses 24 bytes of stack scratch.
+..B1.1:
+..___tag_value_cbrt.1: # CFI anchor: function start (referenced from the .eh_frame data)
+ subq $24, %rsp # reserve 24 bytes of scratch
+..___tag_value_cbrt.3: # CFI anchor: stack pointer has been lowered
+ movsd %xmm0, (%rsp) # spill x; it is reloaded only on the Inf/NaN path
+..B1.2:
+ movq %xmm0, %xmm7 # xmm7 = bit pattern of x
+ movl $524032, %edx # 0x7ff00: exponent field mask once x has been shifted right by 44
+ movsd EXP_MSK3(%rip), %xmm5 # xmm5 = mantissa mask
+ movsd EXP_MSK2(%rip), %xmm3 # xmm3 = template for -(1.b1..b5 1)
+ psrlq $44, %xmm7 # low 20 bits are now sign : 11-bit exponent : top 8 mantissa bits
+ pextrw $0, %xmm7, %ecx # ecx = low 16 bits of x >> 44
+ movd %xmm7, %eax # eax = low 32 bits of x >> 44
+ movsd EXP_MASK(%rip), %xmm1 # xmm1 = template for -(1.b1..b52)
+ movsd SIG_MASK(%rip), %xmm2 # xmm2 = mask for the leading mantissa bits
+ andl $248, %ecx # ecx = 8 * (b1..b5): byte offset of an 8-byte table entry
+ lea rcp_table(%rip), %r8
+ movsd (%rcx,%r8), %xmm4 # xmm4 = rcp_table entry for b1..b5
+ movq %rax, %r9 # r9 = copy of sign : exponent : mantissa-top bits
+ andl %eax, %edx # edx = exponent field (still shifted left by 8)
+ cmpl $0, %edx
+ je .L_2TAG_PACKET_0.0.1 # exponent field 0: zero or subnormal
+ cmpl $524032, %edx
+ je .L_2TAG_PACKET_1.0.1 # exponent field all ones: Inf or NaN
+ shrl $8, %edx # edx = biased exponent e
+ shrq $8, %r9 # r9 = sign (bit 11) : biased exponent
+ andpd %xmm0, %xmm2 # xmm2 = leading mantissa bits of x
+ andpd %xmm5, %xmm0 # xmm0 = full mantissa of x
+ orpd %xmm2, %xmm3 # xmm3 = -(1.b1..b5 1), the negated table point
+ orpd %xmm0, %xmm1 # xmm1 = -(1.b1..b52), the negated significand
+ movapd coeff_table(%rip), %xmm5 # coefficient pair at offset 0
+ movl $5462, %eax # 5462 / 2^14 approximates 1/3
+ movapd 16+coeff_table(%rip), %xmm6 # coefficient pair at offset 16
+ mull %edx # edx:eax = 5462 * e (the product fits in eax)
+ movq %r9, %rdx # rdx = sign : exponent
+ andq $2047, %r9 # r9 = e
+ shrl $14, %eax # eax = q = e / 3 (integer quotient)
+ andl $2048, %edx # edx = sign bit, in bit position 11
+ subq %rax, %r9
+ subq %rax, %r9
+ subq %rax, %r9 # r9 = e - 3*q = j (0, 1 or 2)
+ shlq $8, %r9 # j * 256: each j owns a 32-entry (256-byte) slice of the tables
+ addl $682, %eax # q + 682 = biased exponent of the scale factor 2^k
+ orl %edx, %eax # attach the sign of x
+ movd %eax, %xmm7
+ addq %r9, %rcx # rcx = byte offset into cbrt_table and D_table
+ psllq $52, %xmm7 # xmm7 = sign : exponent moved to the top, i.e. +/-2^k
+.L_2TAG_PACKET_2.0.1: # common tail; expects xmm1, xmm3, xmm4, xmm5, xmm6, xmm7 and rcx as set up above
+ movapd 32+coeff_table(%rip), %xmm2 # coefficient pair at offset 32
+ movapd 48+coeff_table(%rip), %xmm0 # coefficient pair at offset 48
+ subsd %xmm3, %xmm1 # xmm1 = (1.b1..b5 1) - (1.b1..b52)
+ movq %xmm7, %xmm3 # xmm3 = copy of the scale factor
+ lea cbrt_table(%rip), %r8
+ mulsd (%rcx,%r8), %xmm7 # xmm7 = scale * T
+ mulsd %xmm4, %xmm1 # xmm1 = r; the table reciprocals have the sign bit set, which flips the difference
+ lea D_table(%rip), %r8
+ mulsd (%rcx,%r8), %xmm3 # xmm3 = scale * D
+ movapd %xmm1, %xmm4 # xmm4 = r
+ unpcklpd %xmm1, %xmm1 # xmm1 = (r, r)
+ mulpd %xmm1, %xmm5 # offset-0 pair * r
+ mulpd %xmm1, %xmm6 # offset-16 pair * r
+ mulpd %xmm1, %xmm1 # xmm1 = (r^2, r^2)
+ addpd %xmm5, %xmm2 # offset-32 pair + offset-0 pair * r
+ addpd %xmm6, %xmm0 # offset-48 pair + offset-16 pair * r
+ mulpd %xmm1, %xmm2 # multiply the upper terms by r^2
+ mulpd %xmm1, %xmm1 # xmm1 = (r^4, r^4)
+ mulsd %xmm7, %xmm4 # xmm4 = scale * T * r
+ addpd %xmm2, %xmm0 # each lane now holds one cubic in r
+ mulsd %xmm0, %xmm1 # xmm1 = r^4 * (low-lane cubic)
+ unpckhpd %xmm0, %xmm0 # move the high-lane cubic into the low lane
+ addsd %xmm1, %xmm0 # xmm0 = P, the full degree-7 polynomial
+ mulsd %xmm4, %xmm0 # (scale*T*r) * P
+ addsd %xmm3, %xmm0 # + scale*D
+ addsd %xmm7, %xmm0 # + scale*T = result
+ jmp ..B1.4
+.L_2TAG_PACKET_0.0.1: # exponent field was 0: x is zero or subnormal
+ mulsd SCALE63(%rip), %xmm0 # scale by 2^63 so that a subnormal becomes normal
+ movq %xmm0, %xmm7 # redo the field extraction on the scaled value
+ movl $524032, %edx
+ psrlq $44, %xmm7
+ pextrw $0, %xmm7, %ecx
+ movd %xmm7, %eax
+ andl $248, %ecx # ecx = 8 * (b1..b5)
+ lea rcp_table(%rip), %r8
+ movsd (%rcx,%r8), %xmm4 # xmm4 = rcp_table entry for b1..b5
+ movq %rax, %r9
+ andl %eax, %edx
+ shrl $8, %edx # edx = biased exponent of the scaled value
+ shrq $8, %r9 # r9 = sign : biased exponent
+ cmpl $0, %edx
+ je .L_2TAG_PACKET_3.0.1 # exponent is still 0: x was +0 or -0
+ andpd %xmm0, %xmm2 # xmm1, xmm2, xmm3, xmm5 still hold the masks loaded at entry
+ andpd %xmm5, %xmm0
+ orpd %xmm2, %xmm3 # xmm3 = -(1.b1..b5 1)
+ orpd %xmm0, %xmm1 # xmm1 = -(1.b1..b52)
+ movapd coeff_table(%rip), %xmm5
+ movl $5462, %eax
+ movapd 16+coeff_table(%rip), %xmm6
+ mull %edx # 5462 * e
+ movq %r9, %rdx
+ andq $2047, %r9
+ shrl $14, %eax # q = e / 3
+ andl $2048, %edx # sign bit
+ subq %rax, %r9
+ subq %rax, %r9
+ subq %rax, %r9 # j = e - 3*q
+ shlq $8, %r9
+ addl $661, %eax # 682 - 21: also undoes the pre-scaling, since cbrt(2^63) = 2^21
+ orl %edx, %eax
+ movd %eax, %xmm7
+ addq %r9, %rcx
+ psllq $52, %xmm7 # xmm7 = +/-2^k
+ jmp .L_2TAG_PACKET_2.0.1
+.L_2TAG_PACKET_3.0.1: # x is a zero; r9 holds only its sign bit here
+ cmpq $0, %r9
+ jne .L_2TAG_PACKET_4.0.1 # sign set: negative zero
+ xorpd %xmm0, %xmm0 # return +0
+ jmp ..B1.4
+.L_2TAG_PACKET_4.0.1:
+ movsd ZERON(%rip), %xmm0 # return -0
+ jmp ..B1.4
+.L_2TAG_PACKET_1.0.1: # exponent field all ones: tell Inf from NaN using the spilled copy of x
+ movl 4(%rsp), %eax # eax = high word of x
+ movl (%rsp), %edx # edx = low word of x
+ movl %eax, %ecx
+ andl $2147483647, %ecx # drop the sign bit
+ cmpl $2146435072, %ecx # compare with 0x7ff00000
+ ja .L_2TAG_PACKET_5.0.1 # high mantissa bits set: NaN
+ cmpl $0, %edx
+ jne .L_2TAG_PACKET_5.0.1 # low mantissa bits set: NaN
+ cmpl $2146435072, %eax
+ jne .L_2TAG_PACKET_6.0.1 # sign bit set: negative infinity
+ movsd INF(%rip), %xmm0 # return +Inf
+ jmp ..B1.4
+.L_2TAG_PACKET_6.0.1:
+ movsd NEG_INF(%rip), %xmm0 # return -Inf
+ jmp ..B1.4
+.L_2TAG_PACKET_5.0.1: # NaN input
+ movsd (%rsp), %xmm0
+ addsd %xmm0, %xmm0 # x + x yields a quiet NaN (a signaling NaN input raises invalid)
+ movq %xmm0, 8(%rsp) # stored to scratch; not read back anywhere in this function
+.L_2TAG_PACKET_7.0.1:
+..B1.4: # common exit
+ addq $24, %rsp # release the scratch area
+..___tag_value_cbrt.4: # CFI anchor: stack pointer restored
+ ret
+..___tag_value_cbrt.5: # CFI anchor: end of function
+END(cbrt)
+# -- End cbrt
+ .section .rodata, "a"
+ .align 16
+ .align 16
+coeff_table: # 8 doubles (low word first), loaded as four 16-byte pairs with movapd, hence the 16-byte alignment
+ .long 1553778919 # offset 0: coefficient of r^7 as evaluated by cbrt
+ .long 3213899486
+ .long 3534952507 # offset 8: coefficient of r^3
+ .long 3215266280
+ .long 1646371399 # offset 16: coefficient of r^5
+ .long 3214412045
+ .long 477218588 # offset 24: coefficient of r^1, bits 0xbfbc71c71c71c71c (about -1/9)
+ .long 3216798151
+ .long 3582521621 # offset 32: coefficient of r^6
+ .long 1066628362
+ .long 1007461464 # offset 40: coefficient of r^2
+ .long 1068473053
+ .long 889629714 # offset 48: coefficient of r^4
+ .long 1067378449
+ .long 1431655765 # offset 56: constant term, bits 0x3fd5555555555555 (about 1/3)
+ .long 1070945621
+ .type coeff_table,@object
+ .size coeff_table,64
+ .balign 4
+EXP_MSK3: # 0x000fffffffffffff: selects the 52 mantissa bits of a double
+ .long 0xffffffff # low word
+ .long 0x000fffff # high word
+ .type EXP_MSK3, @object
+ .size EXP_MSK3, . - EXP_MSK3
+ .balign 4
+EXP_MSK2: # 0xbff0400000000000 = -(1 + 2^-6): sign, exponent of 1.0, and mantissa bit 46
+ .long 0x00000000 # low word
+ .long 0xbff04000 # high word
+ .type EXP_MSK2, @object
+ .size EXP_MSK2, . - EXP_MSK2
+ .balign 4
+EXP_MASK: # 0xbff0000000000000 = -1.0: sign bit plus the exponent of 1.0
+ .long 0x00000000 # low word
+ .long 0xbff00000 # high word
+ .type EXP_MASK, @object
+ .size EXP_MASK, . - EXP_MASK
+ .balign 4
+SIG_MASK: # 0x000fc00000000000: selects the six leading mantissa bits (bits 46..51)
+ .long 0x00000000 # low word
+ .long 0x000fc000 # high word
+ .type SIG_MASK, @object
+ .size SIG_MASK, . - SIG_MASK
+ .align 4
+rcp_table: # 32 doubles (low word first), indexed by 8 * (b1..b5); every entry has its sign bit set
+ .long 528611360 # entry 0 (b1..b5 = 00000)
+ .long 3220144632
+ .long 2884679527
+ .long 3220082993
+ .long 1991868891
+ .long 3220024928
+ .long 2298714891
+ .long 3219970134
+ .long 58835168
+ .long 3219918343
+ .long 3035110223
+ .long 3219869313
+ .long 1617585086
+ .long 3219822831
+ .long 2500867033
+ .long 3219778702
+ .long 4241943008
+ .long 3219736752
+ .long 258732970
+ .long 3219696825
+ .long 404232216
+ .long 3219658776
+ .long 2172167368
+ .long 3219622476
+ .long 1544257904
+ .long 3219587808
+ .long 377579543
+ .long 3219554664
+ .long 1616385542
+ .long 3219522945
+ .long 813783277
+ .long 3219492562
+ .long 3940743189
+ .long 3219463431
+ .long 2689777499
+ .long 3219435478
+ .long 1700977147
+ .long 3219408632
+ .long 3169102082
+ .long 3219382828
+ .long 327235604
+ .long 3219358008
+ .long 1244336319
+ .long 3219334115
+ .long 1300311200
+ .long 3219311099
+ .long 3095471925
+ .long 3219288912
+ .long 2166487928
+ .long 3219267511
+ .long 2913108253
+ .long 3219246854
+ .long 293672978
+ .long 3219226904
+ .long 288737297
+ .long 3219207624
+ .long 1810275472
+ .long 3219188981
+ .long 174592167
+ .long 3219170945
+ .long 3539053052
+ .long 3219153485
+ .long 2164392968 # entry 31 (b1..b5 = 11111)
+ .long 3219136576
+ .type rcp_table,@object
+ .size rcp_table,256
+ .align 4
+cbrt_table: # 96 doubles (low word first): three 32-entry slices selected by j, indexed at j*256 + 8*(b1..b5); the T values of the file header
+ .long 572345495 # slice j = 0, entry 0
+ .long 1072698681
+ .long 1998204467
+ .long 1072709382
+ .long 3861501553
+ .long 1072719872
+ .long 2268192434
+ .long 1072730162
+ .long 2981979308
+ .long 1072740260
+ .long 270859143
+ .long 1072750176
+ .long 2958651392
+ .long 1072759916
+ .long 313113243
+ .long 1072769490
+ .long 919449400
+ .long 1072778903
+ .long 2809328903
+ .long 1072788162
+ .long 2222981587
+ .long 1072797274
+ .long 2352530781
+ .long 1072806244
+ .long 594152517
+ .long 1072815078
+ .long 1555767199
+ .long 1072823780
+ .long 4282421314
+ .long 1072832355
+ .long 2355578597
+ .long 1072840809
+ .long 1162590619
+ .long 1072849145
+ .long 797864051
+ .long 1072857367
+ .long 431273680
+ .long 1072865479
+ .long 2669831148
+ .long 1072873484
+ .long 733477752
+ .long 1072881387
+ .long 4280220604
+ .long 1072889189
+ .long 801961634
+ .long 1072896896
+ .long 2915370760
+ .long 1072904508
+ .long 1159613482
+ .long 1072912030
+ .long 2689944798
+ .long 1072919463
+ .long 1248687822
+ .long 1072926811
+ .long 2967951030
+ .long 1072934075
+ .long 630170432
+ .long 1072941259
+ .long 3760898254
+ .long 1072948363
+ .long 0
+ .long 1072955392
+ .long 2370273294
+ .long 1072962345
+ .long 1261754802 # slice j = 1, entry 0
+ .long 1072972640
+ .long 546334065
+ .long 1072986123
+ .long 1054893830
+ .long 1072999340
+ .long 1571187597
+ .long 1073012304
+ .long 1107975175
+ .long 1073025027
+ .long 3606909377
+ .long 1073037519
+ .long 1113616747
+ .long 1073049792
+ .long 4154744632
+ .long 1073061853
+ .long 3358931423
+ .long 1073073713
+ .long 4060702372
+ .long 1073085379
+ .long 747576176
+ .long 1073096860
+ .long 3023138255
+ .long 1073108161
+ .long 1419988548
+ .long 1073119291
+ .long 1914185305
+ .long 1073130255
+ .long 294389948
+ .long 1073141060
+ .long 3761802570
+ .long 1073151710
+ .long 978281566
+ .long 1073162213
+ .long 823148820
+ .long 1073172572
+ .long 2420954441
+ .long 1073182792
+ .long 3815449908
+ .long 1073192878
+ .long 2046058587
+ .long 1073202835
+ .long 1807524753
+ .long 1073212666
+ .long 2628681401
+ .long 1073222375
+ .long 3225667357
+ .long 1073231966
+ .long 1555307421
+ .long 1073241443
+ .long 3454043099
+ .long 1073250808
+ .long 1208137896
+ .long 1073260066
+ .long 3659916772
+ .long 1073269218
+ .long 1886261264
+ .long 1073278269
+ .long 3593647839
+ .long 1073287220
+ .long 3086012205
+ .long 1073296075
+ .long 2769796922
+ .long 1073304836
+ .long 888716057 # slice j = 2, entry 0
+ .long 1073317807
+ .long 2201465623
+ .long 1073334794
+ .long 164369365
+ .long 1073351447
+ .long 3462666733
+ .long 1073367780
+ .long 2773905457
+ .long 1073383810
+ .long 1342879088
+ .long 1073399550
+ .long 2543933975
+ .long 1073415012
+ .long 1684477781
+ .long 1073430209
+ .long 3532178543
+ .long 1073445151
+ .long 1147747300
+ .long 1073459850
+ .long 1928031793
+ .long 1073474314
+ .long 2079717015
+ .long 1073488553
+ .long 4016765315
+ .long 1073502575
+ .long 3670431139
+ .long 1073516389
+ .long 3549227225
+ .long 1073530002
+ .long 11637607
+ .long 1073543422
+ .long 588220169
+ .long 1073556654
+ .long 2635407503
+ .long 1073569705
+ .long 2042029317
+ .long 1073582582
+ .long 1925128962
+ .long 1073595290
+ .long 4136375664
+ .long 1073607834
+ .long 759964600
+ .long 1073620221
+ .long 4257606771
+ .long 1073632453
+ .long 297278907
+ .long 1073644538
+ .long 3655053093
+ .long 1073656477
+ .long 2442253172
+ .long 1073668277
+ .long 1111876799
+ .long 1073679941
+ .long 3330973139
+ .long 1073691472
+ .long 3438879452
+ .long 1073702875
+ .long 3671565478
+ .long 1073714153
+ .long 1317849547
+ .long 1073725310
+ .long 1642364115
+ .long 1073736348
+ .type cbrt_table,@object
+ .size cbrt_table,768
+ .align 4
+D_table: # 96 doubles (low word first), same layout and index as cbrt_table; the low-order D corrections of the file header
+ .long 4050900474 # slice j = 0, entry 0
+ .long 1014427190
+ .long 1157977860
+ .long 1016444461
+ .long 1374568199
+ .long 1017271387
+ .long 2809163288
+ .long 1016882676
+ .long 3742377377
+ .long 1013168191
+ .long 3101606597
+ .long 1017541672
+ .long 65224358
+ .long 1017217597
+ .long 2691591250
+ .long 1017266643
+ .long 4020758549
+ .long 1017689313
+ .long 1316310992
+ .long 1018030788
+ .long 1031537856
+ .long 1014090882
+ .long 3261395239
+ .long 1016413641
+ .long 886424999
+ .long 1016313335
+ .long 3114776834
+ .long 1014195875
+ .long 1681120620
+ .long 1017825416
+ .long 1329600273
+ .long 1016625740
+ .long 465474623
+ .long 1017097119
+ .long 4251633980
+ .long 1017169077
+ .long 1986990133
+ .long 1017710645
+ .long 752958613
+ .long 1017159641
+ .long 2216216792
+ .long 1018020163
+ .long 4282860129
+ .long 1015924861
+ .long 1557627859
+ .long 1016039538
+ .long 3889219754
+ .long 1018086237
+ .long 3684996408
+ .long 1017353275
+ .long 723532103
+ .long 1017717141
+ .long 2951149676
+ .long 1012528470
+ .long 831890937
+ .long 1017830553
+ .long 1031212645
+ .long 1017387331
+ .long 2741737450
+ .long 1017604974
+ .long 2863311531
+ .long 1003776682
+ .long 4276736099
+ .long 1013153088
+ .long 4111778382 # slice j = 1, entry 0
+ .long 1015673686
+ .long 1728065769
+ .long 1016413986
+ .long 2708718031
+ .long 1018078833
+ .long 1069335005
+ .long 1015291224
+ .long 700037144
+ .long 1016482032
+ .long 2904566452
+ .long 1017226861
+ .long 4074156649
+ .long 1017622651
+ .long 25019565
+ .long 1015245366
+ .long 3601952608
+ .long 1015771755
+ .long 3267129373
+ .long 1017904664
+ .long 503203103
+ .long 1014921629
+ .long 2122011730
+ .long 1018027866
+ .long 3927295461
+ .long 1014189456
+ .long 2790625147
+ .long 1016024251
+ .long 1330460186
+ .long 1016940346
+ .long 4033568463
+ .long 1015538390
+ .long 3695818227
+ .long 1017509621
+ .long 257573361
+ .long 1017208868
+ .long 3227697852
+ .long 1017337964
+ .long 234118548
+ .long 1017169577
+ .long 4009025803
+ .long 1017278524
+ .long 1948343394
+ .long 1017749310
+ .long 678398162
+ .long 1018144239
+ .long 3083864863
+ .long 1016669086
+ .long 2415453452
+ .long 1017890370
+ .long 175467344
+ .long 1017330033
+ .long 3197359580
+ .long 1010339928
+ .long 2071276951
+ .long 1015941358
+ .long 268372543
+ .long 1016737773
+ .long 938132959
+ .long 1017389108
+ .long 1816750559
+ .long 1017337448
+ .long 4119203749
+ .long 1017152174
+ .long 2578653878 # slice j = 2, entry 0
+ .long 1013108497
+ .long 2470331096
+ .long 1014678606
+ .long 123855735
+ .long 1016553320
+ .long 1265650889
+ .long 1014782687
+ .long 3414398172
+ .long 1017182638
+ .long 1040773369
+ .long 1016158401
+ .long 3483628886
+ .long 1016886550
+ .long 4140499405
+ .long 1016191425
+ .long 3893477850
+ .long 1016964495
+ .long 3935319771
+ .long 1009634717
+ .long 2978982660
+ .long 1015027112
+ .long 2452709923
+ .long 1017990229
+ .long 3190365712
+ .long 1015835149
+ .long 4237588139
+ .long 1015832925
+ .long 2610678389
+ .long 1017962711
+ .long 2127316774
+ .long 1017405770
+ .long 824267502
+ .long 1017959463
+ .long 2165924042
+ .long 1017912225
+ .long 2774007076
+ .long 1013257418
+ .long 4123916326
+ .long 1017582284
+ .long 1976417958
+ .long 1016959909
+ .long 4092806412
+ .long 1017711279
+ .long 119251817
+ .long 1015363631
+ .long 3475418768
+ .long 1017675415
+ .long 1972580503
+ .long 1015470684
+ .long 815541017
+ .long 1017517969
+ .long 2429917451
+ .long 1017397776
+ .long 4062888482
+ .long 1016749897
+ .long 68284153
+ .long 1017925678
+ .long 2207779246
+ .long 1016320298
+ .long 1183466520
+ .long 1017408657
+ .long 143326427
+ .long 1017060403
+ .type D_table,@object
+ .size D_table,768
+ .balign 4
+SCALE63: # 0x43e0000000000000 = 2^63, the multiplier that lifts subnormal inputs into the normal range
+ .long 0x00000000 # low word
+ .long 0x43e00000 # high word
+ .type SCALE63, @object
+ .size SCALE63, . - SCALE63
+ .balign 4
+ZERON: # 0x8000000000000000 = -0.0, returned for a negative-zero input
+ .long 0x00000000 # low word
+ .long 0x80000000 # high word: sign bit only
+ .type ZERON, @object
+ .size ZERON, . - ZERON
+ .balign 4
+INF: # 0x7ff0000000000000 = +infinity
+ .long 0x00000000 # low word
+ .long 0x7ff00000 # high word: exponent all ones, mantissa zero
+ .type INF, @object
+ .size INF, . - INF
+ .balign 4
+NEG_INF: # 0xfff0000000000000 = -infinity
+ .long 0x00000000 # low word
+ .long 0xfff00000 # high word: sign set, exponent all ones, mantissa zero
+ .type NEG_INF, @object
+ .size NEG_INF, . - NEG_INF
+ .data
+ .section .note.GNU-stack, "" # empty flags: conventionally read by GNU toolchains as a non-executable-stack marker
+// -- Begin DWARF2 SEGMENT .eh_frame (hand-encoded unwind data for cbrt: one CIE followed by one FDE)
+ .section .eh_frame,"a",@progbits
+.eh_frame_seg:
+ .align 1
+ .4byte 0x00000014 # CIE: length = 20 bytes
+ .8byte 0x00527a0100000000 # CIE id 0, version 1, augmentation string zR
+ .8byte 0x08070c1b01107801 # code align 1, data align -8, return-address column 16, pcrel sdata4 pointers, def_cfa r7 offset 8
+ .4byte 0x00000190 # return address saved at CFA-8, then two padding nops
+ .4byte 0x0000001c # FDE: length = 28 bytes
+ .4byte 0x0000001c # offset back to the CIE
+ .4byte ..___tag_value_cbrt.1-. # function start, PC-relative
+ .4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1 # function length in bytes
+ .2byte 0x0400 # augmentation data length 0, then advance_loc4
+ .4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1 # ... to just after the subq $24
+ .2byte 0x200e # def_cfa_offset 32 (8 for the return address + 24 of scratch)
+ .byte 0x04 # advance_loc4
+ .4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3 # ... to just after the addq $24
+ .2byte 0x080e # def_cfa_offset 8
+ .byte 0x00 # padding nop
+# End