1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
|
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// Description:
// Let K = 64 (table size).
//
// Four sub-domains:
// 1. |x| < 1/(2*K)
// expm1(x) ~ P(x)
// 2. 1/(2*K) <= |x| <= 56*log(2)
// e^x - 1 = 2^(x/log(2)) - 1 = 2^n * T[j] * (1 + P(y)) - 1
// 3. 56*log(2) < x < MAX_LOG
// e^x - 1 ~ e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
// 4. x < -56*log(2)
// e^x - 1 = -1 + e^x ~ -1
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(x)-1
// on the small interval [-log(2)/K..log(2)/K] (the coefficients were
// calculated with Maple V).
//
// In case 3, to avoid problems with arithmetic overflow and underflow,
// the value of 2^n is safely computed as 2^n1 * 2^n2, where n1 is in
// [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
// Special cases:
// expm1(NaN) is NaN
// expm1(+INF) is +INF
// expm1(-INF) is -1
// expm1(x) is x for subnormals
// for finite argument, only expm1(0)=0 is exact.
// For IEEE double
// if x > 709.782712893383973096 then expm1(x) overflow
//
/******************************************************************************/
#include <private/bionic_asm.h>
# -- Begin expm1
// ---------------------------------------------------------------------------
// double expm1(double x) -- computes e^x - 1.
//
// In:     low double of %xmm0 = x (see the "parameter 1" note below).
// Out:    low double of %xmm0 = result.
// Stack:  56-byte frame, used as
//           0(%rsp), 4(%rsp)    saved and modified x87 control word
//           8(%rsp)             status code 41 / 42 (stored, never read here)
//           16(%rsp), 24(%rsp)  scratch for handing doubles to the x87 stack
//           32(%rsp)            copy of x (36(%rsp) is its high dword)
//           40(%rsp)            result slot on the exceptional exit path
// Writes: %eax, %ecx, %edx, %r11, %xmm0-%xmm7 and the flags. The x87 stack
//         and control word are touched only in .L_2TAG_PACKET_1.0.2, which
//         reloads the saved control word before leaving.
// This routine contains no call instruction.
//
// The ..___tag_value_expm1.N labels are referenced by the hand-encoded
// .eh_frame data at the end of the file and must stay where they are.
// ---------------------------------------------------------------------------
ENTRY(expm1)
# parameter 1: %xmm0
..B1.1:
..___tag_value_expm1.1:
subq $56, %rsp
..___tag_value_expm1.3:
// Keep a copy of x in memory; later paths reload its raw 32-bit halves.
movsd %xmm0, 32(%rsp)
..B1.2:
// Duplicate x into both lanes and preload the reduction constants:
// xmm1 = 64/ln(2), xmm6 = Shifter, xmm2 / xmm3 = high / low part of ln(2)/64.
unpcklpd %xmm0, %xmm0
movapd cv(%rip), %xmm1
movapd Shifter(%rip), %xmm6
movapd 16+cv(%rip), %xmm2
movapd 32+cv(%rip), %xmm3
// Range test on the top 16 bits of |x| (sign bit cleared). The OR below has
// its sign bit set when that value is less than 16304 (0x3FB0, the top word
// of 2^-4) or greater than 16527 (0x408F, the last top word below 1024.0).
// The unsigned compare against 0x80000000 then diverts those inputs.
pextrw $3, %xmm0, %eax
andl $32767, %eax
movl $16527, %edx
subl %eax, %edx
subl $16304, %eax
orl %eax, %edx
cmpl $-2147483648, %edx
jae .L_2TAG_PACKET_0.0.2
// ---- Main path: argument reduction -----------------------------------------
// m = round(x * 64/ln(2)): adding Shifter leaves m in the low mantissa bits of
// xmm7, and subtracting it again leaves m as a double in xmm1.
mulpd %xmm0, %xmm1
addpd %xmm6, %xmm1
movapd %xmm1, %xmm7
subpd %xmm6, %xmm1
// y = x - m*ln(2)/64, subtracting the high and the low part of the product
// separately. Polynomial coefficients are loaded in between.
mulpd %xmm1, %xmm2
movapd 48+cv(%rip), %xmm4
mulpd %xmm1, %xmm3
movapd 64+cv(%rip), %xmm5
subpd %xmm2, %xmm0
// ecx = (m & 63) * 16 is the byte offset of table entry j;
// eax = edx = m >> 6 (arithmetic shift) is the power-of-two exponent n.
movd %xmm7, %eax
movl %eax, %ecx
andl $63, %ecx
shll $4, %ecx
sarl $6, %eax
movl %eax, %edx
subpd %xmm3, %xmm0
lea Tbl_addr(%rip), %r11
movapd (%rcx,%r11), %xmm2
// Polynomial in y, evaluated two lanes at a time; xmm1 keeps y itself.
movq 80+cv(%rip), %xmm3
mulpd %xmm0, %xmm4
movapd %xmm0, %xmm1
mulpd %xmm0, %xmm0
mulsd %xmm0, %xmm3
addpd %xmm4, %xmm5
mulsd %xmm0, %xmm0
// Split the table entry: xmm4 = its low qword, xmm2 = its high qword.
movq %xmm2, %xmm4
unpckhpd %xmm2, %xmm2
// Build the bit pattern of 2^n in xmm7: clear the low 6 bits of m (leaving
// 64*n), add 64*1023, then shift left by 46 so that n + 1023 lands in the
// exponent field.
movdqa mmask(%rip), %xmm6
pand %xmm6, %xmm7
movdqa bias(%rip), %xmm6
paddq %xmm6, %xmm7
psllq $46, %xmm7
mulsd %xmm0, %xmm3
mulpd %xmm5, %xmm0
// n outside [-894, 1022] (unsigned test of n + 894 against 1916) cannot use
// the single-scale reconstruction below.
addl $894, %edx
cmpl $1916, %edx
ja .L_2TAG_PACKET_1.0.2
// ---- Main path: reconstruction ---------------------------------------------
addsd %xmm3, %xmm0
// xmm3 = 1.0 (16368 = 0x3FF0 inserted as the top 16 bits).
xorpd %xmm3, %xmm3
movl $16368, %eax
pinsrw $3, %eax, %xmm3
// xmm2 = exponent bits of 2^n OR-ed onto the high table qword;
// xmm7 = 2^n * (low table qword).
orpd %xmm7, %xmm2
mulsd %xmm4, %xmm7
movq %xmm3, %xmm6
addsd %xmm1, %xmm3
// edx = top 16 bits of the scaled table value; used below to pick the order
// of the final additions.
pextrw $3, %xmm2, %edx
// pshufd $238 copies the high lane of xmm0 into the low lane of xmm5.
pshufd $238, %xmm0, %xmm5
// Clear the low 38 bits of (1 + y); xmm6 then receives 1 - truncated(1 + y),
// and xmm1 the remainder y + xmm6.
psrlq $38, %xmm3
psllq $38, %xmm3
movq %xmm2, %xmm4
subsd %xmm3, %xmm6
addsd %xmm5, %xmm0
addsd %xmm6, %xmm1
addsd %xmm7, %xmm4
mulsd %xmm3, %xmm7
mulsd %xmm2, %xmm3
// xmm5 = 1.0, the constant that is subtracted at the end.
xorpd %xmm5, %xmm5
movl $16368, %eax
pinsrw $3, %eax, %xmm5
addsd %xmm1, %xmm0
// Branch when the top 16 bits in edx are above 17184 (0x4320) or below
// 16256 (0x3F80); those cases subtract the 1.0 at a different point.
movl $17184, %ecx
subl %edx, %ecx
subl $16256, %edx
orl %edx, %ecx
jl .L_2TAG_PACKET_2.0.2
// Middle case: subtract 1.0 from the leading product (xmm3).
mulsd %xmm4, %xmm0
subsd %xmm5, %xmm3
addsd %xmm7, %xmm0
addsd %xmm3, %xmm0
.L_2TAG_PACKET_3.0.2:
// Also the exit for inputs that are returned unchanged (see PACKET_13).
jmp ..B1.5
.L_2TAG_PACKET_2.0.2:
// edx < 0 here means the top 16 bits were below 16256.
cmpl $0, %edx
jl .L_2TAG_PACKET_4.0.2
// Top 16 bits above 17184: subtract 1.0 from the smaller product (xmm7).
mulsd %xmm4, %xmm0
subsd %xmm5, %xmm7
addsd %xmm7, %xmm0
addsd %xmm3, %xmm0
jmp ..B1.5
.L_2TAG_PACKET_4.0.2:
// Top 16 bits below 16256: add everything first, subtract 1.0 last.
mulsd %xmm4, %xmm0
addsd %xmm7, %xmm0
addsd %xmm3, %xmm0
subsd %xmm5, %xmm0
jmp ..B1.5
.L_2TAG_PACKET_1.0.2:
// ---- n outside [-894, 1022] ------------------------------------------------
// On entry: eax = n, xmm2 = high table qword, xmm4 = low table qword,
// xmm0 / xmm1 = partial polynomial sums. ecx = high dword of x.
movl 36(%rsp), %ecx
addsd %xmm0, %xmm1
unpckhpd %xmm0, %xmm0
addsd %xmm1, %xmm0
// Negative x on this path: the result is -1.0.
cmpl $0, %ecx
jl .L_2TAG_PACKET_5.0.2
// Save the x87 control word and load a copy with bits 8-9 set (0x300, the
// precision-control field), so the x87 arithmetic below uses the extended
// significand.
fstcw (%rsp)
movw (%rsp), %dx
orw $768, %dx
movw %dx, 4(%rsp)
fldcw 4(%rsp)
// Split n into eax = n >> 1 and edx = n - eax, so that the scale is applied
// as two separate powers of two.
movl %eax, %edx
sarl $1, %eax
subl %eax, %edx
// xmm6 = high table qword with sign and exponent bits cleared (pandn with
// emask), then OR-ed with the exponent of 2^eax. xmm3 = 2^eax as a double.
movdqa emask(%rip), %xmm6
pandn %xmm2, %xmm6
addl $1023, %eax
movd %eax, %xmm3
psllq $52, %xmm3
orpd %xmm3, %xmm6
mulsd %xmm3, %xmm4
// Push onto the x87 stack, in order: polynomial sum p, scaled table high
// part, scaled table low part.
movsd %xmm0, 16(%rsp)
fldl 16(%rsp)
movsd %xmm6, 24(%rsp)
fldl 24(%rsp)
movsd %xmm4, 16(%rsp)
fldl 16(%rsp)
// xmm4 = 2^edx, the second half of the scale.
addl $1023, %edx
movd %edx, %xmm4
psllq $52, %xmm4
// t = high + low; p = p * t; t = t + p; then t = t * 2^edx. No 1.0 is
// subtracted on this path.
faddp %st, %st(1)
fmul %st, %st(1)
faddp %st, %st(1)
movsd %xmm4, 24(%rsp)
fldl 24(%rsp)
fmulp %st, %st(1)
// Round to double through memory, then put the saved control word back.
fstpl 16(%rsp)
movsd 16(%rsp), %xmm0
fldcw (%rsp)
// An all-ones exponent field (0x7FF0 in the top word) means the result
// overflowed to infinity.
pextrw $3, %xmm0, %ecx
andl $32752, %ecx
cmpl $32752, %ecx
jae .L_2TAG_PACKET_6.0.2
jmp ..B1.5
.L_2TAG_PACKET_6.0.2:
// Overflow exit. The code 41 is stored at 8(%rsp) but nothing in this
// routine reads it back.
movl $41, 8(%rsp)
jmp .L_2TAG_PACKET_7.0.2
.L_2TAG_PACKET_8.0.2:
// Positive x at or above the upper bound; eax = high dword of x.
// A high dword of 0x7FF00000 or more is infinity or NaN.
cmpl $2146435072, %eax
jae .L_2TAG_PACKET_9.0.2
// Finite but too large: XMAX * XMAX overflows to +infinity.
movsd XMAX(%rip), %xmm0
mulsd %xmm0, %xmm0
movl $41, 8(%rsp)
jmp .L_2TAG_PACKET_7.0.2
.L_2TAG_PACKET_9.0.2:
// Infinity or NaN. eax = high dword without sign, edx = low dword,
// ecx = high dword with sign.
movl 36(%rsp), %eax
movl 32(%rsp), %edx
movl %eax, %ecx
andl $2147483647, %eax
// Any mantissa bit set (in the high or the low dword) means NaN.
cmpl $2146435072, %eax
ja .L_2TAG_PACKET_10.0.2
cmpl $0, %edx
jne .L_2TAG_PACKET_10.0.2
// Infinity: the negative one gives -1.0, the positive one gives +infinity.
cmpl $0, %ecx
jl .L_2TAG_PACKET_11.0.2
movq INF(%rip), %xmm0
jmp ..B1.5
.L_2TAG_PACKET_11.0.2:
jmp .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_10.0.2:
// NaN input: return x + x.
movsd 32(%rsp), %xmm0
addsd %xmm0, %xmm0
jmp ..B1.5
.L_2TAG_PACKET_12.0.2:
// ---- |x| below the lower bound ---------------------------------------------
// Undo the earlier subtraction so eax is again the top 16 bits of |x|.
// Values below 15504 (0x3C90) take the tiny-input path.
addl $16304, %eax
cmpl $15504, %eax
jb .L_2TAG_PACKET_13.0.2
// Direct polynomial in x with the cvl coefficients. pshufd $68 copies the low
// lane of xmm0 into both lanes of xmm1.
movapd cvl(%rip), %xmm2
pshufd $68, %xmm0, %xmm1
movapd 16+cvl(%rip), %xmm3
movapd 32+cvl(%rip), %xmm4
movq 48+cvl(%rip), %xmm5
mulsd %xmm1, %xmm1
// xmm6 = 0.5 (16352 = 0x3FE0 as the top word), xmm7 = 1.0 (16368 = 0x3FF0).
xorpd %xmm6, %xmm6
movl $16352, %eax
pinsrw $3, %eax, %xmm6
mulpd %xmm0, %xmm2
xorpd %xmm7, %xmm7
movl $16368, %edx
pinsrw $3, %edx, %xmm7
addpd %xmm3, %xmm2
mulsd %xmm1, %xmm5
// pshufd $228 is a plain copy of xmm1 into xmm3.
pshufd $228, %xmm1, %xmm3
mulpd %xmm1, %xmm1
mulsd %xmm0, %xmm6
mulpd %xmm0, %xmm2
addpd %xmm4, %xmm2
movq %xmm7, %xmm4
// xmm7 = 1 + x/2 with its low 27 bits cleared; xmm4 = 1 - xmm7, and
// xmm6 = x/2 + xmm4 is what the truncation dropped.
addsd %xmm6, %xmm7
mulpd %xmm3, %xmm1
psrlq $27, %xmm7
psllq $27, %xmm7
// xmm3 = HIGHMASK, used below to split x into a high part and a remainder.
movq HIGHMASK(%rip), %xmm3
subsd %xmm7, %xmm4
mulpd %xmm1, %xmm2
addsd %xmm4, %xmm6
pshufd $238, %xmm2, %xmm1
addsd %xmm2, %xmm6
andpd %xmm0, %xmm3
movq %xmm0, %xmm4
addsd %xmm6, %xmm1
subsd %xmm3, %xmm0
addsd %xmm5, %xmm1
// result = x_high * xmm7 + x_low * xmm7 + x * (remaining terms in xmm1).
mulsd %xmm7, %xmm3
mulsd %xmm7, %xmm0
mulsd %xmm1, %xmm4
addsd %xmm4, %xmm0
addsd %xmm3, %xmm0
jmp ..B1.5
.L_2TAG_PACKET_13.0.2:
// Tiny input: xmm0 still holds x and is returned as the result.
// Top 16 bits of 16 or more mean a non-zero exponent field; return x as is.
cmpl $16, %eax
jae .L_2TAG_PACKET_3.0.2
// Exponent field is zero: OR the low 32 bits with bits 31..62 to separate
// zero from a subnormal. Zero is returned as is.
movq %xmm0, %xmm2
movd %xmm0, %eax
psrlq $31, %xmm2
movd %xmm2, %ecx
orl %ecx, %eax
je .L_2TAG_PACKET_3.0.2
// Subnormal: square the double whose top word is 0x0010 (2^-1022). The
// product in xmm1 is not used afterwards; presumably the multiply is there
// to raise the underflow exception. The code 42 is stored but not read here.
movl $16, %edx
xorpd %xmm1, %xmm1
pinsrw $3, %edx, %xmm1
mulsd %xmm1, %xmm1
movl $42, 8(%rsp)
jmp .L_2TAG_PACKET_7.0.2
.L_2TAG_PACKET_0.0.2:
// ---- Out-of-range dispatch -------------------------------------------------
// eax still holds (top 16 bits of |x|) - 16304; negative means |x| is small.
cmpl $0, %eax
jl .L_2TAG_PACKET_12.0.2
// |x| is large. eax = high dword of x including the sign.
movl 36(%rsp), %eax
// Signed compare: 0x40900000 or more means x is positive (1024.0 or larger,
// infinity, or a NaN with the sign bit clear).
cmpl $1083179008, %eax
jge .L_2TAG_PACKET_8.0.2
// x is negative here. Unsigned compare: 0xFFF00000 or more is negative
// infinity or a NaN with the sign bit set.
cmpl $-1048576, %eax
jae .L_2TAG_PACKET_9.0.2
.L_2TAG_PACKET_5.0.2:
// Return -1.0 (49136 = 0xBFF0 as the top word).
xorpd %xmm0, %xmm0
movl $49136, %eax
pinsrw $3, %eax, %xmm0
jmp ..B1.5
.L_2TAG_PACKET_7.0.2:
// Exceptional exit: the result is written to 40(%rsp) and read straight back.
movq %xmm0, 40(%rsp)
..B1.3:
movq 40(%rsp), %xmm0
.L_2TAG_PACKET_14.0.2:
..B1.5:
// Common epilogue: release the frame and return with the result in xmm0.
addq $56, %rsp
..___tag_value_expm1.4:
ret
..___tag_value_expm1.5:
END(expm1)
# -- End expm1
// cv: constants for the main path of expm1, 96 bytes, read with 16-byte
// aligned loads at offsets 0, 16, 32, 48, 64 and 80. Each double is stored
// as two .long values, low dword first.
//   cv+0   0x40571547652B82FE twice      64/ln(2)
//   cv+16  0x3F862E42FEFA0000 twice      high part of ln(2)/64
//   cv+32  0x3D1CF79ABC9E3B3A twice      low part of ln(2)/64
//   cv+48  0x3F811111..., 0x3FC5555555555555   about 1/120 and 1/6
//   cv+64  0x3FA5555555555555, 0x3FE0000000000000   about 1/24, and 0.5
//   cv+80  0x3F56C16C16C16C17 twice      about 1/720 (low lane read via movq)
.section .rodata, "a"
.align 16
.align 16
cv:
.long 1697350398
.long 1079448903
.long 1697350398
.long 1079448903
.long 4277796864
.long 1065758274
.long 4277796864
.long 1065758274
.long 3164486458
.long 1025308570
.long 3164486458
.long 1025308570
.long 1963358694
.long 1065423121
.long 1431655765
.long 1069897045
.long 1431655765
.long 1067799893
.long 0
.long 1071644672
.long 381774871
.long 1062650220
.long 381774871
.long 1062650220
.type cv,@object
.size cv,96
// Shifter: the double 0x4338000000000000 (1.5 * 2^52) in both lanes. expm1
// adds it to x * 64/ln(2) and subtracts it again; the sum is also kept, and
// its low 32 bits are read as the integer m.
.align 16
Shifter:
.long 0
.long 1127743488
.long 0
.long 1127743488
.type Shifter,@object
.size Shifter,16
// Tbl_addr: 64 entries of 16 bytes (1024 bytes), indexed in expm1 by
// (m & 63) * 16. Each entry is two 64-bit values, low dword first:
//   first qword  - read as a double and multiplied by 2^n (a small value)
//   second qword - exponent field is zero; expm1 ORs the exponent bits of
//                  2^n onto it
// The second qword looks like the fraction bits of 2^(j/64): entry 1 has the
// high dword 11418, which matches (2^(1/64) - 1) * 2^20. Presumably the first
// qword is the low-order correction for the same entry -- confirm against the
// table generator.
.align 16
Tbl_addr:
.long 0
.long 0
.long 0
.long 0
.long 1000070955
.long 1042145304
.long 1040187392
.long 11418
.long 988267849
.long 1039500660
.long 3539992576
.long 22960
.long 36755401
.long 1042114290
.long 402653184
.long 34629
.long 3634769483
.long 1042178627
.long 1820327936
.long 46424
.long 2155991225
.long 1041560680
.long 847249408
.long 58348
.long 2766913307
.long 1039293264
.long 3489660928
.long 70401
.long 3651174602
.long 1040488175
.long 2927624192
.long 82586
.long 3073892131
.long 1042240606
.long 1006632960
.long 94904
.long 1328391742
.long 1042019037
.long 3942645760
.long 107355
.long 2650893825
.long 1041903210
.long 822083584
.long 119943
.long 2397289153
.long 1041802037
.long 2281701376
.long 132667
.long 430997175
.long 1042110606
.long 1845493760
.long 145530
.long 1230936525
.long 1041801015
.long 1702887424
.long 158533
.long 740675935
.long 1040178913
.long 4110417920
.long 171677
.long 3489810261
.long 1041825986
.long 2793406464
.long 184965
.long 2532600530
.long 1040767882
.long 167772160
.long 198398
.long 3542557060
.long 1041827263
.long 2986344448
.long 211976
.long 1401563777
.long 1041061093
.long 922746880
.long 225703
.long 3129406026
.long 1041852413
.long 880803840
.long 239579
.long 900993572
.long 1039283234
.long 1275068416
.long 253606
.long 2115029358
.long 1042140042
.long 562036736
.long 267786
.long 1086643152
.long 1041785419
.long 1610612736
.long 282120
.long 82864366
.long 1041256244
.long 3045064704
.long 296610
.long 2392968152
.long 1040913683
.long 3573547008
.long 311258
.long 2905856183
.long 1040002214
.long 1988100096
.long 326066
.long 3742008261
.long 1040011137
.long 1451229184
.long 341035
.long 863393794
.long 1040880621
.long 914358272
.long 356167
.long 1446136837
.long 1041372426
.long 3707764736
.long 371463
.long 927855201
.long 1040617636
.long 360710144
.long 386927
.long 1492679939
.long 1041050306
.long 2952790016
.long 402558
.long 608827001
.long 1041582217
.long 2181038080
.long 418360
.long 606260204
.long 1042271987
.long 1711276032
.long 434334
.long 3163044019
.long 1041843851
.long 1006632960
.long 450482
.long 4148747325
.long 1041962972
.long 3900702720
.long 466805
.long 802924201
.long 1041275378
.long 1442840576
.long 483307
.long 3052749833
.long 1041940577
.long 1937768448
.long 499988
.long 2216116399
.long 1041486744
.long 914358272
.long 516851
.long 2729697836
.long 1041445764
.long 2566914048
.long 533897
.long 540608356
.long 1041310907
.long 2600468480
.long 551129
.long 2916344493
.long 1040535661
.long 1107296256
.long 568549
.long 731391814
.long 1039497014
.long 2566914048
.long 586158
.long 1024722704
.long 1041461625
.long 2961178624
.long 603959
.long 3806831748
.long 1041732499
.long 2675965952
.long 621954
.long 238953304
.long 1040316488
.long 2189426688
.long 640145
.long 749123235
.long 1041725785
.long 2063597568
.long 658534
.long 1168187977
.long 1041175214
.long 2986344448
.long 677123
.long 3506096399
.long 1042186095
.long 1426063360
.long 695915
.long 1470221620
.long 1041675499
.long 2566914048
.long 714911
.long 3182425146
.long 1041483134
.long 3087007744
.long 734114
.long 3131698208
.long 1042208657
.long 4068474880
.long 753526
.long 2300504125
.long 1041428596
.long 2415919104
.long 773150
.long 2290297931
.long 1037388400
.long 3716153344
.long 792987
.long 3532148223
.long 1041626194
.long 771751936
.long 813041
.long 1161884404
.long 1042015258
.long 3699376128
.long 833312
.long 876383176
.long 1037968878
.long 1241513984
.long 853805
.long 3379986796
.long 1042213153
.long 3699376128
.long 874520
.long 1545797737
.long 1041681569
.long 58720256
.long 895462
.long 2925146801
.long 1042212567
.long 855638016
.long 916631
.long 1316627971
.long 1038516204
.long 3883925504
.long 938030
.long 3267869137
.long 1040337004
.long 2726297600
.long 959663
.long 3720868999
.long 1041782409
.long 3992977408
.long 981531
.long 433316142
.long 1041994064
.long 1526726656
.long 1003638
.long 781232103
.long 1040093400
.long 2172649472
.long 1025985
.type Tbl_addr,@object
.size Tbl_addr,1024
// mmask: 0x00000000FFFFFFC0 in each 64-bit lane. AND-ed with the shifted
// value holding m, it keeps the low 32 bits with the 6 table-index bits
// cleared (that is, 64 * n) and clears the upper dword.
.align 16
mmask:
.long 4294967232
.long 0
.long 4294967232
.long 0
.type mmask,@object
.size mmask,16
// bias: 65472 = 64 * 1023 in each 64-bit lane. Added to 64 * n before the
// left shift by 46, so that n + 1023 ends up in the exponent field.
.align 16
bias:
.long 65472
.long 0
.long 65472
.long 0
.type bias,@object
.size bias,16
// emask: 0xFFF0000000000000 in each lane (sign and exponent bits of a
// double). expm1 uses it with pandn, which keeps only the fraction bits of
// the other operand.
.align 16
emask:
.long 0
.long 4293918720
.long 0
.long 4293918720
.type emask,@object
.size emask,16
// cvl: polynomial coefficients for the small-|x| path of expm1, 64 bytes,
// read at offsets 0, 16, 32 (16-byte loads) and 48 (low qword only).
// Each double is stored as two .long values, low dword first.
//   cvl+0   0x3EC71DE3A556C734, 0x3F56C16C16C16C17   about 1/362880 and 1/720
//   cvl+16  0x3EFA01C5..., 0x3F81111111111111        about 1/40320 and 1/120
//   cvl+32  0x3F2A01A01A01A01A, 0x3FA5555555555555   about 1/5040 and 1/24
//   cvl+48  0x3FC5555555555555, 0x3FE0000000000000   about 1/6, and 0.5
.align 16
cvl:
.long 2773927732
.long 1053236707
.long 381774871
.long 1062650220
.long 379653899
.long 1056571845
.long 286331153
.long 1065423121
.long 436314138
.long 1059717536
.long 1431655765
.long 1067799893
.long 1431655765
.long 1069897045
.long 0
.long 1071644672
.type cvl,@object
.size cvl,64
// XMAX: 0x7FEFFFFFFFFFFFFF, the largest finite double. expm1 squares it on
// the finite-overflow path so that the product overflows to +infinity.
.align 8
XMAX:
.long 4294967295
.long 2146435071
.type XMAX,@object
.size XMAX,8
// INF: 0x7FF0000000000000, positive infinity; returned for x = +infinity.
.align 8
INF:
.long 0
.long 2146435072
.type INF,@object
.size INF,8
// HIGHMASK: 0xFFFFFFFFFC000000, which clears the low 26 bits of a double.
// The small-|x| path ANDs it with x to obtain the high part of x.
.align 8
HIGHMASK:
.long 4227858432
.long 4294967295
.type HIGHMASK,@object
.size HIGHMASK,8
.data
// Empty .note.GNU-stack section (conventionally marks the object as not
// needing an executable stack).
.section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind information for expm1. The byte-level notes below are
// decoded per the DWARF call-frame encoding; the multi-byte values are
// emitted little-endian.
.section .eh_frame,"a",@progbits
.eh_frame_seg:
.align 1
// CIE: length 0x14; id 0; version 1; augmentation string "zR".
.4byte 0x00000014
.8byte 0x00527a0100000000
// Code alignment 1, data alignment -8, return-address register 16,
// one byte of augmentation data (0x1b), then def_cfa register 7 offset 8.
.8byte 0x08070c1b01107801
// Register 16 saved at CFA-8, followed by padding.
.4byte 0x00000190
// FDE: length 0x1c, CIE pointer 0x1c.
.4byte 0x0000001c
.4byte 0x0000001c
// Start of expm1 (relative to this field) and its length in bytes.
.4byte ..___tag_value_expm1.1-.
.4byte ..___tag_value_expm1.5-..___tag_value_expm1.1
// Augmentation data length 0, then advance_loc4 to the point after subq.
.2byte 0x0400
.4byte ..___tag_value_expm1.3-..___tag_value_expm1.1
// def_cfa_offset 64 (return address plus the 56-byte frame).
.2byte 0x400e
// advance_loc4 to the point after addq.
.byte 0x04
.4byte ..___tag_value_expm1.4-..___tag_value_expm1.3
// def_cfa_offset 8 again, then one byte of padding.
.2byte 0x080e
.byte 0x00
# End
|