summaryrefslogtreecommitdiffstats
path: root/compiler/dex/quick/x86/fp_x86.cc
blob: 8e81746db52f91d345a5a15f1e8ba3679118e33c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_x86.h"

#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "x86_lir.h"

namespace art {

// Generates code for a single-precision float arithmetic bytecode (add, sub,
// mul, div) by mapping it to the corresponding two-operand scalar SSE
// instruction. REM and NEG have no direct SSE equivalent and are delegated to
// GenRemFP / GenNegFloat, returning early.
void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode,
                                 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  /*
   * Don't attempt to optimize register usage since these opcodes call out to
   * the handlers.
   */
  switch (opcode) {
    case Instruction::ADD_FLOAT_2ADDR:
    case Instruction::ADD_FLOAT:
      op = kX86AddssRR;
      break;
    case Instruction::SUB_FLOAT_2ADDR:
    case Instruction::SUB_FLOAT:
      op = kX86SubssRR;
      break;
    case Instruction::DIV_FLOAT_2ADDR:
    case Instruction::DIV_FLOAT:
      op = kX86DivssRR;
      break;
    case Instruction::MUL_FLOAT_2ADDR:
    case Instruction::MUL_FLOAT:
      op = kX86MulssRR;
      break;
    case Instruction::REM_FLOAT_2ADDR:
    case Instruction::REM_FLOAT:
      GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */);
      return;
    case Instruction::NEG_FLOAT:
      GenNegFloat(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValue(rl_src1, kFPReg);
  rl_src2 = LoadValue(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  RegStorage r_dest = rl_result.reg;
  RegStorage r_src1 = rl_src1.reg;
  RegStorage r_src2 = rl_src2.reg;
  // The SSE ops used here are two-operand (dest = dest op src2), so src1 must
  // first be copied into dest. If dest aliases src2, that copy would clobber
  // the second operand; preserve src2 in a fresh temp before the copy.
  if (r_dest == r_src2) {
    r_src2 = AllocTempSingle();
    OpRegCopy(r_src2, r_dest);
  }
  OpRegCopy(r_dest, r_src1);
  NewLIR2(op, r_dest.GetReg(), r_src2.GetReg());
  StoreValue(rl_dest, rl_result);
}

// Generates code for a double-precision arithmetic bytecode (add, sub, mul,
// div) using the corresponding two-operand scalar SSE2 instruction. REM and
// NEG are delegated to GenRemFP / GenNegDouble and return early.
void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  DCHECK(rl_dest.wide);
  DCHECK(rl_dest.fp);
  DCHECK(rl_src1.wide);
  DCHECK(rl_src1.fp);
  DCHECK(rl_src2.wide);
  DCHECK(rl_src2.fp);
  X86OpCode op = kX86Nop;
  RegLocation rl_result;

  switch (opcode) {
    case Instruction::ADD_DOUBLE_2ADDR:
    case Instruction::ADD_DOUBLE:
      op = kX86AddsdRR;
      break;
    case Instruction::SUB_DOUBLE_2ADDR:
    case Instruction::SUB_DOUBLE:
      op = kX86SubsdRR;
      break;
    case Instruction::DIV_DOUBLE_2ADDR:
    case Instruction::DIV_DOUBLE:
      op = kX86DivsdRR;
      break;
    case Instruction::MUL_DOUBLE_2ADDR:
    case Instruction::MUL_DOUBLE:
      op = kX86MulsdRR;
      break;
    case Instruction::REM_DOUBLE_2ADDR:
    case Instruction::REM_DOUBLE:
      GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */);
      return;
    case Instruction::NEG_DOUBLE:
      GenNegDouble(rl_dest, rl_src1);
      return;
    default:
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  rl_src1 = LoadValueWide(rl_src1, kFPReg);
  rl_src2 = LoadValueWide(rl_src2, kFPReg);
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  // Two-operand SSE form: dest = dest op src2. If dest aliases src2, copying
  // src1 into dest would destroy the second operand, so save src2 first.
  if (rl_result.reg == rl_src2.reg) {
    rl_src2.reg = AllocTempDouble();
    OpRegCopy(rl_src2.reg, rl_result.reg);
  }
  OpRegCopy(rl_result.reg, rl_src1.reg);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

// Multiply-by-constant strength reduction for floats is not implemented on
// x86; reaching this method aborts the compiler.
void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
                                            int32_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest);
  UNUSED(rl_src1);
  UNUSED(constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
}

// Multiply-by-constant strength reduction for doubles is not implemented on
// x86; reaching this method aborts the compiler.
void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
                                             int64_t constant) {
  // TODO: need x86 implementation.
  UNUSED(rl_dest);
  UNUSED(rl_src1);
  UNUSED(constant);
  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
}

// Converts a 64-bit integer virtual register to float or double via the x87
// unit: the long is flushed to its Dalvik stack slot, loaded with fild64, and
// stored back with fstp. Only reached on 32-bit targets — the callers in
// GenConversion handle cu_->target64 with cvtsi2ss/sd first.
void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
  // Compute offsets to the source and destination VRs on stack
  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of source.
  rl_src = UpdateLocWide(rl_src);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in physical register, then put it in its location on stack.
  if (rl_src.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in physical
      // register. Since we need it to be in memory to convert, we place it there now.
      const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
      StoreBaseDisp(rs_rSP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
    }
  }

  // Push the source virtual register onto the x87 stack.
  // NOTE(review): rs_rX86_SP_32 is used unconditionally here and for the fstp
  // below, unlike the conditional rs_rSP above — presumably fine because this
  // path is only reached on 32-bit targets; confirm if that ever changes.
  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP_32.GetReg(),
                              src_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);

  // Now pop off x87 stack and store it in the destination VR's stack location.
  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP_32.GetReg(), displacement);
  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    /*
     * We already know that the result is in a physical register but do not know if it is the
     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
     * correct register class.
     */
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);

      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);

      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

// Generates code for a primitive-conversion bytecode. Conversions with a
// direct SSE instruction (int/float/double widening and narrowing) set 'op'
// and fall through to the common tail; fp-to-integral conversions need
// explicit overflow/NaN fix-ups (cvtt* returns the integer indefinite value
// 0x80000000... for out-of-range and NaN inputs, while Java requires
// MAX_VALUE on positive overflow and 0 for NaN) and return early, as do the
// 32-bit long<->fp cases which go through the x87 unit or a runtime call.
void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
  RegisterClass rcSrc = kFPReg;
  X86OpCode op = kX86Nop;
  RegLocation rl_result;
  switch (opcode) {
    case Instruction::INT_TO_FLOAT:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2ssRR;
      break;
    case Instruction::DOUBLE_TO_FLOAT:
      rcSrc = kFPReg;
      op = kX86Cvtsd2ssRR;
      break;
    case Instruction::FLOAT_TO_DOUBLE:
      rcSrc = kFPReg;
      op = kX86Cvtss2sdRR;
      break;
    case Instruction::INT_TO_DOUBLE:
      rcSrc = kCoreReg;
      op = kX86Cvtsi2sdRR;
      break;
    case Instruction::FLOAT_TO_INT: {
      rl_src = LoadValue(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempSingle();

      // Preload Integer.MAX_VALUE; kept when src >= (float)MAX_VALUE.
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      // Parity set after comiss means unordered, i.e. the source was NaN.
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      // NaN converts to 0 per Java semantics.
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::DOUBLE_TO_INT: {
      rl_src = LoadValueWide(rl_src, kFPReg);
      // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
      ClobberSReg(rl_dest.s_reg_low);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      RegStorage temp_reg = AllocTempDouble();

      // Preload Integer.MAX_VALUE; kept when src >= (double)MAX_VALUE.
      LoadConstant(rl_result.reg, 0x7fffffff);
      NewLIR2(kX86Cvtsi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
      NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
      LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
      // Parity set after comisd means unordered, i.e. the source was NaN.
      LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
      NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
      LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
      branch_na_n->target = NewLIR0(kPseudoTargetLabel);
      // NaN converts to 0 per Java semantics.
      NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
      branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
      branch_normal->target = NewLIR0(kPseudoTargetLabel);
      StoreValue(rl_dest, rl_result);
      return;
    }
    case Instruction::LONG_TO_DOUBLE:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2sdRR;
        break;
      }
      // 32-bit target: no SSE int64 convert available; use the x87 unit.
      GenLongToFP(rl_dest, rl_src, true /* is_double */);
      return;
    case Instruction::LONG_TO_FLOAT:
      if (cu_->target64) {
        rcSrc = kCoreReg;
        op = kX86Cvtsqi2ssRR;
        break;
      }
      // 32-bit target: no SSE int64 convert available; use the x87 unit.
      GenLongToFP(rl_dest, rl_src, false /* is_double */);
      return;
    case Instruction::FLOAT_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValue(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempSingle();

        // Set 0x7fffffffffffffff to rl_result
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        // NaN converts to 0 per Java semantics.
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        // 32-bit target: fall back to the quick runtime entrypoint.
        CheckEntrypointTypes<kQuickF2l, int64_t, float>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    case Instruction::DOUBLE_TO_LONG:
      if (cu_->target64) {
        rl_src = LoadValueWide(rl_src, kFPReg);
        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
        ClobberSReg(rl_dest.s_reg_low);
        rl_result = EvalLoc(rl_dest, kCoreReg, true);
        RegStorage temp_reg = AllocTempDouble();

        // Set 0x7fffffffffffffff to rl_result
        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondAe);
        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
        // NaN converts to 0 per Java semantics.
        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
        branch_normal->target = NewLIR0(kPseudoTargetLabel);
        StoreValueWide(rl_dest, rl_result);
      } else {
        // 32-bit target: fall back to the quick runtime entrypoint.
        CheckEntrypointTypes<kQuickD2l, int64_t, double>();  // int64_t -> kCoreReg
        GenConversionCall(kQuickD2l, rl_dest, rl_src, kCoreReg);
      }
      return;
    default:
      // Was LOG(INFO), which silently fell through and emitted a kX86Nop for
      // an unhandled opcode. Abort like the other Gen* handlers do.
      LOG(FATAL) << "Unexpected opcode: " << opcode;
  }
  // At this point, target will be either float or double.
  DCHECK(rl_dest.fp);
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, rcSrc);
  } else {
    rl_src = LoadValue(rl_src, rcSrc);
  }
  rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}

// Generates a floating-point remainder (fmod-style) using the x87 fprem
// instruction. Both sources are flushed to their Dalvik stack slots, loaded
// onto the x87 stack, and fprem is looped until the partial-remainder
// reduction is complete; the result is stored back through memory.
void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) {
  // Compute offsets to the source and destination VRs on stack.
  int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low);
  int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low);
  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);

  // Update the in-register state of sources.
  rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1);
  rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2);

  // All memory accesses below reference dalvik regs.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);

  // If the source is in physical register, then put it in its location on stack.
  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
  if (rl_src1.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src1.reg);

    if (reg_info != nullptr && reg_info->IsTemp()) {
      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
      FlushSpecificReg(reg_info);
      // ResetDef to prevent NullifyRange from removing stores.
      ResetDef(rl_src1.reg);
    } else {
      // It must have been register promoted if it is not a temp but is still in physical
      // register. Since we need it to be in memory to convert, we place it there now.
      StoreBaseDisp(rs_rSP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  // Same flushing discipline for the second source.
  if (rl_src2.location == kLocPhysReg) {
    RegisterInfo* reg_info = GetRegInfo(rl_src2.reg);
    if (reg_info != nullptr && reg_info->IsTemp()) {
      FlushSpecificReg(reg_info);
      ResetDef(rl_src2.reg);
    } else {
      StoreBaseDisp(rs_rSP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                    kNotVolatile);
    }
  }

  int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;

  // Push the source virtual registers onto the x87 stack.
  // src2 (the divisor) is pushed first so it ends up in ST(1), src1 in ST(0).
  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src2_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rSP.GetReg(),
                             src1_v_reg_offset + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                          true /* is_load */, is_double /* is64bit */);

  // fstsw writes the FPU status word to AX, so claim rAX for the loop below.
  FlushReg(rs_rAX);
  Clobber(rs_rAX);
  LockTemp(rs_rAX);

  LIR* retry = NewLIR0(kPseudoTargetLabel);

  // Compute the partial remainder of ST(0) / ST(1) and leave it in ST(0).
  NewLIR0(kX86Fprem);

  // Move FPU status word to AX.
  NewLIR0(kX86Fstsw16R);

  // Check if reduction is complete: 0x400 is the C2 condition bit, which
  // fprem keeps set while the reduction is still incomplete.
  OpRegImm(kOpAnd, rs_rAX, 0x400);

  // If no then continue to compute remainder.
  LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
  branch->target = retry;

  FreeTemp(rs_rAX);

  // Now store result in the destination VR's stack location.
  int displacement = dest_v_reg_offset + LOWORD_OFFSET;
  int opcode = is_double ? kX86Fst64M : kX86Fst32M;
  LIR *fst = NewLIR2NoDest(opcode, rs_rSP.GetReg(), displacement);
  AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);

  // Pop ST(1) and ST(0).
  NewLIR0(kX86Fucompp);

  /*
   * The result is in a physical register if it was in a temp or was register
   * promoted. For that reason it is enough to check if it is in physical
   * register. If it is, then we must do all of the bookkeeping necessary to
   * invalidate temp (if needed) and load in promoted register (if needed).
   * If the result's location is in memory, then we do not need to do anything
   * more since the fstp has already placed the correct value in memory.
   */
  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest) : UpdateLocTyped(rl_dest);
  if (rl_result.location == kLocPhysReg) {
    rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (is_double) {
      LoadBaseDisp(rs_rSP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
      StoreFinalValueWide(rl_dest, rl_result);
    } else {
      Load32Disp(rs_rSP, dest_v_reg_offset, rl_result.reg);
      StoreFinalValue(rl_dest, rl_result);
    }
  }
}

// Generates code for cmpl/cmpg-float/double: the result is 1 if src1 > src2,
// 0 if equal, -1 if src1 < src2. For an unordered comparison (NaN operand),
// cmpg (gt_bias) produces 1 and cmpl produces -1.
void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest,
                          RegLocation rl_src1, RegLocation rl_src2) {
  bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT);
  bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT);
  if (single) {
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
  } else {
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
  }
  // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
  ClobberSReg(rl_dest.s_reg_low);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // Preload the unordered answer for gt_bias (1); for cmpl preload 0 so the
  // sbb below turns an unordered compare (which sets CF) into -1.
  LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0);
  if (single) {
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  LIR* branch = nullptr;
  if (unordered_gt) {
    // Parity set => unordered: skip straight to the end with the preloaded 1.
    branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
  }
  // If the result reg can't be byte accessed, use a jump and move instead of a set.
  if (!IsByteRegister(rl_result.reg)) {
    LIR* branch2 = nullptr;
    if (unordered_gt) {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0);
    } else {
      branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe);
      NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1);
    }
    branch2->target = NewLIR0(kPseudoTargetLabel);
  } else {
    // result = (src1 > src2) ? 1 : 0.
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */);
  }
  // Subtract the carry flag: below leaves CF set, turning 0 into -1.
  NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0);
  if (unordered_gt) {
    branch->target = NewLIR0(kPseudoTargetLabel);
  }
  StoreValue(rl_dest, rl_result);
}

// Generates a fused floating-point compare-and-branch. ucomiss/ucomisd set
// the integer flags like an unsigned compare and set the parity flag when an
// operand is NaN, so each condition code is translated to its unsigned
// equivalent and an extra parity branch routes the NaN case according to
// gt_bias (whether NaN compares as "greater").
void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                     bool is_double) {
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  LIR* branch = nullptr;
  RegLocation rl_src1;
  RegLocation rl_src2;
  if (is_double) {
    rl_src1 = mir_graph_->GetSrcWide(mir, 0);
    rl_src2 = mir_graph_->GetSrcWide(mir, 2);
    rl_src1 = LoadValueWide(rl_src1, kFPReg);
    rl_src2 = LoadValueWide(rl_src2, kFPReg);
    NewLIR2(kX86UcomisdRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  } else {
    rl_src1 = mir_graph_->GetSrc(mir, 0);
    rl_src2 = mir_graph_->GetSrc(mir, 1);
    rl_src1 = LoadValue(rl_src1, kFPReg);
    rl_src2 = LoadValue(rl_src2, kFPReg);
    NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
  }
  ConditionCode ccode = mir->meta.ccode;
  switch (ccode) {
    case kCondEq:
      if (!gt_bias) {
        // NaN => not equal: a parity-set compare must not take the branch.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      break;
    case kCondNe:
      if (!gt_bias) {
        // NaN => not equal: take the branch on parity.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      break;
    case kCondLt:
      if (gt_bias) {
        // NaN biases "greater", so NaN < x is false.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondUlt;
      break;
    case kCondLe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = not_taken;
      }
      ccode = kCondLs;
      break;
    case kCondGt:
      if (gt_bias) {
        // NaN biases "greater", so NaN > x is true.
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondHi;
      break;
    case kCondGe:
      if (gt_bias) {
        branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
        branch->target = taken;
      }
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

// Negates a float by flipping its sign bit with integer arithmetic: adding
// 0x80000000 to the raw 32-bit pattern toggles bit 31 and leaves the other
// bits unchanged, so no XMM register is needed.
void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000);
  StoreValue(rl_dest, rl_result);
}

// Negates a double by flipping bit 63 of its raw representation, using core
// registers rather than XMM operations.
void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    // Flip sign bit. Rotate bit 63 down into bit 0, xor it, and rotate back:
    // this avoids materializing a 64-bit 0x8000000000000000 immediate.
    NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Xor64RI, rl_result.reg.GetReg(), 1);
    NewLIR2(kX86Ror64RI, rl_result.reg.GetReg(), 1);
  } else {
    // 32-bit: the sign bit lives in the high word; adding 0x80000000 toggles
    // it and leaves the remaining bits untouched.
    rl_result = ForceTempWide(rl_src);
    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), 0x80000000);
  }
  StoreValueWide(rl_dest, rl_result);
}

// Inlines Math.sqrt(double) as a single SSE2 sqrtsd instruction.
bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
  RegLocation rl_dest = InlineTargetWide(info);  // Double slot for the result.
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // The result is never consumed, so the call is dead: report a successful
    // inline without emitting any code.
    return true;
  }
  RegLocation rl_arg = LoadValueWide(info->args[0], kFPReg);
  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
  NewLIR2(kX86SqrtsdRR, rl_result.reg.GetReg(), rl_arg.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
  return true;
}

// Inlines Math.abs(float) by clearing the sign bit (AND with 0x7fffffff) in a
// core register, or directly in the value's memory slot when source and
// destination are the same virtual register held in memory.
bool X86Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
  // Get the argument
  RegLocation rl_src = info->args[0];

  // Get the inlined intrinsic target virtual register
  RegLocation rl_dest = InlineTarget(info);

  // Get the virtual register number
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);

  // if argument is the same as inlined intrinsic target
  if (v_src_reg == v_dst_reg) {
    rl_src = UpdateLoc(rl_src);

    // if argument is in the physical register
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValue(rl_src, kCoreReg);
      // Clear the sign bit in place and store the same location back.
      OpRegImm(kOpAnd, rl_src.reg, 0x7fffffff);
      StoreValue(rl_dest, rl_src);
      return true;
    }
    // the argument is in memory
    DCHECK((rl_src.location == kLocDalvikFrame) ||
         (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement, 0x7fffffff);
    // The and-to-memory is a read-modify-write: annotate both accesses.
    AnnotateDalvikRegAccess(lir, displacement >> 2, false /*is_load */, false /* is_64bit */);
    AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit*/);
    return true;
  } else {
    // Distinct source and destination: compute into a fresh core register.
    rl_src = LoadValue(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegRegImm(kOpAnd, rl_result.reg, rl_src.reg, 0x7fffffff);
    StoreValue(rl_dest, rl_result);
    return true;
  }
}

// Inlines Math.abs(double) by clearing the sign bit (bit 63). Depending on
// where the value currently lives, this is done with a 64-bit shift pair
// (64-bit targets), a packed AND in an XMM register, an AND on the high word
// in core registers, or an AND directly on the memory slot's high word.
bool X86Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  RegLocation rl_dest = InlineTargetWide(info);
  DCHECK_NE(rl_src.s_reg_low, INVALID_SREG);
  if (rl_dest.s_reg_low == INVALID_SREG) {
    // Result is unused, the code is dead. Inlining successful, no code generated.
    return true;
  }
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    // Shift the sign bit out the top and back in as zero.
    OpRegImm(kOpLsl, rl_result.reg, 1);
    OpRegImm(kOpLsr, rl_result.reg, 1);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
  int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
  int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
  rl_src = UpdateLocWide(rl_src);

  // if argument is in the physical XMM register
  if (rl_src.location == kLocPhysReg && rl_src.reg.IsFloat()) {
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
    if (rl_result.reg != rl_src.reg) {
      // Build the mask in the result register and AND the source into it.
      LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
    } else {
      // Result aliases the source: the mask needs its own temp register.
      RegStorage sign_mask = AllocTempDouble();
      LoadConstantWide(sign_mask, 0x7fffffffffffffff);
      NewLIR2(kX86PandRR, rl_result.reg.GetReg(), sign_mask.GetReg());
      FreeTemp(sign_mask);
    }
    StoreValueWide(rl_dest, rl_result);
    return true;
  } else if (v_src_reg == v_dst_reg) {
    // if argument is the same as inlined intrinsic target
    // if argument is in the physical register
    if (rl_src.location == kLocPhysReg) {
      rl_src = LoadValueWide(rl_src, kCoreReg);
      // The sign bit sits in the high word; clear it there.
      OpRegImm(kOpAnd, rl_src.reg.GetHigh(), 0x7fffffff);
      StoreValueWide(rl_dest, rl_src);
      return true;
    }
    // the argument is in memory
    DCHECK((rl_src.location == kLocDalvikFrame) ||
           (rl_src.location == kLocCompilerTemp));

    // Operate directly into memory.
    int displacement = SRegOffset(rl_dest.s_reg_low);
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP_32.GetReg(), displacement  + HIWORD_OFFSET, 0x7fffffff);
    // The and-to-memory is a read-modify-write: annotate both accesses.
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit*/);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /*is_load */, true /* is_64bit */);
    return true;
  } else {
    // Distinct source and destination: copy then clear the high-word sign bit.
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegCopyWide(rl_result.reg, rl_src.reg);
    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
    StoreValueWide(rl_dest, rl_result);
    return true;
  }
}

// Inlines Math.min/max for float and double with full Java semantics:
// returns NaN if either operand is NaN, and distinguishes +0.0 from -0.0 on
// the equal path by OR-ing the bit patterns for min (so -0.0 wins) and
// AND-ing them for max (so +0.0 wins).
bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
  if (is_double) {
    RegLocation rl_dest = InlineTargetWide(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopyWide.
    // min/max are symmetric, so swapping the operands preserves the result.
    if (rl_result.reg == rl_src2.reg) {
        std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopyWide(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN. Load the canonical double NaN bit pattern.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));

    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopyWide(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValueWide(rl_dest, rl_result);
  } else {
    RegLocation rl_dest = InlineTarget(info);
    if (rl_dest.s_reg_low == INVALID_SREG) {
      // Result is unused, the code is dead. Inlining successful, no code generated.
      return true;
    }
    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);

    // Avoid src2 corruption by OpRegCopy.
    // min/max are symmetric, so swapping the operands preserves the result.
    if (rl_result.reg == rl_src2.reg) {
        std::swap(rl_src2.reg, rl_src1.reg);
    }

    OpRegCopy(rl_result.reg, rl_src1.reg);
    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    // If either arg is NaN, return NaN.
    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
    // Min/Max branches.
    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
    // If equal, we need to resolve situations like min/max(0.0, -0.0) == -0.0/0.0.
    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
    // Handle NaN. Load the canonical float NaN bit pattern.
    branch_nan->target = NewLIR0(kPseudoTargetLabel);
    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
    // Handle Min/Max. Copy greater/lesser value from src2.
    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
    OpRegCopy(rl_result.reg, rl_src2.reg);
    // Right operand is already in result reg.
    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
    // Exit.
    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

}  // namespace art