Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/quick/codegen_util.cc                        16
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc                         4
-rw-r--r--  compiler/dex/quick/x86/codegen_x86.h                       2
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc                     119
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h                           4
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.cc     8
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.h      4
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc                 44
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc             8
9 files changed, 162 insertions, 47 deletions
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22..048aca3 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1046,9 +1046,19 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
}
// Push a marker to take place of lr.
vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
- // fp regs already sorted.
- for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
- vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+ if (cu_->instruction_set == kThumb2) {
+ // fp regs already sorted.
+ for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
+ vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+ }
+ } else {
+ // For other platforms regs may have been inserted out of order - sort first.
+ std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end());
+ for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) {
+ // Copy, stripping out the phys register sort key.
+ vmap_encoder.PushBackUnsigned(
+ ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
+ }
}
} else {
DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
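
The masked copy above depends on how non-Thumb2 targets record fp_vmap_table_ entries: per the comments in the hunk, the physical register number acts as a sort key packed above VREG_NUM_WIDTH, so the table can be sorted by physical register and then emitted with only the low bits kept. A small self-contained sketch of that scheme; kVRegNumWidth and both helpers are illustrative stand-ins, and only the masking expression mirrors the real code.

// Sketch of the sort-key scheme implied by the comments above: the physical
// register number lives above VREG_NUM_WIDTH purely so std::sort orders
// entries by physical register; emitting then strips the key off.
#include <algorithm>
#include <cstdint>
#include <vector>

constexpr uint32_t kVRegNumWidth = 16;  // stand-in for VREG_NUM_WIDTH

uint32_t PackFpVmapEntry(uint32_t phys_reg, uint32_t adjusted_vreg) {
  return (phys_reg << kVRegNumWidth) | adjusted_vreg;
}

std::vector<uint32_t> EmitFpVmap(std::vector<uint32_t> table) {
  std::sort(table.begin(), table.end());               // order by phys reg key
  const uint32_t vreg_mask = ~(~0u << kVRegNumWidth);  // same as ~(-1 << width)
  for (uint32_t& entry : table) {
    entry &= vreg_mask;                                // keep only the vreg bits
  }
  return table;
}

int main() {
  std::vector<uint32_t> table = {
      PackFpVmapEntry(/*phys_reg=*/13, /*adjusted_vreg=*/7),
      PackFpVmapEntry(/*phys_reg=*/12, /*adjusted_vreg=*/9),
  };
  std::vector<uint32_t> emitted = EmitFpVmap(table);
  // emitted == {9, 7}: ordered by physical register, sort keys stripped.
  return (emitted[0] == 9 && emitted[1] == 7) ? 0 : 1;
}
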
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514..8e2a1e3 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -234,8 +234,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
NewLIR0(kPseudoMethodEntry);
/* Spill core callee saves */
SpillCoreRegs();
- /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
- DCHECK_EQ(num_fp_spills_, 0);
+ SpillFPRegs();
if (!skip_overflow_check) {
class StackOverflowSlowPath : public LIRSlowPath {
public:
@@ -309,6 +308,7 @@ void X86Mir2Lir::GenExitSequence() {
NewLIR0(kPseudoMethodExit);
UnSpillCoreRegs();
+ UnSpillFPRegs();
/* Remove frame except for return address */
stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
NewLIR0(kX86Ret);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30e..b0c54e8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -319,6 +319,8 @@ class X86Mir2Lir : public Mir2Lir {
void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
void SpillCoreRegs();
void UnSpillCoreRegs();
+ void UnSpillFPRegs();
+ void SpillFPRegs();
static const X86EncodingMap EncodingMap[kX86Last];
bool InexpensiveConstantInt(int32_t value);
bool InexpensiveConstantFloat(int32_t value);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505..1ebbbbd 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -52,6 +52,13 @@ static constexpr RegStorage dp_regs_arr_64[] = {
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
};
+static constexpr RegStorage xp_regs_arr_32[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static constexpr RegStorage xp_regs_arr_64[] = {
+ rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+ rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+};
static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
@@ -60,6 +67,24 @@ static constexpr RegStorage core_temps_arr_64[] = {
rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
rs_r8, rs_r9, rs_r10, rs_r11
};
+
+// How to add register to be available for promotion:
+// 1) Remove register from array defining temp
+// 2) Update ClobberCallerSave
+// 3) Update JNI compiler ABI:
+// 3.1) add reg in JniCallingConvention method
+// 3.2) update CoreSpillMask/FpSpillMask
+// 4) Update entrypoints
+// 4.1) Update constants in asm_support_x86_64.h for new frame size
+// 4.2) Remove entry in SmashCallerSaves
+// 4.3) Update jni_entrypoints to spill/unspill new callee save reg
+// 4.4) Update quick_entrypoints to spill/unspill new callee save reg
+// 5) Update runtime ABI
+// 5.1) Update quick_method_frame_info with new required spills
+// 5.2) Update QuickArgumentVisitor with new offsets to gprs and xmms
+// Note that you cannot use register corresponding to incoming args
+// according to ABI and QCG needs one additional XMM temp for
+// bulk copy in preparation to call.
static constexpr RegStorage core_temps_arr_64q[] = {
rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
rs_r8q, rs_r9q, rs_r10q, rs_r11q
@@ -69,14 +94,14 @@ static constexpr RegStorage sp_temps_arr_32[] = {
};
static constexpr RegStorage sp_temps_arr_64[] = {
rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
- rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
+ rs_fr8, rs_fr9, rs_fr10, rs_fr11
};
static constexpr RegStorage dp_temps_arr_32[] = {
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static constexpr RegStorage dp_temps_arr_64[] = {
rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
- rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
+ rs_dr8, rs_dr9, rs_dr10, rs_dr11
};
static constexpr RegStorage xp_temps_arr_32[] = {
@@ -84,7 +109,7 @@ static constexpr RegStorage xp_temps_arr_32[] = {
};
static constexpr RegStorage xp_temps_arr_64[] = {
rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
- rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+ rs_xr8, rs_xr9, rs_xr10, rs_xr11
};
static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -95,6 +120,8 @@ static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
@@ -437,21 +464,13 @@ bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
- Clobber(rs_rAX);
- Clobber(rs_rCX);
- Clobber(rs_rDX);
- Clobber(rs_rBX);
-
- Clobber(rs_fr0);
- Clobber(rs_fr1);
- Clobber(rs_fr2);
- Clobber(rs_fr3);
- Clobber(rs_fr4);
- Clobber(rs_fr5);
- Clobber(rs_fr6);
- Clobber(rs_fr7);
-
if (cu_->target64) {
+ Clobber(rs_rAX);
+ Clobber(rs_rCX);
+ Clobber(rs_rDX);
+ Clobber(rs_rSI);
+ Clobber(rs_rDI);
+
Clobber(rs_r8);
Clobber(rs_r9);
Clobber(rs_r10);
@@ -461,11 +480,21 @@ void X86Mir2Lir::ClobberCallerSave() {
Clobber(rs_fr9);
Clobber(rs_fr10);
Clobber(rs_fr11);
- Clobber(rs_fr12);
- Clobber(rs_fr13);
- Clobber(rs_fr14);
- Clobber(rs_fr15);
+ } else {
+ Clobber(rs_rAX);
+ Clobber(rs_rCX);
+ Clobber(rs_rDX);
+ Clobber(rs_rBX);
}
+
+ Clobber(rs_fr0);
+ Clobber(rs_fr1);
+ Clobber(rs_fr2);
+ Clobber(rs_fr3);
+ Clobber(rs_fr4);
+ Clobber(rs_fr5);
+ Clobber(rs_fr6);
+ Clobber(rs_fr7);
}
RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -599,11 +628,15 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() {
// Target-specific adjustments.
// Add in XMM registers.
- const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
- for (RegStorage reg : *xp_temps) {
+ const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
+ for (RegStorage reg : *xp_regs) {
RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
reginfo_map_.Put(reg.GetReg(), info);
- info->SetIsTemp(true);
+ }
+ const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
+ for (RegStorage reg : *xp_temps) {
+ RegisterInfo* xp_reg_info = GetRegInfo(reg);
+ xp_reg_info->SetIsTemp(true);
}
// Alias single precision xmm to double xmms.
@@ -665,9 +698,11 @@ void X86Mir2Lir::SpillCoreRegs() {
// Spill mask not including fake return address register
uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+ OpSize size = cu_->target64 ? k64 : k32;
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+ StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+ size, kNotVolatile);
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
@@ -680,14 +715,46 @@ void X86Mir2Lir::UnSpillCoreRegs() {
// Spill mask not including fake return address register
uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+ OpSize size = cu_->target64 ? k64 : k32;
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+ LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+ size, kNotVolatile);
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
}
+void X86Mir2Lir::SpillFPRegs() {
+ if (num_fp_spills_ == 0) {
+ return;
+ }
+ uint32_t mask = fp_spill_mask_;
+ int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+ for (int reg = 0; mask; mask >>= 1, reg++) {
+ if (mask & 0x1) {
+ StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+ k64, kNotVolatile);
+ offset += sizeof(double);
+ }
+ }
+}
+void X86Mir2Lir::UnSpillFPRegs() {
+ if (num_fp_spills_ == 0) {
+ return;
+ }
+ uint32_t mask = fp_spill_mask_;
+ int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+ for (int reg = 0; mask; mask >>= 1, reg++) {
+ if (mask & 0x1) {
+ LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+ k64, kNotVolatile);
+ offset += sizeof(double);
+ }
+ }
+}
+
+
bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}
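
SpillFPRegs and UnSpillFPRegs above use the same bit-walk as the core spill code: shift fp_spill_mask_ right one bit per register and act on each set bit, laying out 64-bit slots starting at frame_size minus room for both spill areas. A compilable sketch of just that walk and offset math; the slot size, example mask, and frame numbers are illustrative (the real code derives the base offset from GetInstructionSetPointerSize).

// Minimal sketch of the spill-mask walk used by SpillFPRegs: visit each set
// bit, lowest register first, placing spills contiguously starting at
// frame_size - slot_size * (num_fp_spills + num_core_spills).
#include <cstdint>
#include <cstdio>

void WalkFpSpillMask(uint32_t fp_spill_mask, int frame_size,
                     int num_core_spills, int num_fp_spills) {
  int offset = frame_size - 8 * (num_fp_spills + num_core_spills);
  for (int reg = 0; fp_spill_mask != 0u; fp_spill_mask >>= 1, ++reg) {
    if (fp_spill_mask & 1u) {
      std::printf("spill xmm%d at [rsp + %d]\n", reg, offset);
      offset += static_cast<int>(sizeof(double));
    }
  }
}

int main() {
  // e.g. XMM12..XMM15 promoted, a hypothetical 64-byte frame, one core spill:
  WalkFpSpillMask(0xF000u, 64, /*num_core_spills=*/1, /*num_fp_spills=*/4);
}
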
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923..5657381 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -66,7 +66,9 @@ namespace art {
* XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch
* XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch
* --- x86-64/x32 registers
- * XMM8 .. 15: caller save available as scratch registers for ART.
+ * XMM8 .. 11: caller save available as scratch registers for ART.
+ * XMM12 .. 15: callee save available as promoted registers for ART.
+ * This change (XMM12..15) is for QCG only, for others they are caller save.
*
* X87 is a necessary evil outside of ART code for x86:
* ST0: x86 float/double native return value, caller save
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 5febed2..525f05c 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -130,6 +130,10 @@ X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_s
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14));
+ callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15));
}
uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
@@ -137,6 +141,10 @@ uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
1 << kNumberOfCpuRegisters;
}
+uint32_t X86_64JniCallingConvention::FpSpillMask() const {
+ return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15;
+}
+
size_t X86_64JniCallingConvention::FrameSize() {
// Method*, return address and callee save area size, local reference segment state
size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
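
Since FpSpillMask() is now non-zero, the callee-save area that FrameSize() must cover grows by one 8-byte slot per XMM bit (four slots for XMM12..XMM15). A hedged sketch of that accounting, assuming 8-byte slots for both masks; the helper name and constants are illustrative, not the real JniCallingConvention math.

// Illustrative accounting only: one 8-byte slot per set bit in either mask.
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr size_t kSlotSize = 8;

size_t CalleeSaveAreaSize(uint32_t core_spill_mask, uint32_t fp_spill_mask) {
  size_t slots = std::bitset<32>(core_spill_mask).count() +
                 std::bitset<32>(fp_spill_mask).count();
  return slots * kSlotSize;
}

int main() {
  // The new FP mask alone (XMM12..XMM15) adds 4 slots, i.e. 32 bytes.
  uint32_t fp_mask = (1u << 12) | (1u << 13) | (1u << 14) | (1u << 15);
  return CalleeSaveAreaSize(/*core_spill_mask=*/0u, fp_mask) == 32 ? 0 : 1;
}
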
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 1ba5353..7a90c6e 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -61,9 +61,7 @@ class X86_64JniCallingConvention FINAL : public JniCallingConvention {
}
ManagedRegister ReturnScratchRegister() const OVERRIDE;
uint32_t CoreSpillMask() const OVERRIDE;
- uint32_t FpSpillMask() const OVERRIDE {
- return 0;
- }
+ uint32_t FpSpillMask() const OVERRIDE;
bool IsCurrentParamInRegister() OVERRIDE;
bool IsCurrentParamOnStack() OVERRIDE;
ManagedRegister CurrentParamRegister() OVERRIDE;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 4d5d613..78738d8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1671,16 +1671,31 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& spill_regs,
const ManagedRegisterEntrySpills& entry_spills) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ int gpr_count = 0;
for (int i = spill_regs.size() - 1; i >= 0; --i) {
- pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsCpuRegister()) {
+ pushq(spill.AsCpuRegister());
+ gpr_count++;
+ }
}
// return address then method on stack
- addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) +
- sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
- kFramePointerSize /*return address*/));
+ int64_t rest_of_frame = static_cast<int64_t>(frame_size)
+ - (gpr_count * kFramePointerSize)
+ - kFramePointerSize /*return address*/;
+ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+ // spill xmms
+ int64_t offset = rest_of_frame;
+ for (int i = spill_regs.size() - 1; i >= 0; --i) {
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsXmmRegister()) {
+ offset -= sizeof(double);
+ movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+ }
+ }
DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
- subq(CpuRegister(RSP), Immediate(4));
+
movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
for (size_t i = 0; i < entry_spills.size(); ++i) {
@@ -1707,9 +1722,24 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void X86_64Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& spill_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
- addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
+ int gpr_count = 0;
+ // unspill xmms
+ int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
for (size_t i = 0; i < spill_regs.size(); ++i) {
- popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsXmmRegister()) {
+ offset += sizeof(double);
+ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+ } else {
+ gpr_count++;
+ }
+ }
+ addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize));
+ for (size_t i = 0; i < spill_regs.size(); ++i) {
+ x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
+ if (spill.IsCpuRegister()) {
+ popq(spill.AsCpuRegister());
+ }
}
ret();
}
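
The reworked BuildFrame pushes the GPR spills, drops RSP by the rest of the frame in a single subq (frame_size minus the pushed GPRs and the return address), and then stores the XMM spills just below the pushed registers. A worked example of those offsets under assumed numbers: an 80-byte frame, two GPR and two XMM spills, 8-byte pointers.

// Worked example of the BuildFrame offset math above; all numbers hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t frame_size = 80;
  const int64_t kFramePointerSize = 8;
  const int64_t gpr_count = 2;   // spilled with pushq, 8 bytes each
  const int64_t xmm_count = 2;   // spilled with movsd below the GPRs

  // One subq covers everything below the pushed GPRs and the return address.
  int64_t rest_of_frame = frame_size - gpr_count * kFramePointerSize
                          - kFramePointerSize /* return address */;
  assert(rest_of_frame == 56);

  // XMM slots are carved from the top of that region, in reverse spill-list order.
  int64_t offset = rest_of_frame;
  for (int64_t i = 0; i < xmm_count; ++i) {
    offset -= static_cast<int64_t>(sizeof(double));  // one movsd slot per XMM
  }
  assert(offset == 40);  // two XMM slots end up at [rsp+48] and [rsp+40]
  return 0;
}
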
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index f7bad8b..dc1758f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -246,11 +246,9 @@ std::string buildframe_test_fn(x86_64::X86_64Assembler* assembler) {
str << "pushq %rsi\n";
str << "pushq %r10\n";
// 2) Move down the stack pointer.
- ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 +
- sizeof(StackReference<mirror::ArtMethod>) + 8;
- str << "addq $" << displacement << ", %rsp\n";
- // 3) Make space for method reference, and store it.
- str << "subq $4, %rsp\n";
+ ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8);
+ str << "subq $" << displacement << ", %rsp\n";
+ // 3) Store method reference.
str << "movl %edi, (%rsp)\n";
// 4) Entry spills.
str << "movq %rax, " << frame_size + 0 << "(%rsp)\n";