author     Serguei Katkov <serguei.i.katkov@intel.com>   2014-07-08 17:21:53 +0700
committer  Chao-ying Fu <chao-ying.fu@intel.com>         2014-07-10 13:31:47 -0700
commit     c380191f3048db2a3796d65db8e5d5a5e7b08c65
tree       56f7f5fc60f8445ead63cd43faf06b9e1dfda6b2
parent     cba6b1fc88fd54c35211fd49a7a7501cfcdaa170
x86_64: Enable fp-reg promotion
This patch makes four registers, XMM12-XMM15, available for promotion
of fp virtual registers.
Change-Id: I3f89ad07fc8ae98b70f550eada09be7b693ffb67
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
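
For readers skimming the hunks below, here is a small self-contained sketch (not ART code) of the bookkeeping the change sets up: XMM0-XMM11 remain caller-save scratch registers for the Quick compiler, while XMM12-XMM15 become callee-save and therefore promotable, and the resulting FP spill mask is what X86_64JniCallingConvention::FpSpillMask() and the adjusted frame-size constants account for. Register numbers and the 8-byte slot size are taken from the patch itself; everything else is illustrative.

```cpp
// Hypothetical standalone sketch, not ART code: the caller-save/callee-save
// split this patch creates for XMM registers on x86-64, and the FP spill
// mask / extra frame bytes that follow from it.
#include <cstdint>
#include <cstdio>

int main() {
  // After the change: XMM0-XMM11 stay caller-save temps for the Quick
  // compiler, XMM12-XMM15 are callee-save and available for promotion.
  const int kFirstPromotable = 12;
  const int kLastPromotable  = 15;

  uint32_t fp_spill_mask = 0;
  for (int reg = kFirstPromotable; reg <= kLastPromotable; ++reg) {
    fp_spill_mask |= 1u << reg;  // mirrors FpSpillMask() in the JNI calling convention
  }

  int num_fp_spills = __builtin_popcount(fp_spill_mask);  // 4
  printf("fp spill mask = 0x%x, fp spills = %d, extra frame bytes = %d\n",
         fp_spill_mask, num_fp_spills, num_fp_spills * 8);
  // Prints: fp spill mask = 0xf000, fp spills = 4, extra frame bytes = 32,
  // i.e. the "+ 4*8" that shows up in asm_support_x86_64.h and the entrypoints.
  return 0;
}
```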
-rw-r--r-- | compiler/dex/quick/codegen_util.cc | 16
-rw-r--r-- | compiler/dex/quick/x86/call_x86.cc | 4
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 2
-rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 119
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 4
-rw-r--r-- | compiler/jni/quick/x86_64/calling_convention_x86_64.cc | 8
-rw-r--r-- | compiler/jni/quick/x86_64/calling_convention_x86_64.h | 4
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 44
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 8
-rw-r--r-- | runtime/arch/x86_64/asm_support_x86_64.h | 6
-rw-r--r-- | runtime/arch/x86_64/context_x86_64.cc | 49
-rw-r--r-- | runtime/arch/x86_64/entrypoints_init_x86_64.cc | 4
-rw-r--r-- | runtime/arch/x86_64/jni_entrypoints_x86_64.S | 16
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 172
-rw-r--r-- | runtime/arch/x86_64/quick_method_frame_info_x86_64.h | 6
-rw-r--r-- | runtime/arch/x86_64/registers_x86_64.cc | 9
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 4
17 files changed, 368 insertions, 107 deletions
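
The frame-size constants move in lockstep with the four new spill slots. As a quick sanity check (this snippet is illustrative only, not part of the patch), the arithmetic behind the new values and the compile-time checks in quick_entrypoints_x86_64.S works out as follows, assuming 8-byte slots:

```cpp
// Illustrative sanity check of the new callee-save frame sizes.
#include <cstddef>

constexpr size_t kFrameSaveAll  = 64 + 4 * 8;   // FRAME_SIZE_SAVE_ALL_CALLEE_SAVE, was 64
constexpr size_t kFrameRefsOnly = 64 + 4 * 8;   // FRAME_SIZE_REFS_ONLY_CALLEE_SAVE, was 64
constexpr size_t kFrameRefsArgs = 176 + 4 * 8;  // FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE, was 176

// Mirrors the "ugly compile-time checks" in the entrypoint macros:
// 6 pushed callee-save GPRs + 4 XMM slots + Method* slot + return address.
static_assert(kFrameSaveAll  == 6 * 8 + 4 * 8 + 8 + 8, "SAVE_ALL size");
static_assert(kFrameRefsOnly == 6 * 8 + 4 * 8 + 8 + 8, "REFS_ONLY size");
// 11 GPR slots + 4 XMM slots + 80-byte area (FPR args, padding, Method*) + return address.
static_assert(kFrameRefsArgs == 11 * 8 + 4 * 8 + 80 + 8, "REFS_AND_ARGS size");

// QuickArgumentVisitor shifts by the same 32 bytes: the first GPR arg is now
// at 80 + 4*8 = 112 and the return address at 168 + 4*8 = 200 = frame size - 8.
static_assert(168 + 4 * 8 == kFrameRefsArgs - 8, "return address offset");

int main() { return 0; }
```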
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 5870d22..048aca3 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1046,9 +1046,19 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); - // fp regs already sorted. - for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) { - vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment); + if (cu_->instruction_set == kThumb2) { + // fp regs already sorted. + for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) { + vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment); + } + } else { + // For other platforms regs may have been inserted out of order - sort first. + std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end()); + for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { + // Copy, stripping out the phys register sort key. + vmap_encoder.PushBackUnsigned( + ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + } } } else { DCHECK_EQ(POPCOUNT(core_spill_mask_), 0); diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 9000514..8e2a1e3 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -234,8 +234,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { NewLIR0(kPseudoMethodEntry); /* Spill core callee saves */ SpillCoreRegs(); - /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */ - DCHECK_EQ(num_fp_spills_, 0); + SpillFPRegs(); if (!skip_overflow_check) { class StackOverflowSlowPath : public LIRSlowPath { public: @@ -309,6 +308,7 @@ void X86Mir2Lir::GenExitSequence() { NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); + UnSpillFPRegs(); /* Remove frame except for return address */ stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); NewLIR0(kX86Ret); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index ff7b30e..b0c54e8 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -319,6 +319,8 @@ class X86Mir2Lir : public Mir2Lir { void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset); void SpillCoreRegs(); void UnSpillCoreRegs(); + void UnSpillFPRegs(); + void SpillFPRegs(); static const X86EncodingMap EncodingMap[kX86Last]; bool InexpensiveConstantInt(int32_t value); bool InexpensiveConstantFloat(int32_t value); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index e81f505..1ebbbbd 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -52,6 +52,13 @@ static constexpr RegStorage dp_regs_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 }; +static constexpr RegStorage xp_regs_arr_32[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, +}; +static constexpr RegStorage xp_regs_arr_64[] = { + rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, + rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 +}; static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32}; static constexpr RegStorage reserved_regs_arr_64q[] = 
{rs_rX86_SP_64}; @@ -60,6 +67,24 @@ static constexpr RegStorage core_temps_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI, rs_r8, rs_r9, rs_r10, rs_r11 }; + +// How to add register to be available for promotion: +// 1) Remove register from array defining temp +// 2) Update ClobberCallerSave +// 3) Update JNI compiler ABI: +// 3.1) add reg in JniCallingConvention method +// 3.2) update CoreSpillMask/FpSpillMask +// 4) Update entrypoints +// 4.1) Update constants in asm_support_x86_64.h for new frame size +// 4.2) Remove entry in SmashCallerSaves +// 4.3) Update jni_entrypoints to spill/unspill new callee save reg +// 4.4) Update quick_entrypoints to spill/unspill new callee save reg +// 5) Update runtime ABI +// 5.1) Update quick_method_frame_info with new required spills +// 5.2) Update QuickArgumentVisitor with new offsets to gprs and xmms +// Note that you cannot use register corresponding to incoming args +// according to ABI and QCG needs one additional XMM temp for +// bulk copy in preparation to call. static constexpr RegStorage core_temps_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q, rs_r8q, rs_r9q, rs_r10q, rs_r11q @@ -69,14 +94,14 @@ static constexpr RegStorage sp_temps_arr_32[] = { }; static constexpr RegStorage sp_temps_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, - rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 + rs_fr8, rs_fr9, rs_fr10, rs_fr11 }; static constexpr RegStorage dp_temps_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; static constexpr RegStorage dp_temps_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, - rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 + rs_dr8, rs_dr9, rs_dr10, rs_dr11 }; static constexpr RegStorage xp_temps_arr_32[] = { @@ -84,7 +109,7 @@ static constexpr RegStorage xp_temps_arr_32[] = { }; static constexpr RegStorage xp_temps_arr_64[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, - rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 + rs_xr8, rs_xr9, rs_xr10, rs_xr11 }; static constexpr ArrayRef<const RegStorage> empty_pool; @@ -95,6 +120,8 @@ static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32); static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64); static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32); static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64); static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32); static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64); static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q); @@ -437,21 +464,13 @@ bool X86Mir2Lir::IsByteRegister(RegStorage reg) { /* Clobber all regs that might be used by an external C call */ void X86Mir2Lir::ClobberCallerSave() { - Clobber(rs_rAX); - Clobber(rs_rCX); - Clobber(rs_rDX); - Clobber(rs_rBX); - - Clobber(rs_fr0); - Clobber(rs_fr1); - Clobber(rs_fr2); - Clobber(rs_fr3); - Clobber(rs_fr4); - Clobber(rs_fr5); - Clobber(rs_fr6); - Clobber(rs_fr7); - if (cu_->target64) { + Clobber(rs_rAX); + Clobber(rs_rCX); + Clobber(rs_rDX); + Clobber(rs_rSI); + Clobber(rs_rDI); + Clobber(rs_r8); Clobber(rs_r9); Clobber(rs_r10); @@ -461,11 +480,21 @@ void X86Mir2Lir::ClobberCallerSave() { Clobber(rs_fr9); 
Clobber(rs_fr10); Clobber(rs_fr11); - Clobber(rs_fr12); - Clobber(rs_fr13); - Clobber(rs_fr14); - Clobber(rs_fr15); + } else { + Clobber(rs_rAX); + Clobber(rs_rCX); + Clobber(rs_rDX); + Clobber(rs_rBX); } + + Clobber(rs_fr0); + Clobber(rs_fr1); + Clobber(rs_fr2); + Clobber(rs_fr3); + Clobber(rs_fr4); + Clobber(rs_fr5); + Clobber(rs_fr6); + Clobber(rs_fr7); } RegLocation X86Mir2Lir::GetReturnWideAlt() { @@ -599,11 +628,15 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Target-specific adjustments. // Add in XMM registers. - const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32; - for (RegStorage reg : *xp_temps) { + const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32; + for (RegStorage reg : *xp_regs) { RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); reginfo_map_.Put(reg.GetReg(), info); - info->SetIsTemp(true); + } + const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32; + for (RegStorage reg : *xp_temps) { + RegisterInfo* xp_reg_info = GetRegInfo(reg); + xp_reg_info->SetIsTemp(true); } // Alias single precision xmm to double xmms. @@ -665,9 +698,11 @@ void X86Mir2Lir::SpillCoreRegs() { // Spill mask not including fake return address register uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); + OpSize size = cu_->target64 ? k64 : k32; for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { - StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg)); + StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), + size, kNotVolatile); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -680,14 +715,46 @@ void X86Mir2Lir::UnSpillCoreRegs() { // Spill mask not including fake return address register uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); + OpSize size = cu_->target64 ? k64 : k32; for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { - LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg)); + LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? 
RegStorage::Solo64(reg) : RegStorage::Solo32(reg), + size, kNotVolatile); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } } +void X86Mir2Lir::SpillFPRegs() { + if (num_fp_spills_ == 0) { + return; + } + uint32_t mask = fp_spill_mask_; + int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg), + k64, kNotVolatile); + offset += sizeof(double); + } + } +} +void X86Mir2Lir::UnSpillFPRegs() { + if (num_fp_spills_ == 0) { + return; + } + uint32_t mask = fp_spill_mask_; + int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg), + k64, kNotVolatile); + offset += sizeof(double); + } + } +} + + bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) { return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32); } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 2789923..5657381 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -66,7 +66,9 @@ namespace art { * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch * --- x86-64/x32 registers - * XMM8 .. 15: caller save available as scratch registers for ART. + * XMM8 .. 11: caller save available as scratch registers for ART. + * XMM12 .. 15: callee save available as promoted registers for ART. + * This change (XMM12..15) is for QCG only, for others they are caller save. 
* * X87 is a necessary evil outside of ART code for x86: * ST0: x86 float/double native return value, caller save diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 5febed2..525f05c 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -130,6 +130,10 @@ X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_s callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13)); callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14)); callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM12)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM13)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM14)); + callee_save_regs_.push_back(X86_64ManagedRegister::FromXmmRegister(XMM15)); } uint32_t X86_64JniCallingConvention::CoreSpillMask() const { @@ -137,6 +141,10 @@ uint32_t X86_64JniCallingConvention::CoreSpillMask() const { 1 << kNumberOfCpuRegisters; } +uint32_t X86_64JniCallingConvention::FpSpillMask() const { + return 1 << XMM12 | 1 << XMM13 | 1 << XMM14 | 1 << XMM15; +} + size_t X86_64JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index 1ba5353..7a90c6e 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -61,9 +61,7 @@ class X86_64JniCallingConvention FINAL : public JniCallingConvention { } ManagedRegister ReturnScratchRegister() const OVERRIDE; uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE { - return 0; - } + uint32_t FpSpillMask() const OVERRIDE; bool IsCurrentParamInRegister() OVERRIDE; bool IsCurrentParamOnStack() OVERRIDE; ManagedRegister CurrentParamRegister() OVERRIDE; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 4d5d613..78738d8 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1671,16 +1671,31 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - pushq(spill_regs.at(i).AsX86_64().AsCpuRegister()); + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsCpuRegister()) { + pushq(spill.AsCpuRegister()); + gpr_count++; + } } // return address then method on stack - addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(frame_size) + (spill_regs.size() * kFramePointerSize) + - sizeof(StackReference<mirror::ArtMethod>) /*method*/ + - kFramePointerSize /*return address*/)); + int64_t rest_of_frame = static_cast<int64_t>(frame_size) + - (gpr_count * kFramePointerSize) + - kFramePointerSize /*return address*/; + subq(CpuRegister(RSP), Immediate(rest_of_frame)); + // spill xmms + int64_t offset = rest_of_frame; + for (int i = spill_regs.size() - 1; i >= 0; --i) { + x86_64::X86_64ManagedRegister spill = 
spill_regs.at(i).AsX86_64(); + if (spill.IsXmmRegister()) { + offset -= sizeof(double); + movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + } + } DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); - subq(CpuRegister(RSP), Immediate(4)); + movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister()); for (size_t i = 0; i < entry_spills.size(); ++i) { @@ -1707,9 +1722,24 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86_64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - kFramePointerSize)); + int gpr_count = 0; + // unspill xmms + int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; for (size_t i = 0; i < spill_regs.size(); ++i) { - popq(spill_regs.at(i).AsX86_64().AsCpuRegister()); + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsXmmRegister()) { + offset += sizeof(double); + movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + } else { + gpr_count++; + } + } + addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize)); + for (size_t i = 0; i < spill_regs.size(); ++i) { + x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); + if (spill.IsCpuRegister()) { + popq(spill.AsCpuRegister()); + } } ret(); } diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index f7bad8b..dc1758f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -246,11 +246,9 @@ std::string buildframe_test_fn(x86_64::X86_64Assembler* assembler) { str << "pushq %rsi\n"; str << "pushq %r10\n"; // 2) Move down the stack pointer. - ssize_t displacement = -static_cast<ssize_t>(frame_size) + spill_regs.size() * 8 + - sizeof(StackReference<mirror::ArtMethod>) + 8; - str << "addq $" << displacement << ", %rsp\n"; - // 3) Make space for method reference, and store it. - str << "subq $4, %rsp\n"; + ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8); + str << "subq $" << displacement << ", %rsp\n"; + // 3) Store method reference. str << "movl %edi, (%rsp)\n"; // 4) Entry spills. 
str << "movq %rax, " << frame_size + 0 << "(%rsp)\n"; diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h index bff8501..05d0ef8 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.h +++ b/runtime/arch/x86_64/asm_support_x86_64.h @@ -35,9 +35,9 @@ // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu #define THREAD_ID_OFFSET 12 -#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 -#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 +#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8 +#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8 +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8 // Expected size of a heap reference #define HEAP_REFERENCE_SIZE 4 diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index e1f47ee..7699eaf 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -78,6 +78,18 @@ void X86_64Context::SmashCallerSaves() { gprs_[R9] = nullptr; gprs_[R10] = nullptr; gprs_[R11] = nullptr; + fprs_[XMM0] = nullptr; + fprs_[XMM1] = nullptr; + fprs_[XMM2] = nullptr; + fprs_[XMM3] = nullptr; + fprs_[XMM4] = nullptr; + fprs_[XMM5] = nullptr; + fprs_[XMM6] = nullptr; + fprs_[XMM7] = nullptr; + fprs_[XMM8] = nullptr; + fprs_[XMM9] = nullptr; + fprs_[XMM10] = nullptr; + fprs_[XMM11] = nullptr; } bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { @@ -102,41 +114,26 @@ bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) { } } +extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*); + void X86_64Context::DoLongJump() { #if defined(__x86_64__) - // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at - // the top for the stack pointer that doesn't get popped in a pop-all. - volatile uintptr_t gprs[kNumberOfCpuRegisters + 1]; + uintptr_t gprs[kNumberOfCpuRegisters + 1]; + uintptr_t fprs[kNumberOfFloatRegisters]; + for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] : X86_64Context::kBadGprBase + i; } + for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { + fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i; + } + // We want to load the stack pointer one slot below so that the ret will pop eip. uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize; gprs[kNumberOfCpuRegisters] = rsp; *(reinterpret_cast<uintptr_t*>(rsp)) = rip_; - __asm__ __volatile__( - "movq %0, %%rsp\n\t" // RSP points to gprs. - "popq %%r15\n\t" // Load all registers except RSP and RIP with values in gprs. - "popq %%r14\n\t" - "popq %%r13\n\t" - "popq %%r12\n\t" - "popq %%r11\n\t" - "popq %%r10\n\t" - "popq %%r9\n\t" - "popq %%r8\n\t" - "popq %%rdi\n\t" - "popq %%rsi\n\t" - "popq %%rbp\n\t" - "addq $8, %%rsp\n\t" - "popq %%rbx\n\t" - "popq %%rdx\n\t" - "popq %%rcx\n\t" - "popq %%rax\n\t" - "popq %%rsp\n\t" // Load stack pointer. - "ret\n\t" // From higher in the stack pop rip. - : // output. - : "g"(&gprs[0]) // input. - :); // clobber. 
+ + art_quick_do_long_jump(gprs, fprs); #else UNIMPLEMENTED(FATAL); #endif diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 609d1c6..204d52c 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -35,7 +35,7 @@ extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*); extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*); // Cast entrypoints. -extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, +extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass, const mirror::Class* ref_class); extern "C" void art_quick_check_cast(void*, void*); @@ -129,7 +129,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, ResetQuickAllocEntryPoints(qpoints); // Cast - qpoints->pInstanceofNonTrivial = artIsAssignableFromCode; + qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code; qpoints->pCheckCast = art_quick_check_cast; // DexCache diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index d668797..f6736df 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -28,8 +28,8 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub PUSH rdx // Arg. PUSH rcx // Arg. // Create space for FPR args, plus padding for alignment - subq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(72) + subq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(72 + 4 * 8) // Save FPRs. movq %xmm0, 0(%rsp) movq %xmm1, 8(%rsp) @@ -39,6 +39,10 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq %xmm5, 40(%rsp) movq %xmm6, 48(%rsp) movq %xmm7, 56(%rsp) + movq %xmm12, 64(%rsp) + movq %xmm13, 72(%rsp) + movq %xmm14, 80(%rsp) + movq %xmm15, 88(%rsp) // prepare call movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current() // call @@ -52,8 +56,12 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq 40(%rsp), %xmm5 movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 - addq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(-72) + movq 64(%rsp), %xmm12 + movq 72(%rsp), %xmm13 + movq 80(%rsp), %xmm14 + movq 88(%rsp), %xmm15 + addq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8) POP rcx // Arg. POP rdx // Arg. POP rsi // Arg. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8fa947c..7f7226c 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -16,6 +16,26 @@ #include "asm_support_x86_64.S" +MACRO0(SETUP_FP_CALLEE_SAVE_FRAME) + // Create space for ART FP callee-saved registers + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) +END_MACRO + +MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME) + // Restore ART FP callee-saved registers + movq 0(%rsp), %xmm12 + movq 8(%rsp), %xmm13 + movq 16(%rsp), %xmm14 + movq 24(%rsp), %xmm15 + addq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(- 4 * 8) +END_MACRO + // For x86, the CFA is esp+4, the address above the pushed return address on the stack. /* @@ -37,6 +57,14 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. + // Create space for FPR args, plus padding for alignment + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + // Save FPRs. 
+ movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). CFI_ADJUST_CFA_OFFSET(8) // R10 := ArtMethod* for save all callee save frame method. @@ -46,7 +74,7 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8) +#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8) #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected." #endif #endif // __APPLE__ @@ -71,8 +99,14 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. - subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). - CFI_ADJUST_CFA_OFFSET(8) + // Create space for FPR args, plus padding for alignment + subq LITERAL(8 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(8 + 4*8) + // Save FPRs. + movq %xmm12, 8(%rsp) + movq %xmm13, 16(%rsp) + movq %xmm14, 24(%rsp) + movq %xmm15, 32(%rsp) // R10 := ArtMethod* for refs only callee save frame method. movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 // Store ArtMethod* to bottom of stack. @@ -80,15 +114,19 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8) +#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8) #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected." #endif #endif // __APPLE__ END_MACRO MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME) - addq MACRO_LITERAL(8), %rsp - CFI_ADJUST_CFA_OFFSET(-8) + movq 8(%rsp), %xmm12 + movq 16(%rsp), %xmm13 + movq 24(%rsp), %xmm14 + movq 32(%rsp), %xmm15 + addq LITERAL(8 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-8 - 4*8) // TODO: optimize by not restoring callee-saves restored by the ABI POP rbx POP rbp @@ -123,8 +161,8 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq MACRO_LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq MACRO_LITERAL(80 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4 * 8) // R10 := ArtMethod* for ref and args callee save frame method. movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 // Save FPRs. @@ -136,12 +174,16 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) // Store ArtMethod* to bottom of stack. movq %r10, 0(%rsp) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8) +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8) #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected." 
#endif #endif // __APPLE__ @@ -157,8 +199,12 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME) movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 - addq MACRO_LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 + addq MACRO_LITERAL(80 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8)) // Restore callee and GPR args, mixed together to agree with core spills bitmap. POP rcx POP rdx @@ -536,6 +582,58 @@ DEFINE_FUNCTION art_quick_invoke_static_stub #endif // __APPLE__ END_FUNCTION art_quick_invoke_static_stub + /* + * Long jump stub. + * On entry: + * rdi = gprs + * rsi = fprs + */ +DEFINE_FUNCTION art_quick_do_long_jump +#if defined(__APPLE__) + int3 + int3 +#else + // Restore FPRs. + movq 0(%rsi), %xmm0 + movq 8(%rsi), %xmm1 + movq 16(%rsi), %xmm2 + movq 24(%rsi), %xmm3 + movq 32(%rsi), %xmm4 + movq 40(%rsi), %xmm5 + movq 48(%rsi), %xmm6 + movq 56(%rsi), %xmm7 + movq 64(%rsi), %xmm8 + movq 72(%rsi), %xmm9 + movq 80(%rsi), %xmm10 + movq 88(%rsi), %xmm11 + movq 96(%rsi), %xmm12 + movq 104(%rsi), %xmm13 + movq 112(%rsi), %xmm14 + movq 120(%rsi), %xmm15 + // Restore FPRs. + movq %rdi, %rsp // RSP points to gprs. + // Load all registers except RSP and RIP with values in gprs. + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rdi + popq %rsi + popq %rbp + addq LITERAL(8), %rsp // Skip rsp + popq %rbx + popq %rdx + popq %rcx + popq %rax + popq %rsp // Load stack pointer. + ret // From higher in the stack pop rip. +#endif // __APPLE__ +END_FUNCTION art_quick_do_long_jump + MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro) DEFINE_FUNCTION VAR(c_name, 0) SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC @@ -820,13 +918,17 @@ END_FUNCTION art_quick_unlock_object DEFINE_FUNCTION art_quick_check_cast PUSH rdi // Save args for exc PUSH rsi + SETUP_FP_CALLEE_SAVE_FRAME call PLT_SYMBOL(artIsAssignableFromCode) // (Class* klass, Class* ref_klass) testq %rax, %rax jz 1f // jump forward if not assignable + RESTORE_FP_CALLEE_SAVE_FRAME addq LITERAL(16), %rsp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) + ret 1: + RESTORE_FP_CALLEE_SAVE_FRAME POP rsi // Pop arguments POP rdi SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context @@ -907,6 +1009,7 @@ DEFINE_FUNCTION art_quick_aput_obj PUSH rdx subq LITERAL(8), %rsp // Alignment padding. CFI_ADJUST_CFA_OFFSET(8) + SETUP_FP_CALLEE_SAVE_FRAME // "Uncompress" = do nothing, as already zero-extended on load. movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class. @@ -918,6 +1021,7 @@ DEFINE_FUNCTION art_quick_aput_obj testq %rax, %rax jz .Lthrow_array_store_exception + RESTORE_FP_CALLEE_SAVE_FRAME // Restore arguments. addq LITERAL(8), %rsp CFI_ADJUST_CFA_OFFSET(-8) @@ -934,6 +1038,7 @@ DEFINE_FUNCTION art_quick_aput_obj // movb %dl, (%rdx, %rdi) ret .Lthrow_array_store_exception: + RESTORE_FP_CALLEE_SAVE_FRAME // Restore arguments. addq LITERAL(8), %rsp CFI_ADJUST_CFA_OFFSET(-8) @@ -1012,8 +1117,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4*8) // Save FPRs. 
movq %xmm0, 16(%rsp) movq %xmm1, 24(%rsp) @@ -1023,14 +1128,18 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) // Store proxy method to bottom of stack. movq %rdi, 0(%rsp) movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current(). movq %rsp, %rcx // Pass SP. call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP) movq %rax, %xmm0 // Copy return value in case of float returns. - addq LITERAL(168), %rsp // Pop arguments. - CFI_ADJUST_CFA_OFFSET(-168) + addq LITERAL(168 + 4*8), %rsp // Pop arguments. + CFI_ADJUST_CFA_OFFSET(-168 - 4*8) RETURN_OR_DELIVER_PENDING_EXCEPTION END_FUNCTION art_quick_proxy_invoke_handler @@ -1156,8 +1265,8 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline PUSH rdx // Quick arg 2. PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. - subq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(80) + subq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(80 + 4*8) // Save FPRs. movq %xmm0, 16(%rsp) movq %xmm1, 24(%rsp) @@ -1167,6 +1276,10 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) + movq %xmm12, 80(%rsp) + movq %xmm13, 88(%rsp) + movq %xmm14, 96(%rsp) + movq %xmm15, 104(%rsp) movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack. movq %rsp, %rbp // save SP at (old) callee-save frame CFI_DEF_CFA_REGISTER(rbp) @@ -1260,9 +1373,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 // was 80 bytes - addq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + addq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-80 - 4*8) // Save callee and GPR args, mixed together to agree with core spills bitmap. POP rcx // Arg. POP rdx // Arg. @@ -1292,9 +1409,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq 56(%rsp), %xmm5 movq 64(%rsp), %xmm6 movq 72(%rsp), %xmm7 - // was 80 bytes - addq LITERAL(80), %rsp - CFI_ADJUST_CFA_OFFSET(-80) + movq 80(%rsp), %xmm12 + movq 88(%rsp), %xmm13 + movq 96(%rsp), %xmm14 + movq 104(%rsp), %xmm15 + // was 80 + 32 bytes + addq LITERAL(80 + 4*8), %rsp + CFI_ADJUST_CFA_OFFSET(-80 - 4*8) // Save callee and GPR args, mixed together to agree with core spills bitmap. POP rcx // Arg. POP rdx // Arg. 
@@ -1450,3 +1571,10 @@ DEFINE_FUNCTION art_quick_string_compareto END_FUNCTION art_quick_string_compareto UNIMPLEMENTED art_quick_memcmp16 + +DEFINE_FUNCTION art_quick_assignable_from_code + SETUP_FP_CALLEE_SAVE_FRAME + call PLT_SYMBOL(artIsAssignableFromCode) // (const mirror::Class*, const mirror::Class*) + RESTORE_FP_CALLEE_SAVE_FRAME + ret +END_FUNCTION art_quick_assignable_from_code diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h index 6183909..53aa212 100644 --- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h +++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h @@ -34,6 +34,9 @@ static constexpr uint32_t kX86_64CalleeSaveFpArgSpills = (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) | (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) | (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7); +static constexpr uint32_t kX86_64CalleeSaveFpSpills = + (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) | + (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15); constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kX86_64CalleeSaveRefSpills | @@ -42,7 +45,8 @@ constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) { } constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) { - return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0); + return kX86_64CalleeSaveFpSpills | + (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0); } constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) { diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc index 38f3494..f29c426 100644 --- a/runtime/arch/x86_64/registers_x86_64.cc +++ b/runtime/arch/x86_64/registers_x86_64.cc @@ -34,5 +34,14 @@ std::ostream& operator<<(std::ostream& os, const Register& rhs) { return os; } +std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) { + if (rhs >= XMM0 && rhs <= XMM15) { + os << "xmm" << static_cast<int>(rhs); + } else { + os << "Register[" << static_cast<int>(rhs) << "]"; + } + return os; +} + } // namespace x86_64 } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 95cb85e..2a66f2f 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -175,8 +175,8 @@ class QuickArgumentVisitor { static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs. static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80 + 4*8; // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168 + 4*8; // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { switch (gpr_index) { case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA)); |
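
Finally, the least obvious compiler-side hunk is the new non-Thumb2 path in Mir2Lir::GetCompiledMethod(), where fp vmap entries are sorted and the physical-register sort key is masked off before encoding. The sketch below is a standalone illustration of that sort-then-strip idea; kVRegNumWidth and kEntryAdjustment are placeholder values chosen for the example, not the actual ART constants.

```cpp
// Standalone illustration (not ART code) of the new non-Thumb2 fp vmap path:
// each entry carries the physical XMM number in the high bits as a sort key,
// entries are sorted, and the key is stripped before encoding.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr int kVRegNumWidth = 16;          // assumed width of a Dalvik vreg number
constexpr uint32_t kEntryAdjustment = 0;   // placeholder for VmapTable::kEntryAdjustment

int main() {
  // Each entry: (physical XMM number << kVRegNumWidth) | Dalvik vreg number.
  std::vector<uint32_t> fp_vmap = {
      (15u << kVRegNumWidth) | 7,   // v7 promoted to XMM15
      (12u << kVRegNumWidth) | 3,   // v3 promoted to XMM12
      (13u << kVRegNumWidth) | 9,   // v9 promoted to XMM13
  };

  // Registers may have been promoted out of order, so sort by physical register.
  std::sort(fp_vmap.begin(), fp_vmap.end());

  for (uint32_t entry : fp_vmap) {
    // Strip the sort key, keeping only the adjusted vreg number.
    uint32_t encoded = ~(~0u << kVRegNumWidth) & (entry + kEntryAdjustment);
    printf("encoded vreg: %u\n", encoded);  // 3, 9, 7 in physical-register order
  }
  return 0;
}
```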