23 files changed, 1121 insertions, 273 deletions
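This change switches ARM32 quick code from core-register argument passing to a hard-float convention (guarded by kArm32QuickCodeUseSoftFloat): float and double arguments are passed in s0-s15 / d0-d7, doubles are kept on even S-register boundaries, and a later float may back-fill the odd slot a double left behind. Below is a minimal standalone sketch of that assignment rule as applied by InToRegStorageArmMapper::GetNextReg() and quick_invoke_reg_setup() in the hunks that follow; the constants, helper names, and the main() driver are illustrative only and are not code from the patch.

    // Sketch only: models the assumed hard-float argument mapping
    // (r1-r3 for core words, s0-s15/d0-d7 for FP) described above.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <string>

    namespace {

    constexpr uint32_t kNumCoreArgRegs = 3;  // r1, r2, r3 (r0 carries the ArtMethod*).
    constexpr uint32_t kNumFpArgRegs = 16;   // s0..s15, viewed as d0..d7 for doubles.

    uint32_t RoundUp2(uint32_t x) { return (x + 1u) & ~1u; }

    // Walks a Java shorty (return type first, then argument types) and prints
    // where each argument word would land under the assumed convention.
    void MapArgs(const std::string& shorty) {
      uint32_t gpr = 0;         // Next free core register, counted from r1.
      uint32_t fpr = 0;         // Next free single-precision slot (sN).
      uint32_t fpr_double = 0;  // Next free even slot for doubles (s{2k} == d{k}).

      std::printf("shorty %s:\n", shorty.c_str());
      for (size_t i = 1; i < shorty.size(); ++i) {
        const char c = shorty[i];
        if (c == 'D') {
          // Doubles take an even/odd S-register pair and never overlap earlier floats.
          fpr_double = std::max(fpr_double, RoundUp2(fpr));
          if (fpr_double + 1 < kNumFpArgRegs) {
            std::printf("  arg %zu (D) -> d%u (s%u/s%u)\n", i, fpr_double / 2,
                        fpr_double, fpr_double + 1);
            fpr_double += 2;
          } else {
            std::printf("  arg %zu (D) -> stack\n", i);
          }
        } else if (c == 'F') {
          // A float may back-fill the odd slot left behind a double; otherwise it
          // continues past the doubles allocated so far.
          if (fpr % 2 == 0) {
            fpr = std::max(fpr_double, fpr);
          }
          if (fpr < kNumFpArgRegs) {
            std::printf("  arg %zu (F) -> s%u\n", i, fpr++);
          } else {
            std::printf("  arg %zu (F) -> stack\n", i);
          }
        } else {
          // Ints, references, and each 32-bit half of a long go to r1..r3, then stack.
          const uint32_t words = (c == 'J') ? 2 : 1;
          for (uint32_t w = 0; w < words; ++w) {
            if (gpr < kNumCoreArgRegs) {
              std::printf("  arg %zu word %u -> r%u\n", i, w, gpr + 1);
              ++gpr;
            } else {
              std::printf("  arg %zu word %u -> stack\n", i, w);
            }
          }
        }
      }
    }

    }  // namespace

    int main() {
      MapArgs("VFDF");  // float, double, float: s0, d1 (s2/s3), then s1 back-fills.
      MapArgs("VIJD");  // int, long, double: r1, r2/r3, d0.
      return 0;
    }

The same rule shows up three times in the patch (GetNextReg() for the compiler's in-argument mapping, EntrySpills() for the JNI calling convention, and quick_invoke_reg_setup() for the invoke stub), so keeping the double-alignment and float back-fill behavior identical in all three places is what makes the quick ABI self-consistent.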
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index c6c5ca7..beeb3ad 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -60,6 +60,14 @@ enum SpecialTargetRegister { kFArg5, kFArg6, kFArg7, + kFArg8, + kFArg9, + kFArg10, + kFArg11, + kFArg12, + kFArg13, + kFArg14, + kFArg15, kRet0, kRet1, kInvokeTgt, diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index d935bc3..36cb7a4 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -297,19 +297,20 @@ constexpr RegStorage rs_dr30(RegStorage::kValid | dr30); constexpr RegStorage rs_dr31(RegStorage::kValid | dr31); #endif -// RegisterLocation templates return values (r0, or r0/r1). -const RegLocation arm_loc_c_return - {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_wide +// RegisterLocation templates return values (r0, r0/r1, s0, or d0). +// Note: The return locations are shared between quick code and quick helper. This follows quick +// ABI. Quick helper assembly routine needs to handle the ABI differences. +const RegLocation arm_loc_c_return = + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_wide = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_float - {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_double - {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; + RegStorage::MakeRegPair(rs_r0, rs_r1), INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_float = kArm32QuickCodeUseSoftFloat + ? arm_loc_c_return + : RegLocation({kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_fr0, INVALID_SREG, INVALID_SREG}); +const RegLocation arm_loc_c_return_double = kArm32QuickCodeUseSoftFloat + ? arm_loc_c_return_wide + : RegLocation({kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_dr0, INVALID_SREG, INVALID_SREG}); enum ArmShiftEncodings { kArmLsl = 0x0, diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 6fd29f2..442c4fc 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -25,6 +25,64 @@ namespace art { class ArmMir2Lir FINAL : public Mir2Lir { + protected: + // TODO: Consolidate hard float target support. + // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends. + // Base class used to get RegStorage for next argument. + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; + virtual ~InToRegStorageMapper() { + } + }; + + // Inherited class for ARM backend. + class InToRegStorageArmMapper FINAL : public InToRegStorageMapper { + public: + InToRegStorageArmMapper() + : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) { + } + + virtual ~InToRegStorageArmMapper() { + } + + RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE; + + private: + uint32_t cur_core_reg_; + uint32_t cur_fp_reg_; + uint32_t cur_fp_double_reg_; + }; + + // Class to map argument to RegStorage. The mapping object is initialized by a mapper. 
+ class InToRegStorageMapping FINAL { + public: + InToRegStorageMapping() + : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) { + } + + int GetMaxMappedIn() const { + return max_mapped_in_; + } + + bool IsThereStackMapped() const { + return is_there_stack_mapped_; + } + + bool IsInitialized() const { + return initialized_; + } + + void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); + RegStorage Get(int in_position) const; + + private: + std::map<int, RegStorage> mapping_; + int max_mapped_in_; + bool is_there_stack_mapped_; + bool initialized_; + }; + public: ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -47,15 +105,30 @@ class ArmMir2Lir FINAL : public Mir2Lir { void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg); // Required for target - register utilities. - RegStorage TargetReg(SpecialTargetRegister reg); - RegStorage GetArgMappingToPhysicalReg(int arg_num); - RegLocation GetReturnAlt(); - RegLocation GetReturnWideAlt(); - RegLocation LocCReturn(); - RegLocation LocCReturnRef(); - RegLocation LocCReturnDouble(); - RegLocation LocCReturnFloat(); - RegLocation LocCReturnWide(); + RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; + RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE { + if (wide_kind == kWide) { + DCHECK((kArg0 <= reg && reg < kArg3) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg)); + RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg), + TargetReg(static_cast<SpecialTargetRegister>(reg + 1))); + if (ret_reg.IsFloat()) { + // Regard double as double, be consistent with register allocation. + ret_reg = As64BitFloatReg(ret_reg); + } + return ret_reg; + } else { + return TargetReg(reg); + } + } + + RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; + RegLocation GetReturnAlt() OVERRIDE; + RegLocation GetReturnWideAlt() OVERRIDE; + RegLocation LocCReturn() OVERRIDE; + RegLocation LocCReturnRef() OVERRIDE; + RegLocation LocCReturnDouble() OVERRIDE; + RegLocation LocCReturnFloat() OVERRIDE; + RegLocation LocCReturnWide() OVERRIDE; ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); @@ -210,6 +283,19 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this) OVERRIDE; + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this) OVERRIDE; + private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -226,10 +312,10 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) OVERRIDE; RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE; - typedef struct { + struct EasyMultiplyOp { OpKind op; uint32_t shift; - } EasyMultiplyOp; + }; bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op); 
bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); @@ -239,6 +325,36 @@ class ArmMir2Lir FINAL : public Mir2Lir { static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list); ArenaVector<LIR*> call_method_insns_; + + /** + * @brief Given float register pair, returns Solo64 float register. + * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3). + * @return A Solo64 float mapping to the register pair (e.g. @c d1). + */ + static RegStorage As64BitFloatReg(RegStorage reg) { + DCHECK(reg.IsFloat()); + + RegStorage low = reg.GetLow(); + RegStorage high = reg.GetHigh(); + DCHECK((low.GetRegNum() % 2 == 0) && (low.GetRegNum() + 1 == high.GetRegNum())); + + return RegStorage::FloatSolo64(low.GetRegNum() / 2); + } + + /** + * @brief Given Solo64 float register, returns float register pair. + * @param reg #RegStorage containing a Solo64 float register (e.g. @c d1). + * @return A float register pair mapping to the Solo64 float pair (e.g. @c s2 and s3). + */ + static RegStorage As64BitFloatRegPair(RegStorage reg) { + DCHECK(reg.IsDouble() && reg.Is64BitSolo()); + + int reg_num = reg.GetRegNum(); + return RegStorage::MakeRegPair(RegStorage::FloatSolo32(reg_num * 2), + RegStorage::FloatSolo32(reg_num * 2 + 1)); + } + + InToRegStorageMapping in_to_reg_storage_mapping_; }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 9742243..8e08f5f 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -442,6 +442,15 @@ void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { bool src_fp = r_src.IsFloat(); DCHECK(r_dest.Is64Bit()); DCHECK(r_src.Is64Bit()); + // Note: If the register is get by register allocator, it should never be a pair. + // But some functions in mir_2_lir assume 64-bit registers are 32-bit register pairs. + // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect(). + if (dest_fp && r_dest.IsPair()) { + r_dest = As64BitFloatReg(r_dest); + } + if (src_fp && r_src.IsPair()) { + r_src = As64BitFloatReg(r_src); + } if (dest_fp) { if (src_fp) { OpRegCopy(r_dest, r_src); diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index dd8f7fe..7100a28 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -89,7 +89,7 @@ RegLocation ArmMir2Lir::LocCReturnDouble() { // Return a target-dependent special register. RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { - RegStorage res_reg = RegStorage::InvalidReg(); + RegStorage res_reg; switch (reg) { case kSelf: res_reg = rs_rARM_SELF; break; #ifdef ARM_R4_SUSPEND_FLAG @@ -104,10 +104,22 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { case kArg1: res_reg = rs_r1; break; case kArg2: res_reg = rs_r2; break; case kArg3: res_reg = rs_r3; break; - case kFArg0: res_reg = rs_r0; break; - case kFArg1: res_reg = rs_r1; break; - case kFArg2: res_reg = rs_r2; break; - case kFArg3: res_reg = rs_r3; break; + case kFArg0: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r0 : rs_fr0; break; + case kFArg1: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r1 : rs_fr1; break; + case kFArg2: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r2 : rs_fr2; break; + case kFArg3: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r3 : rs_fr3; break; + case kFArg4: res_reg = kArm32QuickCodeUseSoftFloat ? 
RegStorage::InvalidReg() : rs_fr4; break; + case kFArg5: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr5; break; + case kFArg6: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr6; break; + case kFArg7: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr7; break; + case kFArg8: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr8; break; + case kFArg9: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr9; break; + case kFArg10: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr10; break; + case kFArg11: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr11; break; + case kFArg12: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr12; break; + case kFArg13: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr13; break; + case kFArg14: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr14; break; + case kFArg15: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr15; break; case kRet0: res_reg = rs_r0; break; case kRet1: res_reg = rs_r1; break; case kInvokeTgt: res_reg = rs_rARM_LR; break; @@ -119,20 +131,6 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { return res_reg; } -RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - switch (arg_num) { - case 0: - return rs_r1; - case 1: - return rs_r2; - case 2: - return rs_r3; - default: - return RegStorage::InvalidReg(); - } -} - /* * Decode the register id. */ @@ -718,6 +716,32 @@ void ArmMir2Lir::LockCallTemps() { LockTemp(rs_r1); LockTemp(rs_r2); LockTemp(rs_r3); + if (!kArm32QuickCodeUseSoftFloat) { + LockTemp(rs_fr0); + LockTemp(rs_fr1); + LockTemp(rs_fr2); + LockTemp(rs_fr3); + LockTemp(rs_fr4); + LockTemp(rs_fr5); + LockTemp(rs_fr6); + LockTemp(rs_fr7); + LockTemp(rs_fr8); + LockTemp(rs_fr9); + LockTemp(rs_fr10); + LockTemp(rs_fr11); + LockTemp(rs_fr12); + LockTemp(rs_fr13); + LockTemp(rs_fr14); + LockTemp(rs_fr15); + LockTemp(rs_dr0); + LockTemp(rs_dr1); + LockTemp(rs_dr2); + LockTemp(rs_dr3); + LockTemp(rs_dr4); + LockTemp(rs_dr5); + LockTemp(rs_dr6); + LockTemp(rs_dr7); + } } /* To be used when explicitly managing register use */ @@ -726,6 +750,32 @@ void ArmMir2Lir::FreeCallTemps() { FreeTemp(rs_r1); FreeTemp(rs_r2); FreeTemp(rs_r3); + if (!kArm32QuickCodeUseSoftFloat) { + FreeTemp(rs_fr0); + FreeTemp(rs_fr1); + FreeTemp(rs_fr2); + FreeTemp(rs_fr3); + FreeTemp(rs_fr4); + FreeTemp(rs_fr5); + FreeTemp(rs_fr6); + FreeTemp(rs_fr7); + FreeTemp(rs_fr8); + FreeTemp(rs_fr9); + FreeTemp(rs_fr10); + FreeTemp(rs_fr11); + FreeTemp(rs_fr12); + FreeTemp(rs_fr13); + FreeTemp(rs_fr14); + FreeTemp(rs_fr15); + FreeTemp(rs_dr0); + FreeTemp(rs_dr1); + FreeTemp(rs_dr2); + FreeTemp(rs_dr3); + FreeTemp(rs_dr4); + FreeTemp(rs_dr5); + FreeTemp(rs_dr6); + FreeTemp(rs_dr7); + } } RegStorage ArmMir2Lir::LoadHelper(QuickEntrypointEnum trampoline) { @@ -847,4 +897,313 @@ void ArmMir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } +RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) { + const RegStorage coreArgMappingToPhysicalReg[] = + {rs_r1, rs_r2, rs_r3}; + const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); + const RegStorage fpArgMappingToPhysicalReg[] = + {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, + 
rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; + const uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); + COMPILE_ASSERT(fpArgMappingToPhysicalRegSize % 2 == 0, knum_of_fp_arg_regs_not_even); + + if (kArm32QuickCodeUseSoftFloat) { + is_double_or_float = false; // Regard double as long, float as int. + is_wide = false; // Map long separately. + } + + RegStorage result = RegStorage::InvalidReg(); + if (is_double_or_float) { + // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double. + if (is_wide || cur_fp_double_reg_ % 2 != 0) { + cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2)); + if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) { + // TODO: Replace by following code in the branch when FlushIns() support 64-bit registers. + // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_], + // fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]); + // result = As64BitFloatReg(result); + // cur_fp_double_reg_ += 2; + result = fpArgMappingToPhysicalReg[cur_fp_double_reg_]; + cur_fp_double_reg_++; + } + } else { + // TODO: Remove the check when we return double as double. + DCHECK_EQ(cur_fp_double_reg_ % 2, 0U); + if (cur_fp_reg_ % 2 == 0) { + cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_); + } + if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { + result = fpArgMappingToPhysicalReg[cur_fp_reg_]; + cur_fp_reg_++; + } + } + } else { + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = coreArgMappingToPhysicalReg[cur_core_reg_++]; + // TODO: Enable following code when FlushIns() support 64-bit registers. + // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + // result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]); + // } + } + } + return result; +} + +RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const { + DCHECK(IsInitialized()); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); +} + +void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, + InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + max_mapped_in_ = -1; + is_there_stack_mapped_ = false; + for (int in_position = 0; in_position < count; in_position++) { + RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, + arg_locs[in_position].wide); + if (reg.Valid()) { + mapping_[in_position] = reg; + // TODO: Enable the following code when FlushIns() support 64-bit argument registers. + // if (arg_locs[in_position].wide) { + // if (reg.Is32Bit()) { + // // As it is a split long, the hi-part is on stack. + // is_there_stack_mapped_ = true; + // } + // // We covered 2 v-registers, so skip the next one + // in_position++; + // } + max_mapped_in_ = std::max(max_mapped_in_, in_position); + } else { + is_there_stack_mapped_ = true; + } + } + initialized_ = true; +} + +// TODO: Should be able to return long, double registers. +// Need check some common code as it will break some assumption. 
+RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!in_to_reg_storage_mapping_.IsInitialized()) { + int start_vreg = mir_graph_->GetFirstInVR(); + RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; + + InToRegStorageArmMapper mapper; + in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + +int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info, + int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + if (kArm32QuickCodeUseSoftFloat) { + return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method, + vtable_idx, direct_code, direct_method, type, skip_this); + } else { + return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx, + direct_code, direct_method, type, skip_this); + } +} + +int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + if (kArm32QuickCodeUseSoftFloat) { + return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, + vtable_idx, direct_code, direct_method, type, skip_this); + } + + // TODO: Rework the implementation when argument register can be long or double. + + /* If no arguments, just return */ + if (info->num_arg_words == 0) { + return call_state; + } + + const int start_index = skip_this ? 1 : 0; + + InToRegStorageArmMapper mapper; + InToRegStorageMapping in_to_reg_storage_mapping; + in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); + const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); + int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); + + // First of all, check whether it makes sense to use bulk copying. + // Bulk copying is done only for the range case. + // TODO: make a constant instead of 2 + if (info->is_range && regs_left_to_pass_via_stack >= 2) { + // Scan the rest of the args - if in phys_reg flush to memory + for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) { + RegLocation loc = info->args[next_arg]; + if (loc.wide) { + // TODO: Only flush hi-part. + if (loc.high_word) { + loc = info->args[--next_arg]; + } + loc = UpdateLocWide(loc); + if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); + } + next_arg += 2; + } else { + loc = UpdateLoc(loc); + if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (loc.ref) { + StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, + kNotVolatile); + } + } + next_arg++; + } + } + + // The rest can be copied together + int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1, + cu_->instruction_set); + + int current_src_offset = start_offset; + int current_dest_offset = outs_offset; + + // Only davik regs are accessed in this loop; no next_call_insn() calls. 
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + while (regs_left_to_pass_via_stack > 0) { + /* + * TODO: Improve by adding block copy for large number of arguments. This + * should be done, if possible, as a target-depending helper. For now, just + * copy a Dalvik vreg at a time. + */ + // Moving 32-bits via general purpose register. + size_t bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3, kNotWide); + + // Now load the argument VR and store to the outs. + Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); + Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + } + DCHECK_EQ(regs_left_to_pass_via_stack, 0); + } + + // Now handle rest not registers if they are + if (in_to_reg_storage_mapping.IsThereStackMapped()) { + RegStorage regWide = TargetReg(kArg2, kWide); + for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + // TODO: Only pass split wide hi-part via stack. + if (!reg.Valid() || rl_arg.wide) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); + } + } else { + if (rl_arg.location == kLocPhysReg) { + if (rl_arg.ref) { + StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); + } + } else { + if (rl_arg.ref) { + RegStorage regSingle = TargetReg(kArg2, kRef); + LoadValueDirectFixed(rl_arg, regSingle); + StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile); + } else { + RegStorage regSingle = TargetReg(kArg2, kNotWide); + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); + } + } + } + } + + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; + } + } + } + + // Finish with mapped registers + for (int i = start_index; i <= last_mapped_in; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { + if (reg.Is64Bit()) { + LoadValueDirectWideFixed(rl_arg, reg); + } else { + // TODO: Only split long should be the case we need to care about. + if (rl_arg.wide) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + int high_word = rl_arg.high_word ? 1 : 0; + rl_arg = high_word ? info->args[i - 1] : rl_arg; + if (rl_arg.location == kLocPhysReg) { + RegStorage rs_arg = rl_arg.reg; + if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) { + rs_arg = As64BitFloatRegPair(rs_arg); + } + RegStorage rs_arg_low = rs_arg.GetLow(); + RegStorage rs_arg_high = rs_arg.GetHigh(); + OpRegCopy(reg, high_word ? 
rs_arg_high : rs_arg_low); + } else { + Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg); + } + } else { + LoadValueDirectFixed(rl_arg, reg); + } + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + if (reg.Is64Bit()) { + i++; + } + } + + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + if (pcrLabel) { + if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. + RegStorage tmp = AllocTemp(); + Load32Disp(TargetReg(kArg1, kRef), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); + } + } + return call_state; +} + } // namespace art diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 09acf4c..ce2de65 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -1007,6 +1007,12 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora // Intentional fall-though. case k64: if (r_src.IsFloat()) { + // Note: If the register is retrieved by register allocator, it should never be a pair. + // But some functions in mir2lir assume 64-bit registers are 32-bit register pairs. + // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect(). + if (r_src.IsPair()) { + r_src = As64BitFloatReg(r_src); + } DCHECK(!r_src.IsPair()); store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src); } else { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2bef7c5..bc4d00b 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -248,13 +248,13 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(QuickEntrypointEnum trampo if (cu_->instruction_set == kMips) { LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg1, kNotWide)); } else { - LoadValueDirectFixed(arg1, TargetReg(kArg1, kNotWide)); + LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kNotWide)); } } else { if (cu_->instruction_set == kMips) { LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kWide)); } else { - LoadValueDirectWideFixed(arg1, TargetReg(kArg1, kWide)); + LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kWide)); } } } else { @@ -365,6 +365,7 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation( * ArgLocs is an array of location records describing the incoming arguments * with one location record per word of argument. */ +// TODO: Support 64-bit argument registers. 
void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { /* * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 3e0844b..f4e6dfe 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -1191,13 +1191,17 @@ class Mir2Lir : public Backend { */ virtual RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) { if (wide_kind == kWide) { - DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg7) || (kRet0 == reg)); + DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg)); COMPILE_ASSERT((kArg1 == kArg0 + 1) && (kArg2 == kArg1 + 1) && (kArg3 == kArg2 + 1) && (kArg4 == kArg3 + 1) && (kArg5 == kArg4 + 1) && (kArg6 == kArg5 + 1) && (kArg7 == kArg6 + 1), kargs_range_unexpected); COMPILE_ASSERT((kFArg1 == kFArg0 + 1) && (kFArg2 == kFArg1 + 1) && (kFArg3 == kFArg2 + 1) && (kFArg4 == kFArg3 + 1) && (kFArg5 == kFArg4 + 1) && (kFArg6 == kFArg5 + 1) && - (kFArg7 == kFArg6 + 1), kfargs_range_unexpected); + (kFArg7 == kFArg6 + 1) && (kFArg8 == kFArg7 + 1) && (kFArg9 == kFArg8 + 1) && + (kFArg10 == kFArg9 + 1) && (kFArg11 == kFArg10 + 1) && + (kFArg12 == kFArg11 + 1) && (kFArg13 == kFArg12 + 1) && + (kFArg14 == kFArg13 + 1) && (kFArg15 == kFArg14 + 1), + kfargs_range_unexpected); COMPILE_ASSERT(kRet1 == kRet0 + 1, kret_range_unexpected); return RegStorage::MakeRegPair(TargetReg(reg), TargetReg(static_cast<SpecialTargetRegister>(reg + 1))); diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 6f2a647..8f7bd30 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -425,6 +425,21 @@ static int kAllOpcodes[] = { kMirOpSelect, }; +static int kInvokeOpcodes[] = { + Instruction::INVOKE_VIRTUAL, + Instruction::INVOKE_SUPER, + Instruction::INVOKE_DIRECT, + Instruction::INVOKE_STATIC, + Instruction::INVOKE_INTERFACE, + Instruction::INVOKE_VIRTUAL_RANGE, + Instruction::INVOKE_SUPER_RANGE, + Instruction::INVOKE_DIRECT_RANGE, + Instruction::INVOKE_STATIC_RANGE, + Instruction::INVOKE_INTERFACE_RANGE, + Instruction::INVOKE_VIRTUAL_QUICK, + Instruction::INVOKE_VIRTUAL_RANGE_QUICK, +}; + // Unsupported opcodes. nullptr can be used when everything is supported. Size of the lists is // recorded below. static const int* kUnsupportedOpcodes[] = { @@ -523,8 +538,8 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; // Check if we support the byte code. - if (std::find(unsupport_list, unsupport_list + unsupport_list_size, - opcode) != unsupport_list + unsupport_list_size) { + if (std::find(unsupport_list, unsupport_list + unsupport_list_size, opcode) + != unsupport_list + unsupport_list_size) { if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { VLOG(compiler) << "Unsupported dalvik byte code : " << mir->dalvikInsn.opcode; @@ -535,11 +550,8 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil return false; } // Check if it invokes a prototype that we cannot support. 
- if (Instruction::INVOKE_VIRTUAL == opcode || - Instruction::INVOKE_SUPER == opcode || - Instruction::INVOKE_DIRECT == opcode || - Instruction::INVOKE_STATIC == opcode || - Instruction::INVOKE_INTERFACE == opcode) { + if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode) + != kInvokeOpcodes + arraysize(kInvokeOpcodes)) { uint32_t invoke_method_idx = mir->dalvikInsn.vB; const char* invoke_method_shorty = dex_file.GetMethodShorty( dex_file.GetMethodId(invoke_method_idx)); diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index f0c0ed7..9545896 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -21,6 +21,22 @@ namespace art { namespace arm { +// Used by hard float. +static const Register kHFCoreArgumentRegisters[] = { + R0, R1, R2, R3 +}; + +static const SRegister kHFSArgumentRegisters[] = { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 +}; + +static const DRegister kHFDArgumentRegisters[] = { + D0, D1, D2, D3, D4, D5, D6, D7 +}; + +COMPILE_ASSERT(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters), + ks_d_argument_registers_mismatch); + // Calling convention ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() { @@ -31,26 +47,43 @@ ManagedRegister ArmJniCallingConvention::InterproceduralScratchRegister() { return ArmManagedRegister::FromCoreRegister(IP); // R12 } -static ManagedRegister ReturnRegisterForShorty(const char* shorty) { - if (shorty[0] == 'F') { - return ArmManagedRegister::FromCoreRegister(R0); - } else if (shorty[0] == 'D') { - return ArmManagedRegister::FromRegisterPair(R0_R1); - } else if (shorty[0] == 'J') { - return ArmManagedRegister::FromRegisterPair(R0_R1); - } else if (shorty[0] == 'V') { - return ArmManagedRegister::NoRegister(); +ManagedRegister ArmManagedRuntimeCallingConvention::ReturnRegister() { + if (kArm32QuickCodeUseSoftFloat) { + switch (GetShorty()[0]) { + case 'V': + return ArmManagedRegister::NoRegister(); + case 'D': + case 'J': + return ArmManagedRegister::FromRegisterPair(R0_R1); + default: + return ArmManagedRegister::FromCoreRegister(R0); + } } else { - return ArmManagedRegister::FromCoreRegister(R0); + switch (GetShorty()[0]) { + case 'V': + return ArmManagedRegister::NoRegister(); + case 'D': + return ArmManagedRegister::FromDRegister(D0); + case 'F': + return ArmManagedRegister::FromSRegister(S0); + case 'J': + return ArmManagedRegister::FromRegisterPair(R0_R1); + default: + return ArmManagedRegister::FromCoreRegister(R0); + } } } -ManagedRegister ArmManagedRuntimeCallingConvention::ReturnRegister() { - return ReturnRegisterForShorty(GetShorty()); -} - ManagedRegister ArmJniCallingConvention::ReturnRegister() { - return ReturnRegisterForShorty(GetShorty()); + switch (GetShorty()[0]) { + case 'V': + return ArmManagedRegister::NoRegister(); + case 'D': + case 'J': + return ArmManagedRegister::FromRegisterPair(R0_R1); + default: + return ArmManagedRegister::FromCoreRegister(R0); + } } ManagedRegister ArmJniCallingConvention::IntReturnRegister() { @@ -88,15 +121,68 @@ FrameOffset ArmManagedRuntimeCallingConvention::CurrentParamStackOffset() { const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpills() { // We spill the argument registers on ARM to free them up for scratch use, we then assume // all arguments are on the stack. 
- if (entry_spills_.size() == 0) { - size_t num_spills = NumArgs() + NumLongOrDoubleArgs(); - if (num_spills > 0) { - entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R1)); - if (num_spills > 1) { - entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R2)); - if (num_spills > 2) { - entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R3)); + if (kArm32QuickCodeUseSoftFloat) { + if (entry_spills_.size() == 0) { + size_t num_spills = NumArgs() + NumLongOrDoubleArgs(); + if (num_spills > 0) { + entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R1)); + if (num_spills > 1) { + entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R2)); + if (num_spills > 2) { + entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R3)); + } + } + } + } + } else { + if ((entry_spills_.size() == 0) && (NumArgs() > 0)) { + uint32_t gpr_index = 1; // R0 ~ R3. Reserve r0 for ArtMethod*. + uint32_t fpr_index = 0; // S0 ~ S15. + uint32_t fpr_double_index = 0; // D0 ~ D7. + + ResetIterator(FrameOffset(0)); + while (HasNext()) { + if (IsCurrentParamAFloatOrDouble()) { + if (IsCurrentParamADouble()) { // Double. + // Double should not overlap with float. + fpr_double_index = (std::max(fpr_double_index * 2, RoundUp(fpr_index, 2))) / 2; + if (fpr_double_index < arraysize(kHFDArgumentRegisters)) { + entry_spills_.push_back( + ArmManagedRegister::FromDRegister(kHFDArgumentRegisters[fpr_double_index++])); + } else { + entry_spills_.push_back(ManagedRegister::NoRegister(), 8); + } + } else { // Float. + // Float should not overlap with double. + if (fpr_index % 2 == 0) { + fpr_index = std::max(fpr_double_index * 2, fpr_index); + } + if (fpr_index < arraysize(kHFSArgumentRegisters)) { + entry_spills_.push_back( + ArmManagedRegister::FromSRegister(kHFSArgumentRegisters[fpr_index++])); + } else { + entry_spills_.push_back(ManagedRegister::NoRegister(), 4); + } + } + } else { + // FIXME: Pointer this returns as both reference and long. + if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long. + if (gpr_index < arraysize(kHFCoreArgumentRegisters)) { + entry_spills_.push_back( + ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++])); + } else { + entry_spills_.push_back(ManagedRegister::NoRegister(), 4); + } + } + // High part of long or 32-bit argument. 
+ if (gpr_index < arraysize(kHFCoreArgumentRegisters)) { + entry_spills_.push_back( + ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++])); + } else { + entry_spills_.push_back(ManagedRegister::NoRegister(), 4); + } } + Next(); } } } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 80e9cdb..0555c00 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -233,23 +233,30 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite bool shouldOptimize = dex_compilation_unit.GetSymbol().find("00024reg_00024") != std::string::npos; + if (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) { + uint32_t shorty_len; + const char* shorty = dex_compilation_unit.GetShorty(&shorty_len); + for (uint32_t i = 0; i < shorty_len; ++i) { + if (shorty[i] == 'D' || shorty[i] == 'F') { + CHECK(!shouldCompile) << "Hard float ARM32 parameters are not yet supported"; + return nullptr; + } + } + } + ArenaPool pool; ArenaAllocator arena(&pool); HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file, GetCompilerDriver()); HGraph* graph = builder.BuildGraph(*code_item); if (graph == nullptr) { - if (shouldCompile) { - LOG(FATAL) << "Could not build graph in optimizing compiler"; - } + CHECK(!shouldCompile) << "Could not build graph in optimizing compiler"; return nullptr; } CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); if (codegen == nullptr) { - if (shouldCompile) { - LOG(FATAL) << "Could not find code generator for optimizing compiler"; - } + CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; return nullptr; } @@ -305,7 +312,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite stack_map); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; - return nullptr; + UNREACHABLE(); } else { unoptimized_compiled_methods_++; codegen->CompileBaseline(&allocator); diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index b430c7e..75bab82 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -417,9 +417,23 @@ void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, StoreToOffset(kStoreWord, R0, SP, 0); // Write out entry spills. + int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>); for (size_t i = 0; i < entry_spills.size(); ++i) { - Register reg = entry_spills.at(i).AsArm().AsCoreRegister(); - StoreToOffset(kStoreWord, reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize)); + ArmManagedRegister reg = entry_spills.at(i).AsArm(); + if (reg.IsNoRegister()) { + // only increment stack offset. 
+ ManagedRegisterSpill spill = entry_spills.at(i); + offset += spill.getSize(); + } else if (reg.IsCoreRegister()) { + StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset); + offset += 4; + } else if (reg.IsSRegister()) { + StoreSToOffset(reg.AsSRegister(), SP, offset); + offset += 4; + } else if (reg.IsDRegister()) { + StoreDToOffset(reg.AsDRegister(), SP, offset); + offset += 8; + } } } diff --git a/runtime/Android.mk b/runtime/Android.mk index 0ef0fef..6f6dcbc 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -222,6 +222,7 @@ LIBART_TARGET_SRC_FILES_arm := \ arch/arm/memcmp16_arm.S \ arch/arm/portable_entrypoints_arm.S \ arch/arm/quick_entrypoints_arm.S \ + arch/arm/quick_entrypoints_cc_arm.cc \ arch/arm/thread_arm.cc \ arch/arm/fault_handler_arm.cc diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index 5388cc0..8cd2a27 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -19,9 +19,9 @@ #include "asm_support.h" -#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176 +#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32 -#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 48 +#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112 // Flag for enabling R4 optimization in arm runtime #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 96ffc93..fd9c626 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -97,6 +97,23 @@ void ArmContext::SmashCallerSaves() { gprs_[R1] = const_cast<uint32_t*>(&gZero); gprs_[R2] = nullptr; gprs_[R3] = nullptr; + + fprs_[S0] = nullptr; + fprs_[S1] = nullptr; + fprs_[S2] = nullptr; + fprs_[S3] = nullptr; + fprs_[S4] = nullptr; + fprs_[S5] = nullptr; + fprs_[S6] = nullptr; + fprs_[S7] = nullptr; + fprs_[S8] = nullptr; + fprs_[S9] = nullptr; + fprs_[S10] = nullptr; + fprs_[S11] = nullptr; + fprs_[S12] = nullptr; + fprs_[S13] = nullptr; + fprs_[S14] = nullptr; + fprs_[S15] = nullptr; } extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*); diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index ff0eb4a..24e9b1d 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -77,23 +77,17 @@ extern "C" void art_quick_handle_fill_data(void*, void*); extern "C" void art_quick_lock_object(void*); extern "C" void art_quick_unlock_object(void*); -// Math entrypoints. -extern int32_t CmpgDouble(double a, double b); -extern int32_t CmplDouble(double a, double b); -extern int32_t CmpgFloat(float a, float b); -extern int32_t CmplFloat(float a, float b); - -// Math conversions. -extern "C" int32_t __aeabi_f2iz(float op1); // FLOAT_TO_INT -extern "C" int32_t __aeabi_d2iz(double op1); // DOUBLE_TO_INT -extern "C" float __aeabi_l2f(int64_t op1); // LONG_TO_FLOAT -extern "C" double __aeabi_l2d(int64_t op1); // LONG_TO_DOUBLE - +// Used by soft float. // Single-precision FP arithmetics. -extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] - +extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] // Double-precision FP arithmetics. -extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] +extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] + +// Used by hard float. 
+extern "C" int64_t art_quick_f2l(float f); // FLOAT_TO_LONG +extern "C" int64_t art_quick_d2l(double d); // DOUBLE_TO_LONG +extern "C" float art_quick_fmodf(float a, float b); // REM_FLOAT[_2ADDR] +extern "C" double art_quick_fmod(double a, double b); // REM_DOUBLE[_2ADDR] // Integer arithmetics. extern "C" int __aeabi_idivmod(int32_t, int32_t); // [DIV|REM]_INT[_2ADDR|_LIT8|_LIT16] @@ -205,25 +199,24 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pUnlockObject = art_quick_unlock_object; // Math - qpoints->pCmpgDouble = CmpgDouble; - qpoints->pCmpgFloat = CmpgFloat; - qpoints->pCmplDouble = CmplDouble; - qpoints->pCmplFloat = CmplFloat; - qpoints->pFmod = fmod; - qpoints->pL2d = __aeabi_l2d; - qpoints->pFmodf = fmodf; - qpoints->pL2f = __aeabi_l2f; - qpoints->pD2iz = __aeabi_d2iz; - qpoints->pF2iz = __aeabi_f2iz; qpoints->pIdivmod = __aeabi_idivmod; - qpoints->pD2l = art_d2l; - qpoints->pF2l = art_f2l; qpoints->pLdiv = __aeabi_ldivmod; qpoints->pLmod = __aeabi_ldivmod; // result returned in r2:r3 qpoints->pLmul = art_quick_mul_long; qpoints->pShlLong = art_quick_shl_long; qpoints->pShrLong = art_quick_shr_long; qpoints->pUshrLong = art_quick_ushr_long; + if (kArm32QuickCodeUseSoftFloat) { + qpoints->pFmod = fmod; + qpoints->pFmodf = fmodf; + qpoints->pD2l = art_d2l; + qpoints->pF2l = art_f2l; + } else { + qpoints->pFmod = art_quick_fmod; + qpoints->pFmodf = art_quick_fmodf; + qpoints->pD2l = art_quick_d2l; + qpoints->pF2l = art_quick_f2l; + } // Intrinsics qpoints->pIndexOf = art_quick_indexof; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index aae0c94..632b414 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -40,10 +40,10 @@ .cfi_rel_offset r10, 24 .cfi_rel_offset r11, 28 .cfi_rel_offset lr, 32 - vpush {s0-s31} @ 32 words (128 bytes) of floats. - .pad #128 - .cfi_adjust_cfa_offset 128 - sub sp, #12 @ 3 words of space, bottom word will hold Method*. + vpush {s16-s31} @ 16 words (64 bytes) of floats. + .pad #64 + .cfi_adjust_cfa_offset 64 + sub sp, #12 @ 3 words of space, bottom word will hold Method* .pad #12 .cfi_adjust_cfa_offset 12 RUNTIME_CURRENT1 \rTemp1, \rTemp2 @ Load Runtime::Current into rTemp1. @@ -53,7 +53,7 @@ str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 128 + 12) +#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12) #error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected." #endif .endm @@ -101,15 +101,7 @@ .endm .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN - add sp, #4 @ bottom word holds Method* - pop {r5-r8, r10-r11, lr} @ 7 words of callee saves - .cfi_restore r5 - .cfi_restore r6 - .cfi_restore r7 - .cfi_restore r8 - .cfi_restore r10 - .cfi_restore r11 - .cfi_adjust_cfa_offset -FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME bx lr @ return .endm @@ -117,9 +109,10 @@ * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). */ -.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2 - push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves +.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY + push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args. 
.save {r1-r3, r5-r8, r10-r11, lr} + .cfi_adjust_cfa_offset 40 .cfi_rel_offset r1, 0 .cfi_rel_offset r2, 4 .cfi_rel_offset r3, 8 @@ -130,47 +123,39 @@ .cfi_rel_offset r10, 28 .cfi_rel_offset r11, 32 .cfi_rel_offset lr, 36 - .cfi_adjust_cfa_offset 40 + vpush {s0-s15} @ 16 words of float args. + .pad #64 + .cfi_adjust_cfa_offset 64 sub sp, #8 @ 2 words of space, bottom word will hold Method* .pad #8 .cfi_adjust_cfa_offset 8 + // Ugly compile-time check, but we only have the preprocessor. +#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8) +#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected." +#endif +.endm + +.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2 + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY RUNTIME_CURRENT3 \rTemp1, \rTemp2 @ Load Runtime::Current into rTemp1. THIS_LOAD_REQUIRES_READ_BARRIER @ rTemp1 is kRefsAndArgs Method*. ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET] str \rTemp1, [sp, #0] @ Place Method* at bottom of stack. str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. - - // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8) -#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected." -#endif .endm .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0 - push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves - .save {r1-r3, r5-r8, r10-r11, lr} - .cfi_rel_offset r1, 0 - .cfi_rel_offset r2, 4 - .cfi_rel_offset r3, 8 - .cfi_rel_offset r5, 12 - .cfi_rel_offset r6, 16 - .cfi_rel_offset r7, 20 - .cfi_rel_offset r8, 24 - .cfi_rel_offset r10, 28 - .cfi_rel_offset r11, 32 - .cfi_rel_offset lr, 36 - .cfi_adjust_cfa_offset 40 - sub sp, #8 @ 2 words of space, bottom word will hold Method* - .pad #8 - .cfi_adjust_cfa_offset 8 - + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY str r0, [sp, #0] @ Store ArtMethod* to bottom of stack. str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame. .endm .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME add sp, #8 @ rewind sp + .cfi_adjust_cfa_offset -8 + vpop {s0-s15} + .cfi_adjust_cfa_offset -64 pop {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves .cfi_restore r1 .cfi_restore r2 @@ -181,7 +166,7 @@ .cfi_restore r8 .cfi_restore r10 .cfi_restore r11 - .cfi_adjust_cfa_offset -48 + .cfi_adjust_cfa_offset -40 .endm @@ -373,60 +358,91 @@ INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvoke INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck /* - * Quick invocation stub. + * Quick invocation stub internal. 
* On entry: * r0 = method pointer * r1 = argument array or NULL for no argument methods * r2 = size of argument array in bytes * r3 = (managed) thread pointer * [sp] = JValue* result - * [sp + 4] = shorty + * [sp + 4] = result_in_float + * [sp + 8] = core register argument array + * [sp + 12] = fp register argument array + * +-------------------------+ + * | uint32_t* fp_reg_args | + * | uint32_t* core_reg_args | + * | result_in_float | <- Caller frame + * | Jvalue* result | + * +-------------------------+ + * | lr | + * | r11 | + * | r9 | + * | r4 | <- r11 + * +-------------------------+ + * | uint32_t out[n-1] | + * | : : | Outs + * | uint32_t out[0] | + * | StackRef<ArtMethod> | <- SP value=null + * +-------------------------+ */ -ENTRY art_quick_invoke_stub - push {r0, r4, r5, r9, r11, lr} @ spill regs - .save {r0, r4, r5, r9, r11, lr} - .pad #24 - .cfi_adjust_cfa_offset 24 - .cfi_rel_offset r0, 0 - .cfi_rel_offset r4, 4 - .cfi_rel_offset r5, 8 - .cfi_rel_offset r9, 12 - .cfi_rel_offset r11, 16 - .cfi_rel_offset lr, 20 +ENTRY art_quick_invoke_stub_internal + push {r4, r9, r11, lr} @ spill regs + .save {r4, r9, r11, lr} + .pad #16 + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r9, 4 + .cfi_rel_offset r11, 8 + .cfi_rel_offset lr, 12 mov r11, sp @ save the stack pointer .cfi_def_cfa_register r11 + mov r9, r3 @ move managed thread pointer into r9 -#ifdef ARM_R4_SUSPEND_FLAG - mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval -#endif - add r5, r2, #4 @ create space for method pointer in frame - sub r5, sp, r5 @ reserve & align *stack* to 16 bytes: native calling - and r5, #0xFFFFFFF0 @ convention only aligns to 8B, so we have to ensure ART - mov sp, r5 @ 16B alignment ourselves. + add r4, r2, #4 @ create space for method pointer in frame + sub r4, sp, r4 @ reserve & align *stack* to 16 bytes: native calling + and r4, #0xFFFFFFF0 @ convention only aligns to 8B, so we have to ensure ART + mov sp, r4 @ 16B alignment ourselves. 
+ mov r4, r0 @ save method* add r0, sp, #4 @ pass stack pointer + method ptr as dest for memcpy bl memcpy @ memcpy (dest, src, bytes) - ldr r0, [r11] @ restore method* - ldr r1, [sp, #4] @ copy arg value for r1 - ldr r2, [sp, #8] @ copy arg value for r2 - ldr r3, [sp, #12] @ copy arg value for r3 mov ip, #0 @ set ip to 0 str ip, [sp] @ store NULL for method* at bottom of frame + + ldr ip, [r11, #28] @ load fp register argument array pointer + vldm ip, {s0-s15} @ copy s0 - s15 + + ldr ip, [r11, #24] @ load core register argument array pointer + mov r0, r4 @ restore method* + add ip, ip, #4 @ skip r0 + ldm ip, {r1-r3} @ copy r1 - r3 + +#ifdef ARM_R4_SUSPEND_FLAG + mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval +#endif + ldr ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET] @ get pointer to the code blx ip @ call the method + mov sp, r11 @ restore the stack pointer - ldr ip, [sp, #24] @ load the result pointer - strd r0, [ip] @ store r0/r1 into result pointer - pop {r0, r4, r5, r9, r11, lr} @ restore spill regs - .cfi_restore r0 + .cfi_def_cfa_register sp + + ldr r4, [sp, #20] @ load result_is_float + ldr r9, [sp, #16] @ load the result pointer + cmp r4, #0 + ite eq + strdeq r0, [r9] @ store r0/r1 into result pointer + vstrne d0, [r9] @ store s0-s1/d0 into result pointer + + pop {r4, r9, r11, lr} @ restore spill regs .cfi_restore r4 - .cfi_restore r5 .cfi_restore r9 + .cfi_restore r11 .cfi_restore lr - .cfi_adjust_cfa_offset -24 + .cfi_adjust_cfa_offset -16 bx lr -END art_quick_invoke_stub +END art_quick_invoke_stub_internal /* * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_ @@ -869,13 +885,14 @@ ENTRY art_quick_proxy_invoke_handler mov r3, sp @ pass SP blx artQuickProxyInvokeHandler @ (Method* proxy method, receiver, Thread*, SP) ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ - add sp, #16 @ skip r1-r3, 4 bytes padding. - .cfi_adjust_cfa_offset -16 - cbnz r2, 1f @ success if no exception is pending + // Tear down the callee-save frame. Skip arg registers. + add sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) + .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + cbnz r2, 1f @ success if no exception is pending + vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... bx lr @ return on success 1: - RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME DELIVER_PENDING_EXCEPTION END art_quick_proxy_invoke_handler @@ -977,20 +994,13 @@ ENTRY art_quick_generic_jni_trampoline ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ cbnz r2, .Lexception_in_native - // Tear down the callee-save frame. - add sp, #12 @ rewind sp - // Do not pop r0 and r1, they contain the return value. - pop {r2-r3, r5-r8, r10-r11, lr} @ 9 words of callee saves - .cfi_restore r2 - .cfi_restore r3 - .cfi_restore r5 - .cfi_restore r6 - .cfi_restore r7 - .cfi_restore r8 - .cfi_restore r10 - .cfi_restore r11 - .cfi_adjust_cfa_offset -48 + // Tear down the callee-save frame. Skip arg registers. + add sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE + .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + // store into fpr, for when it's a fpr return... 
+ vmov d0, r0, r1 bx lr // ret .Lentry_error: @@ -1010,11 +1020,13 @@ ENTRY art_quick_to_interpreter_bridge mov r2, sp @ pass SP blx artQuickToInterpreterBridge @ (Method* method, Thread*, SP) ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ - add sp, #16 @ skip r1-r3, 4 bytes padding. - .cfi_adjust_cfa_offset -16 + // Tear down the callee-save frame. Skip arg registers. + add sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) + .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME cbnz r2, 1f @ success if no exception is pending - bx lr @ return on success + vmov d0, r0, r1 @ store into fpr, for when it's a fpr return... + bx lr @ return on success 1: DELIVER_PENDING_EXCEPTION END art_quick_to_interpreter_bridge @@ -1435,3 +1447,54 @@ ENTRY art_quick_string_compareto .Ldone: pop {r4, r7-r12, pc} END art_quick_string_compareto + + /* Assembly routines used to handle ABI differences. */ + + /* double fmod(double a, double b) */ + .extern fmod +ENTRY art_quick_fmod + push {lr} + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset lr, 0 + sub sp, #4 + .cfi_adjust_cfa_offset 4 + vmov r0, r1, d0 + vmov r2, r3, d1 + bl fmod + vmov d0, r0, r1 + add sp, #4 + .cfi_adjust_cfa_offset -4 + pop {pc} + .cfi_adjust_cfa_offset -4 +END art_quick_fmod + + /* float fmodf(float a, float b) */ + .extern fmodf +ENTRY art_quick_fmodf + push {lr} + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset lr, 0 + sub sp, #4 + .cfi_adjust_cfa_offset 4 + vmov r0, r1, d0 + bl fmodf + vmov s0, r0 + add sp, #4 + .cfi_adjust_cfa_offset -4 + pop {pc} + .cfi_adjust_cfa_offset -4 +END art_quick_fmod + + /* int64_t art_d2l(double d) */ + .extern art_d2l +ENTRY art_quick_d2l + vmov r0, r1, d0 + b art_d2l +END art_quick_d2l + + /* int64_t art_f2l(float f) */ + .extern art_f2l +ENTRY art_quick_f2l + vmov r0, s0 + b art_f2l +END art_quick_f2l diff --git a/runtime/arch/arm/quick_entrypoints_cc_arm.cc b/runtime/arch/arm/quick_entrypoints_cc_arm.cc new file mode 100644 index 0000000..e21e6c1 --- /dev/null +++ b/runtime/arch/arm/quick_entrypoints_cc_arm.cc @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mirror/art_method.h" +#include "utils.h" // For RoundUp(). + +namespace art { + +// Assembly stub that does the final part of the up-call into Java. +extern "C" void art_quick_invoke_stub_internal(mirror::ArtMethod*, uint32_t*, uint32_t, + Thread* self, JValue* result, uint32_t, uint32_t*, + uint32_t*); + +template <bool kIsStatic> +static void quick_invoke_reg_setup(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size, + Thread* self, JValue* result, const char* shorty) { + // Note: We do not follow aapcs ABI in quick code for both softfp and hardfp. + uint32_t core_reg_args[4]; // r0 ~ r3 + uint32_t fp_reg_args[16]; // s0 ~ s15 (d0 ~ d7) + uint32_t gpr_index = 1; // Index into core registers. 
+  uint32_t fpr_index = 0;         // Index into float registers.
+  uint32_t fpr_double_index = 0;  // Index into float registers for doubles.
+  uint32_t arg_index = 0;         // Index into argument array.
+  const uint32_t result_in_float = kArm32QuickCodeUseSoftFloat ? 0 :
+      (shorty[0] == 'F' || shorty[0] == 'D') ? 1 : 0;
+
+  if (!kIsStatic) {
+    // Copy receiver for non-static methods.
+    core_reg_args[gpr_index++] = args[arg_index++];
+  }
+
+  for (uint32_t shorty_index = 1; shorty[shorty_index] != '\0'; ++shorty_index, ++arg_index) {
+    char arg_type = shorty[shorty_index];
+    if (kArm32QuickCodeUseSoftFloat) {
+      arg_type = (arg_type == 'D') ? 'J' : arg_type;  // Regard double as long.
+      arg_type = (arg_type == 'F') ? 'I' : arg_type;  // Regard float as int.
+    }
+    switch (arg_type) {
+      case 'D': {
+        // Copy double argument into fp_reg_args if there are still floating point reg arguments.
+        // Double should not overlap with float.
+        fpr_double_index = std::max(fpr_double_index, RoundUp(fpr_index, 2));
+        if (fpr_double_index < arraysize(fp_reg_args)) {
+          fp_reg_args[fpr_double_index++] = args[arg_index];
+          fp_reg_args[fpr_double_index++] = args[arg_index + 1];
+        }
+        ++arg_index;
+        break;
+      }
+      case 'F':
+        // Copy float argument into fp_reg_args if there are still floating point reg arguments.
+        // If fpr_index is odd then it's pointing at a hole next to an existing float argument. If we
+        // encounter a float argument then pick it up from that hole. In the case fpr_index is even,
+        // ensure that we don't pick up an argument that overlaps with a double from
+        // fpr_double_index. In either case, take care not to go beyond the maximum number of
+        // floating point arguments.
+        if (fpr_index % 2 == 0) {
+          fpr_index = std::max(fpr_double_index, fpr_index);
+        }
+        if (fpr_index < arraysize(fp_reg_args)) {
+          fp_reg_args[fpr_index++] = args[arg_index];
+        }
+        break;
+      case 'J':
+        if (gpr_index < arraysize(core_reg_args)) {
+          core_reg_args[gpr_index++] = args[arg_index];
+        }
+        ++arg_index;
+        FALLTHROUGH_INTENDED;  // Fall-through to take care of the high part.
+      default:
+        if (gpr_index < arraysize(core_reg_args)) {
+          core_reg_args[gpr_index++] = args[arg_index];
+        }
+        break;
+    }
+  }
+
+  art_quick_invoke_stub_internal(method, args, args_size, self, result, result_in_float,
+                                 core_reg_args, fp_reg_args);
+}
+
+// Called by art::mirror::ArtMethod::Invoke to do entry into a non-static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_stub(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size,
+                                      Thread* self, JValue* result, const char* shorty) {
+  quick_invoke_reg_setup<false>(method, args, args_size, self, result, shorty);
+}
+
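(Worked example, added for this review and not part of the patch: the 'D'/'F' cases above carry the whole double-alignment and back-filling rule, and it is easier to see on a concrete shorty. The standalone sketch below mirrors just those two cases, leaving out the overflow-to-stack checks, and prints which VFP register each floating-point argument would be copied into.)

    // Illustrative sketch only; mirrors the 'D'/'F' cases of quick_invoke_reg_setup().
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    static uint32_t RoundUpToEven(uint32_t x) { return (x + 1u) & ~1u; }

    int main() {
      const char shorty[] = "VFDF";    // returns void; args: float, double, float
      uint32_t fpr_index = 0;          // next single slot; may point at a back-fill hole
      uint32_t fpr_double_index = 0;   // next double-aligned slot, counted in singles
      for (const char* p = shorty + 1; *p != '\0'; ++p) {
        if (*p == 'D') {
          // A double never overlaps an already-placed float: align up past fpr_index.
          fpr_double_index = std::max(fpr_double_index, RoundUpToEven(fpr_index));
          std::printf("D -> d%u (s%u/s%u)\n", fpr_double_index / 2, fpr_double_index,
                      fpr_double_index + 1);
          fpr_double_index += 2;
        } else if (*p == 'F') {
          // An even fpr_index must not collide with a double, so catch up to
          // fpr_double_index; an odd fpr_index points at a hole and back-fills it.
          if (fpr_index % 2 == 0) {
            fpr_index = std::max(fpr_double_index, fpr_index);
          }
          std::printf("F -> s%u\n", fpr_index++);
        }
      }
      return 0;
    }

For shorty "VFDF" this prints s0, d1 (s2/s3) and s1: the double skips past the half-used s0/s1 pair, and the second float back-fills the s1 hole instead of consuming a fresh register, which is exactly what fp_reg_args holds before art_quick_invoke_stub_internal issues its vldm.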
+// Called by art::mirror::ArtMethod::Invoke to do entry into a static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_static_stub(mirror::ArtMethod* method, uint32_t* args,
+                                             uint32_t args_size, Thread* self, JValue* result,
+                                             const char* shorty) {
+  quick_invoke_reg_setup<true>(method, args, args_size, self, result, shorty);
+}
+
+}  // namespace art
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 7595e94..c1f3fc2 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -25,6 +25,8 @@ namespace art {
 namespace arm {
 
+static constexpr uint32_t kArmCalleeSaveAlwaysSpills =
+    (1 << art::arm::LR);
 static constexpr uint32_t kArmCalleeSaveRefSpills =
     (1 << art::arm::R5) | (1 << art::arm::R6) | (1 << art::arm::R7) | (1 << art::arm::R8) |
     (1 << art::arm::R10) | (1 << art::arm::R11);
@@ -32,23 +34,30 @@ static constexpr uint32_t kArmCalleeSaveArgSpills =
     (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
 static constexpr uint32_t kArmCalleeSaveAllSpills =
     (1 << art::arm::R4) | (1 << art::arm::R9);
-static constexpr uint32_t kArmCalleeSaveFpAllSpills =
+
+static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpArgSpills =
     (1 << art::arm::S0)  | (1 << art::arm::S1)  | (1 << art::arm::S2)  | (1 << art::arm::S3)  |
     (1 << art::arm::S4)  | (1 << art::arm::S5)  | (1 << art::arm::S6)  | (1 << art::arm::S7)  |
     (1 << art::arm::S8)  | (1 << art::arm::S9)  | (1 << art::arm::S10) | (1 << art::arm::S11) |
-    (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15) |
+    (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15);
+static constexpr uint32_t kArmCalleeSaveFpAllSpills =
     (1 << art::arm::S16) | (1 << art::arm::S17) | (1 << art::arm::S18) | (1 << art::arm::S19) |
     (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
     (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
     (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kArmCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) | (1 << art::arm::LR);
+  return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0;
+  return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills: 0) |
+      (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index 15c6c07..0e1e32b 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -54,7 +54,7 @@ static constexpr uint32_t kArm64CalleeSaveFpArgSpills =
     (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
     (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
     (1 << art::arm64::D6) | (1 << art::arm64::D7);
-static constexpr uint32_t kArm64FpAllSpills =
+static constexpr uint32_t kArm64CalleeSaveFpAllSpills =
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11) | (1 << art::arm64::D12) | (1 << art::arm64::D13) |
     (1 << art::arm64::D14) | (1 << art::arm64::D15);
@@ -68,7 +68,7 @@ constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0);
+      (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index af341bb..93c47dc 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -50,15 +50,19 @@ class QuickArgumentVisitor {
   // | arg1 spill |  |
   // | Method*    | ---
   // | LR         |
-  // | ...        | callee saves
-  // | R3         | arg3
-  // | R2         | arg2
-  // | R1         | arg1
-  // | R0         | padding
+  // | ...        | 4x6 bytes callee saves
+  // | R3         |
+  // | R2         |
+  // | R1         |
+  // | S15        |
+  // | :          |
+  // | S0         |
+  // |            | 4x2 bytes padding
   // | Method*    | <- sp
-  static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
-  static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
-  static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
+  static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat;
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat;
+  static constexpr size_t kNumQuickGprArgs = 3;
+  static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
       arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
@@ -90,6 +94,7 @@ class QuickArgumentVisitor {
   // |            | padding
   // | Method*    | <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
@@ -117,6 +122,7 @@ class QuickArgumentVisitor {
   // | A1         | arg1
   // | A0/Method* | <- sp
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
@@ -141,6 +147,7 @@ class QuickArgumentVisitor {
   // | ECX         | arg1
   // | EAX/Method* | <- sp
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
@@ -178,6 +185,7 @@ class QuickArgumentVisitor {
   // | Padding     |
   // | RDI/Method* | <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
@@ -222,8 +230,16 @@ class QuickArgumentVisitor {
         fpr_args_(reinterpret_cast<uint8_t*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset),
         stack_args_(reinterpret_cast<uint8_t*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
                     + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
-        gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
-        is_split_long_or_double_(false) {}
+        gpr_index_(0), fpr_index_(0), fpr_double_index_(0), stack_index_(0),
+        cur_type_(Primitive::kPrimVoid), is_split_long_or_double_(false) {
+    COMPILE_ASSERT(kQuickSoftFloatAbi == (kNumQuickFprArgs == 0), knum_of_quick_fpr_arg_unexpected);
+    COMPILE_ASSERT(!(kQuickSoftFloatAbi && kQuickDoubleRegAlignedFloatBackFilled),
+                   kdouble_align_unexpected);
+    // For register alignment, we want to assume that counters (fpr_double_index_) are even if the
+    // next register is even.
+    COMPILE_ASSERT(!kQuickDoubleRegAlignedFloatBackFilled || kNumQuickFprArgs % 2 == 0,
+                   knum_quick_fpr_args_not_even);
+  }
 
   virtual ~QuickArgumentVisitor() {}
 
@@ -237,7 +253,11 @@ class QuickArgumentVisitor {
     if (!kQuickSoftFloatAbi) {
       Primitive::Type type = GetParamPrimitiveType();
       if (UNLIKELY((type == Primitive::kPrimDouble) || (type == Primitive::kPrimFloat))) {
-        if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
+        if (type == Primitive::kPrimDouble && kQuickDoubleRegAlignedFloatBackFilled) {
+          if (fpr_double_index_ + 2 < kNumQuickFprArgs + 1) {
+            return fpr_args_ + (fpr_double_index_ * GetBytesPerFprSpillLocation(kRuntimeISA));
+          }
+        } else if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
           return fpr_args_ + (fpr_index_ * GetBytesPerFprSpillLocation(kRuntimeISA));
         }
         return stack_args_ + (stack_index_ * kBytesStackArgLocation);
@@ -268,28 +288,30 @@ class QuickArgumentVisitor {
   uint64_t ReadSplitLongParam() const {
     DCHECK(IsSplitLongOrDouble());
+    // Read low half from register.
     uint64_t low_half = *reinterpret_cast<uint32_t*>(GetParamAddress());
-    uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_);
+    // Read high half from the stack. As current stack_index_ indexes the argument, the high part
+    // index should be (stack_index_ + 1).
+    uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_
+        + (stack_index_ + 1) * kBytesStackArgLocation);
     return (low_half & 0xffffffffULL) | (high_half << 32);
   }
 
   void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // This implementation doesn't support reg-spill area for hard float
-    // ABI targets such as x86_64 and aarch64. So, for those targets whose
-    // 'kQuickSoftFloatAbi' is 'false':
-    // (a) 'stack_args_' should point to the first method's argument
-    // (b) whatever the argument type it is, the 'stack_index_' should
-    //     be moved forward along with every visiting.
+    // (a) 'stack_args_' should point to the first method's argument
+    // (b) whatever the argument type it is, the 'stack_index_' should
+    //     be moved forward along with every visiting.
     gpr_index_ = 0;
     fpr_index_ = 0;
+    if (kQuickDoubleRegAlignedFloatBackFilled) {
+      fpr_double_index_ = 0;
+    }
     stack_index_ = 0;
     if (!is_static_) {  // Handle this.
       cur_type_ = Primitive::kPrimNot;
       is_split_long_or_double_ = false;
       Visit();
-      if (!kQuickSoftFloatAbi || kNumQuickGprArgs == 0) {
-        stack_index_++;
-      }
+      stack_index_++;
       if (kNumQuickGprArgs > 0) {
         gpr_index_++;
       }
@@ -305,9 +327,7 @@
         case Primitive::kPrimInt:
           is_split_long_or_double_ = false;
          Visit();
-          if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
-            stack_index_++;
-          }
+          stack_index_++;
           if (gpr_index_ < kNumQuickGprArgs) {
            gpr_index_++;
           }
@@ -315,17 +335,24 @@
         case Primitive::kPrimFloat:
           is_split_long_or_double_ = false;
           Visit();
+          stack_index_++;
           if (kQuickSoftFloatAbi) {
             if (gpr_index_ < kNumQuickGprArgs) {
               gpr_index_++;
-            } else {
-              stack_index_++;
             }
           } else {
-            if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
+            if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
               fpr_index_++;
+              if (kQuickDoubleRegAlignedFloatBackFilled) {
+                // Double should not overlap with float.
+                // For example, if fpr_index_ = 3, fpr_double_index_ should be at least 4.
+                fpr_double_index_ = std::max(fpr_double_index_, RoundUp(fpr_index_, 2));
+                // Float should not overlap with double.
+                if (fpr_index_ % 2 == 0) {
+                  fpr_index_ = std::max(fpr_double_index_, fpr_index_);
+                }
+              }
             }
-            stack_index_++;
           }
           break;
         case Primitive::kPrimDouble:
@@ -334,42 +361,46 @@
             is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
                 ((gpr_index_ + 1) == kNumQuickGprArgs);
             Visit();
-            if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
-              if (kBytesStackArgLocation == 4) {
-                stack_index_+= 2;
-              } else {
-                CHECK_EQ(kBytesStackArgLocation, 8U);
-                stack_index_++;
-              }
+            if (kBytesStackArgLocation == 4) {
+              stack_index_+= 2;
+            } else {
+              CHECK_EQ(kBytesStackArgLocation, 8U);
+              stack_index_++;
             }
             if (gpr_index_ < kNumQuickGprArgs) {
               gpr_index_++;
               if (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) {
                 if (gpr_index_ < kNumQuickGprArgs) {
                   gpr_index_++;
-                } else if (kQuickSoftFloatAbi) {
-                  stack_index_++;
                 }
               }
             }
           } else {
             is_split_long_or_double_ = (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) &&
-                ((fpr_index_ + 1) == kNumQuickFprArgs);
+                ((fpr_index_ + 1) == kNumQuickFprArgs) && !kQuickDoubleRegAlignedFloatBackFilled;
             Visit();
-            if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
-              fpr_index_++;
-              if (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) {
-                if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
-                  fpr_index_++;
-                }
-              }
-            }
             if (kBytesStackArgLocation == 4) {
               stack_index_+= 2;
             } else {
               CHECK_EQ(kBytesStackArgLocation, 8U);
               stack_index_++;
             }
+            if (kQuickDoubleRegAlignedFloatBackFilled) {
+              if (fpr_double_index_ + 2 < kNumQuickFprArgs + 1) {
+                fpr_double_index_ += 2;
+                // Float should not overlap with double.
+                if (fpr_index_ % 2 == 0) {
+                  fpr_index_ = std::max(fpr_double_index_, fpr_index_);
+                }
+              }
+            } else if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
+              fpr_index_++;
+              if (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) {
+                if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
+                  fpr_index_++;
+                }
+              }
+            }
           }
           break;
         default:
@@ -381,16 +412,8 @@
 
  private:
   static size_t StackArgumentStartFromShorty(bool is_static, const char* shorty,
                                              uint32_t shorty_len) {
-    if (kQuickSoftFloatAbi) {
-      CHECK_EQ(kNumQuickFprArgs, 0U);
-      return (kNumQuickGprArgs * GetBytesPerGprSpillLocation(kRuntimeISA))
-          + sizeof(StackReference<mirror::ArtMethod>) /* StackReference<ArtMethod> */;
-    } else {
-      // For now, there is no reg-spill area for the targets with
-      // hard float ABI. So, the offset pointing to the first method's
-      // parameter ('this' for non-static methods) should be returned.
-      return sizeof(StackReference<mirror::ArtMethod>);  // Skip StackReference<ArtMethod>.
-    }
+    // 'stack_args_' points to the first method's argument.
+    return sizeof(StackReference<mirror::ArtMethod>);  // Skip StackReference<ArtMethod>.
   }
 
  protected:
@@ -403,7 +426,14 @@ class QuickArgumentVisitor {
   uint8_t* const fpr_args_;  // Address of FPR arguments in callee save frame.
   uint8_t* const stack_args_;  // Address of stack arguments in caller's frame.
   uint32_t gpr_index_;  // Index into spilled GPRs.
-  uint32_t fpr_index_;  // Index into spilled FPRs.
+  // Index into spilled FPRs.
+  // In case kQuickDoubleRegAlignedFloatBackFilled, it may index a hole while fpr_double_index_
+  // holds a higher register number.
+  uint32_t fpr_index_;
+  // Index into spilled FPRs for aligned double.
+  // Only used when kQuickDoubleRegAlignedFloatBackFilled. Next available double register indexed
+  // in terms of singles, may be behind fpr_index_.
+  uint32_t fpr_double_index_;
   uint32_t stack_index_;  // Index into arguments on the stack.
 
   // The current type of argument during VisitArguments.
   Primitive::Type cur_type_;
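(Hand-worked trace, added for this review and not part of the patch: assuming the 32-bit ARM hard-float configuration above, i.e. kQuickDoubleRegAlignedFloatBackFilled with kNumQuickFprArgs = 16 and 4-byte FPR spill slots, visiting a static method with shorty "VFDF" drives the two counters as follows. GetParamAddress() reads floats at fpr_index_ and doubles at fpr_double_index_, so the visitor picks up the same s0, s2/s3 and back-filled s1 slots that quick_invoke_reg_setup() assigned in the earlier example.)

    arg   slot read from spill area     fpr_index_ / fpr_double_index_ afterwards
    F     fpr_args_ + 0   (s0)          1 / 2
    D     fpr_args_ + 8   (s2/s3, d1)   1 / 4
    F     fpr_args_ + 4   (s1, hole)    4 / 4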
@@ -943,8 +973,8 @@ template<class T> class BuildNativeCallFrameStateMachine {
         delegate_(delegate) {
     // For register alignment, we want to assume that counters (gpr_index_, fpr_index_) are even iff
     // the next register is even; counting down is just to make the compiler happy...
-    CHECK_EQ(kNumNativeGprArgs % 2, 0U);
-    CHECK_EQ(kNumNativeFprArgs % 2, 0U);
+    COMPILE_ASSERT(kNumNativeGprArgs % 2 == 0U, knum_native_gpr_args_not_even);
+    COMPILE_ASSERT(kNumNativeFprArgs % 2 == 0U, knum_native_fpr_args_not_even);
   }
 
   virtual ~BuildNativeCallFrameStateMachine() {}
diff --git a/runtime/globals.h b/runtime/globals.h
index b7bd44d..4d33196 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -112,6 +112,8 @@ static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceWa
 
 static constexpr bool kDefaultMustRelocate = true;
 
+static constexpr bool kArm32QuickCodeUseSoftFloat = false;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 9584d15..b219004 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -43,7 +43,7 @@ namespace mirror {
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
 extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                       const char*);
-#ifdef __LP64__
+#if defined(__LP64__) || defined(__arm__)
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 #endif
@@ -396,7 +396,7 @@ void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue*
   }
 
   if (!IsPortableCompiled()) {
-#ifdef __LP64__
+#if defined(__LP64__) || defined(__arm__)
     if (!IsStatic()) {
       (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
     } else {