diff options
author | Bill Buzbee <buzbee@android.com> | 2014-06-05 22:34:57 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2014-06-05 22:34:57 +0000 |
commit | 25c4f6a25b3de9b9d7ca5162f1629753a0b7f003 (patch) | |
tree | ea037b8bffd0568edbb835fd1ed06740734375f8 | |
parent | 86e48ce0e82c13d8af470e7c2abe1bfb3c1e0136 (diff) | |
parent | 04f4d8abe45d6e79eca983e057de76aea24b7df9 (diff) | |
download | art-25c4f6a25b3de9b9d7ca5162f1629753a0b7f003.zip art-25c4f6a25b3de9b9d7ca5162f1629753a0b7f003.tar.gz art-25c4f6a25b3de9b9d7ca5162f1629753a0b7f003.tar.bz2 |
Merge "Add an optimization for removing redundant suspend tests in ART"
-rw-r--r-- | compiler/dex/mir_graph.cc | 21 | ||||
-rw-r--r-- | compiler/dex/mir_graph.h | 16 | ||||
-rw-r--r-- | compiler/dex/quick/arm/arm_lir.h | 13 | ||||
-rw-r--r-- | compiler/dex/quick/arm/int_arm.cc | 10 | ||||
-rw-r--r-- | compiler/dex/quick/arm/target_arm.cc | 18 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 55 | ||||
-rw-r--r-- | runtime/arch/arm/asm_support_arm.h | 2 | ||||
-rw-r--r-- | runtime/arch/arm/quick_entrypoints_arm.S | 4 |
8 files changed, 129 insertions, 10 deletions
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 3ef1dbf..a2676c8 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -111,7 +111,8 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) merged_df_flags_(0u), ifield_lowering_infos_(arena, 0u), sfield_lowering_infos_(arena, 0u), - method_lowering_infos_(arena, 0u) { + method_lowering_infos_(arena, 0u), + gen_suspend_test_list_(arena, 0u) { try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */); max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg)) - std::abs(static_cast<int>(kVRegTempBaseReg)); @@ -1533,6 +1534,24 @@ bool BasicBlock::IsExceptionBlock() const { return false; } +bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) { + BasicBlock* target = GetBasicBlock(target_id); + + if (source == nullptr || target == nullptr) + return false; + + int idx; + for (idx = gen_suspend_test_list_.Size() - 1; idx >= 0; idx--) { + BasicBlock* bb = gen_suspend_test_list_.Get(idx); + if (bb == source) + return true; // The block has been inserted by a suspend check before. + if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id)) + return true; + } + + return false; +} + ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph) : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false), visited_taken_(false), have_successors_(false) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 38cd5ee..b6cec66 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -192,6 +192,7 @@ enum OatMethodAttributes { typedef uint16_t BasicBlockId; static const BasicBlockId NullBasicBlockId = 0; +static constexpr bool kLeafOptimization = false; /* * In general, vreg/sreg describe Dalvik registers that originated with dx. However, @@ -1055,6 +1056,20 @@ class MIRGraph { void HandleSSADef(int* defs, int dalvik_reg, int reg_index); bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed); + // Used for removing redudant suspend tests + void AppendGenSuspendTestList(BasicBlock* bb) { + if (gen_suspend_test_list_.Size() == 0 || + gen_suspend_test_list_.Get(gen_suspend_test_list_.Size() - 1) != bb) { + gen_suspend_test_list_.Insert(bb); + } + } + + /* This is used to check if there is already a method call dominating the + * source basic block of a backedge and being dominated by the target basic + * block of the backedge. + */ + bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id); + protected: int FindCommonParent(int block1, int block2); void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1, @@ -1162,6 +1177,7 @@ class MIRGraph { GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_; GrowableArray<MirMethodLoweringInfo> method_lowering_infos_; static const uint64_t oat_data_flow_attributes_[kMirOpLast]; + GrowableArray<BasicBlock*> gen_suspend_test_list_; // List of blocks containing suspend tests friend class ClassInitCheckEliminationTest; friend class LocalValueNumberingTest; diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index e384f6b..e32e7cb 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -29,7 +29,8 @@ namespace art { * pointer in r0 as a hidden arg0. Otherwise used as codegen scratch * registers. * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit - * r4 : (rARM_SUSPEND) is reserved (suspend check/debugger assist) + * r4 : If ARM_R4_SUSPEND_FLAG is set then reserved as a suspend check/debugger + * assist flag, otherwise a callee save promotion target. * r5 : Callee save (promotion target) * r6 : Callee save (promotion target) * r7 : Callee save (promotion target) @@ -95,6 +96,8 @@ namespace art { // First FP callee save. #define ARM_FP_CALLEE_SAVE_BASE 16 +// Flag for using R4 to do suspend check +#define ARM_R4_SUSPEND_FLAG enum ArmResourceEncodingPos { kArmGPReg0 = 0, @@ -117,7 +120,11 @@ enum ArmNativeRegisterPool { r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1, r2 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2, r3 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3, +#ifdef ARM_R4_SUSPEND_FLAG rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4, +#else + r4 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4, +#endif r5 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5, r6 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6, r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7, @@ -207,7 +214,11 @@ constexpr RegStorage rs_r0(RegStorage::kValid | r0); constexpr RegStorage rs_r1(RegStorage::kValid | r1); constexpr RegStorage rs_r2(RegStorage::kValid | r2); constexpr RegStorage rs_r3(RegStorage::kValid | r3); +#ifdef ARM_R4_SUSPEND_FLAG constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND); +#else +constexpr RegStorage rs_r4(RegStorage::kValid | r4); +#endif constexpr RegStorage rs_r5(RegStorage::kValid | r5); constexpr RegStorage rs_r6(RegStorage::kValid | r6); constexpr RegStorage rs_r7(RegStorage::kValid | r7); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 769122d..4732e52 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -953,8 +953,18 @@ void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) { // Test suspend flag, return target of taken suspend branch LIR* ArmMir2Lir::OpTestSuspend(LIR* target) { +#ifdef ARM_R4_SUSPEND_FLAG NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1); return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); +#else + RegStorage t_reg = AllocTemp(); + LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(), + t_reg, kUnsignedHalf); + LIR* cmp_branch = OpCmpImmBranch((target == NULL) ? kCondNe : kCondEq, t_reg, + 0, target); + FreeTemp(t_reg); + return cmp_branch; +#endif } // Decrement register and branch on condition diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 5340d83..bd9c8b4 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -25,9 +25,15 @@ namespace art { +#ifdef ARM_R4_SUSPEND_FLAG static constexpr RegStorage core_regs_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; +#else +static constexpr RegStorage core_regs_arr[] = + {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, + rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; +#endif static constexpr RegStorage sp_regs_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20, @@ -36,9 +42,15 @@ static constexpr RegStorage sp_regs_arr[] = static constexpr RegStorage dp_regs_arr[] = {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15}; +#ifdef ARM_R4_SUSPEND_FLAG static constexpr RegStorage reserved_regs_arr[] = {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; +#else +static constexpr RegStorage reserved_regs_arr[] = + {rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; +static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r12}; +#endif static constexpr RegStorage sp_temps_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; @@ -79,7 +91,11 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { RegStorage res_reg = RegStorage::InvalidReg(); switch (reg) { case kSelf: res_reg = rs_rARM_SELF; break; +#ifdef ARM_R4_SUSPEND_FLAG case kSuspend: res_reg = rs_rARM_SUSPEND; break; +#else + case kSuspend: res_reg = RegStorage::InvalidReg(); break; +#endif case kLr: res_reg = rs_rARM_LR; break; case kPc: res_reg = rs_rARM_PC; break; case kSp: res_reg = rs_rARM_SP; break; @@ -578,11 +594,13 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { } } +#ifdef ARM_R4_SUSPEND_FLAG // TODO: re-enable this when we can safely save r4 over the suspension code path. bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks(); if (no_suspend) { GetRegInfo(rs_rARM_SUSPEND)->MarkFree(); } +#endif // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. // TODO: adjust when we roll to hard float calling convention. diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 9621995..1f12b6f 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -399,7 +399,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list cu_->class_def_idx)) { GenMemBarrier(kStoreStore); } - if (!mir_graph_->MethodIsLeaf()) { + if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } break; @@ -408,7 +408,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list DCHECK(rl_src[0].ref); // Intentional fallthrough. case Instruction::RETURN: - if (!mir_graph_->MethodIsLeaf()) { + if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); @@ -416,7 +416,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list break; case Instruction::RETURN_WIDE: - if (!mir_graph_->MethodIsLeaf()) { + if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); @@ -543,7 +543,8 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: - if (mir_graph_->IsBackedge(bb, bb->taken)) { + if (mir_graph_->IsBackedge(bb, bb->taken) && + (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) { GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]); } else { OpUnconditionalBranch(&label_list[bb->taken]); @@ -582,12 +583,15 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), mir_graph_->ConstantValue(rl_src[1].orig_sreg)); BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target_id)) { + if (mir_graph_->IsBackedge(bb, target_id) && + (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) { GenSuspendTest(opt_flags); } OpUnconditionalBranch(&label_list[target_id]); } else { - if (mir_graph_->IsBackwardsBranch(bb)) { + if (mir_graph_->IsBackwardsBranch(bb) && + (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) || + !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) { GenSuspendTest(opt_flags); } GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through); @@ -607,12 +611,15 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list if (rl_src[0].is_const) { bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0); BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target_id)) { + if (mir_graph_->IsBackedge(bb, target_id) && + (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) { GenSuspendTest(opt_flags); } OpUnconditionalBranch(&label_list[target_id]); } else { - if (mir_graph_->IsBackwardsBranch(bb)) { + if (mir_graph_->IsBackwardsBranch(bb) && + (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) || + !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) { GenSuspendTest(opt_flags); } GenCompareZeroAndBranch(opcode, rl_src[0], taken, fall_through); @@ -756,37 +763,69 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::INVOKE_STATIC_RANGE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + // If the invocation is not inlined, we can assume there is already a + // suspend check at the return site + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_STATIC: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_DIRECT: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_DIRECT_RANGE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_VIRTUAL: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_VIRTUAL_RANGE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_SUPER: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_SUPER_RANGE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_INTERFACE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::INVOKE_INTERFACE_RANGE: GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true)); + if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) { + mir_graph_->AppendGenSuspendTestList(bb); + } break; case Instruction::NEG_INT: diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index 9e1db96..ae30aee 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -34,5 +34,7 @@ // Expected size of a heap reference #define HEAP_REFERENCE_SIZE 4 +// Flag for enabling R4 optimization in arm runtime +#define ARM_R4_SUSPEND_FLAG #endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_ diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 5212576..83a683d 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -330,7 +330,9 @@ ENTRY art_quick_invoke_stub mov r11, sp @ save the stack pointer .cfi_def_cfa_register r11 mov r9, r3 @ move managed thread pointer into r9 +#ifdef ARM_R4_SUSPEND_FLAG mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval +#endif add r5, r2, #16 @ create space for method pointer in frame and r5, #0xFFFFFFF0 @ align frame size to 16 bytes sub sp, r5 @ reserve stack space for argument array @@ -932,11 +934,13 @@ GENERATE_ALL_ALLOC_ENTRYPOINTS */ .extern artTestSuspendFromCode ENTRY art_quick_test_suspend +#ifdef ARM_R4_SUSPEND_FLAG ldrh r0, [rSELF, #THREAD_FLAGS_OFFSET] mov rSUSPEND, #SUSPEND_CHECK_INTERVAL @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL cbnz r0, 1f @ check Thread::Current()->suspend_count_ == 0 bx lr @ return if suspend_count_ == 0 1: +#endif mov r0, rSELF SETUP_REF_ONLY_CALLEE_SAVE_FRAME @ save callee saves for stack crawl mov r1, sp |