summaryrefslogtreecommitdiffstats
path: root/compiler
diff options
context:
space:
mode:
authorWei Jin <wejin@google.com>2014-05-29 18:04:29 -0700
committerbuzbee <buzbee@google.com>2014-06-05 15:04:51 -0700
commit04f4d8abe45d6e79eca983e057de76aea24b7df9 (patch)
treeea037b8bffd0568edbb835fd1ed06740734375f8 /compiler
parent86e48ce0e82c13d8af470e7c2abe1bfb3c1e0136 (diff)
downloadart-04f4d8abe45d6e79eca983e057de76aea24b7df9.zip
art-04f4d8abe45d6e79eca983e057de76aea24b7df9.tar.gz
art-04f4d8abe45d6e79eca983e057de76aea24b7df9.tar.bz2
Add an optimization for removing redundant suspend tests in ART
This CL: (1) eliminates redundant suspend checks (dominated by another check), (2) removes the special treatment of the R4 register, which got reset on every native call, possibly yielding long execution sequences without any suspend checks, and (3) fixes the absence of suspend checks in leaf methods. (2) and (3) increase the frequency of suspend checks, which improves the performance of GC and the accuracy of profile data. To compensate for the increased number of checks, we implemented an optimization that leverages dominance information to remove redundant suspend checks on back edges. Based on the results of running the Caffeine benchmark on Nexus 7, the patch performs roughly 30% more useful suspend checks, spreading them much more evenly along the execution trace, while incurring less than 1% overhead. For flexibility consideration, this CL defines two flags to control the enabling of optimizations. The original implementation is the default. Change-Id: I31e81a5b3c53030444dbe0434157274c9ab8640f Signed-off-by: Wei Jin <wejin@google.com>
Diffstat (limited to 'compiler')
-rw-r--r--compiler/dex/mir_graph.cc21
-rw-r--r--compiler/dex/mir_graph.h16
-rw-r--r--compiler/dex/quick/arm/arm_lir.h13
-rw-r--r--compiler/dex/quick/arm/int_arm.cc10
-rw-r--r--compiler/dex/quick/arm/target_arm.cc18
-rw-r--r--compiler/dex/quick/mir_to_lir.cc55
6 files changed, 123 insertions, 10 deletions
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 3ef1dbf..a2676c8 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -111,7 +111,8 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
merged_df_flags_(0u),
ifield_lowering_infos_(arena, 0u),
sfield_lowering_infos_(arena, 0u),
- method_lowering_infos_(arena, 0u) {
+ method_lowering_infos_(arena, 0u),
+ gen_suspend_test_list_(arena, 0u) {
try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
- std::abs(static_cast<int>(kVRegTempBaseReg));
@@ -1533,6 +1534,24 @@ bool BasicBlock::IsExceptionBlock() const {
return false;
}
+bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) {
+ BasicBlock* target = GetBasicBlock(target_id);
+
+ if (source == nullptr || target == nullptr)
+ return false;
+
+ int idx;
+ for (idx = gen_suspend_test_list_.Size() - 1; idx >= 0; idx--) {
+ BasicBlock* bb = gen_suspend_test_list_.Get(idx);
+ if (bb == source)
+ return true; // A suspend check has already been inserted in this block.
+ if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id))
+ return true;
+ }
+
+ return false;
+}
+
ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
: basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
visited_taken_(false), have_successors_(false) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 38cd5ee..b6cec66 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -192,6 +192,7 @@ enum OatMethodAttributes {
typedef uint16_t BasicBlockId;
static const BasicBlockId NullBasicBlockId = 0;
+static constexpr bool kLeafOptimization = false;
/*
* In general, vreg/sreg describe Dalvik registers that originated with dx. However,
@@ -1055,6 +1056,20 @@ class MIRGraph {
void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
+ // Used for removing redundant suspend tests.
+ void AppendGenSuspendTestList(BasicBlock* bb) {
+ if (gen_suspend_test_list_.Size() == 0 ||
+ gen_suspend_test_list_.Get(gen_suspend_test_list_.Size() - 1) != bb) {
+ gen_suspend_test_list_.Insert(bb);
+ }
+ }
+
+ /* This is used to check whether there is already a method call that
+ * dominates the source basic block of a backedge and is dominated by the
+ * target basic block of the backedge.
+ */
+ bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
+ bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
+
protected:
int FindCommonParent(int block1, int block2);
void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
@@ -1162,6 +1177,7 @@ class MIRGraph {
GrowableArray<MirSFieldLoweringInfo> sfield_lowering_infos_;
GrowableArray<MirMethodLoweringInfo> method_lowering_infos_;
static const uint64_t oat_data_flow_attributes_[kMirOpLast];
+ GrowableArray<BasicBlock*> gen_suspend_test_list_; // List of blocks containing suspend tests
friend class ClassInitCheckEliminationTest;
friend class LocalValueNumberingTest;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index e384f6b..e32e7cb 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -29,7 +29,8 @@ namespace art {
* pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
* registers.
* r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4 : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r4 : If ARM_R4_SUSPEND_FLAG is set then reserved as a suspend check/debugger
+ * assist flag, otherwise a callee save promotion target.
* r5 : Callee save (promotion target)
* r6 : Callee save (promotion target)
* r7 : Callee save (promotion target)
@@ -95,6 +96,8 @@ namespace art {
// First FP callee save.
#define ARM_FP_CALLEE_SAVE_BASE 16
+// Flag for using R4 to do suspend check
+#define ARM_R4_SUSPEND_FLAG
enum ArmResourceEncodingPos {
kArmGPReg0 = 0,
@@ -117,7 +120,11 @@ enum ArmNativeRegisterPool {
r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
r2 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
r3 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
+#ifdef ARM_R4_SUSPEND_FLAG
rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
+#else
+ r4 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
+#endif
r5 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
r6 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
@@ -207,7 +214,11 @@ constexpr RegStorage rs_r0(RegStorage::kValid | r0);
constexpr RegStorage rs_r1(RegStorage::kValid | r1);
constexpr RegStorage rs_r2(RegStorage::kValid | r2);
constexpr RegStorage rs_r3(RegStorage::kValid | r3);
+#ifdef ARM_R4_SUSPEND_FLAG
constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
+#else
+constexpr RegStorage rs_r4(RegStorage::kValid | r4);
+#endif
constexpr RegStorage rs_r5(RegStorage::kValid | r5);
constexpr RegStorage rs_r6(RegStorage::kValid | r6);
constexpr RegStorage rs_r7(RegStorage::kValid | r7);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 769122d..4732e52 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -953,8 +953,18 @@ void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
// Test suspend flag, return target of taken suspend branch
LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
+#ifdef ARM_R4_SUSPEND_FLAG
NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
+#else
+ RegStorage t_reg = AllocTemp();
+ LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
+ t_reg, kUnsignedHalf);
+ LIR* cmp_branch = OpCmpImmBranch((target == NULL) ? kCondNe : kCondEq, t_reg,
+ 0, target);
+ FreeTemp(t_reg);
+ return cmp_branch;
+#endif
}
// Decrement register and branch on condition
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 5340d83..bd9c8b4 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -25,9 +25,15 @@
namespace art {
+#ifdef ARM_R4_SUSPEND_FLAG
static constexpr RegStorage core_regs_arr[] =
{rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+#else
+static constexpr RegStorage core_regs_arr[] =
+ {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
+ rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+#endif
static constexpr RegStorage sp_regs_arr[] =
{rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
@@ -36,9 +42,15 @@ static constexpr RegStorage sp_regs_arr[] =
static constexpr RegStorage dp_regs_arr[] =
{rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+#ifdef ARM_R4_SUSPEND_FLAG
static constexpr RegStorage reserved_regs_arr[] =
{rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+#else
+static constexpr RegStorage reserved_regs_arr[] =
+ {rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r4, rs_r12};
+#endif
static constexpr RegStorage sp_temps_arr[] =
{rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
@@ -79,7 +91,11 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) {
RegStorage res_reg = RegStorage::InvalidReg();
switch (reg) {
case kSelf: res_reg = rs_rARM_SELF; break;
+#ifdef ARM_R4_SUSPEND_FLAG
case kSuspend: res_reg = rs_rARM_SUSPEND; break;
+#else
+ case kSuspend: res_reg = RegStorage::InvalidReg(); break;
+#endif
case kLr: res_reg = rs_rARM_LR; break;
case kPc: res_reg = rs_rARM_PC; break;
case kSp: res_reg = rs_rARM_SP; break;
@@ -578,11 +594,13 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() {
}
}
+#ifdef ARM_R4_SUSPEND_FLAG
// TODO: re-enable this when we can safely save r4 over the suspension code path.
bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks();
if (no_suspend) {
GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
}
+#endif
// Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
// TODO: adjust when we roll to hard float calling convention.
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 9621995..1f12b6f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -399,7 +399,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
cu_->class_def_idx)) {
GenMemBarrier(kStoreStore);
}
- if (!mir_graph_->MethodIsLeaf()) {
+ if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
GenSuspendTest(opt_flags);
}
break;
@@ -408,7 +408,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
DCHECK(rl_src[0].ref);
// Intentional fallthrough.
case Instruction::RETURN:
- if (!mir_graph_->MethodIsLeaf()) {
+ if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
GenSuspendTest(opt_flags);
}
DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
@@ -416,7 +416,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
break;
case Instruction::RETURN_WIDE:
- if (!mir_graph_->MethodIsLeaf()) {
+ if (!kLeafOptimization || !mir_graph_->MethodIsLeaf()) {
GenSuspendTest(opt_flags);
}
DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
@@ -543,7 +543,8 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
case Instruction::GOTO:
case Instruction::GOTO_16:
case Instruction::GOTO_32:
- if (mir_graph_->IsBackedge(bb, bb->taken)) {
+ if (mir_graph_->IsBackedge(bb, bb->taken) &&
+ (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
} else {
OpUnconditionalBranch(&label_list[bb->taken]);
@@ -582,12 +583,15 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg),
mir_graph_->ConstantValue(rl_src[1].orig_sreg));
BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
- if (mir_graph_->IsBackedge(bb, target_id)) {
+ if (mir_graph_->IsBackedge(bb, target_id) &&
+ (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
GenSuspendTest(opt_flags);
}
OpUnconditionalBranch(&label_list[target_id]);
} else {
- if (mir_graph_->IsBackwardsBranch(bb)) {
+ if (mir_graph_->IsBackwardsBranch(bb) &&
+ (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+ !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
GenSuspendTest(opt_flags);
}
GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through);
@@ -607,12 +611,15 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
if (rl_src[0].is_const) {
bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0);
BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
- if (mir_graph_->IsBackedge(bb, target_id)) {
+ if (mir_graph_->IsBackedge(bb, target_id) &&
+ (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
GenSuspendTest(opt_flags);
}
OpUnconditionalBranch(&label_list[target_id]);
} else {
- if (mir_graph_->IsBackwardsBranch(bb)) {
+ if (mir_graph_->IsBackwardsBranch(bb) &&
+ (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+ !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
GenSuspendTest(opt_flags);
}
GenCompareZeroAndBranch(opcode, rl_src[0], taken, fall_through);
@@ -756,37 +763,69 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
case Instruction::INVOKE_STATIC_RANGE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ // If the invocation is not inlined, we can assume there is already a
+ // suspend check at the return site
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_STATIC:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_DIRECT:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_DIRECT_RANGE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_VIRTUAL:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_VIRTUAL_RANGE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_SUPER:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_SUPER_RANGE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_INTERFACE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::INVOKE_INTERFACE_RANGE:
GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
+ if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+ mir_graph_->AppendGenSuspendTestList(bb);
+ }
break;
case Instruction::NEG_INT: