diff options
Diffstat (limited to 'compiler/optimizing')
25 files changed, 693 insertions, 342 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 92fa6db..b2b5496 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -281,15 +281,22 @@ class ArrayAccessInsideLoopFinder : public ValueObject { return false; } + static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { + for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i); + if (!block->Dominates(back_edge)) { + return false; + } + } + return true; + } + void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - // Must be simplified loop. - DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U); for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { HBasicBlock* block = it_loop.Current(); DCHECK(block->IsInLoop()); - HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0); - if (!block->Dominates(back_edge)) { + if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure // that all array accesses collected are really executed in the loop. // For array accesses in a branch inside the loop, don't collect the @@ -1151,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor { bounds_check->GetBlock()->RemoveInstruction(bounds_check); } + static bool HasSameInputAtBackEdges(HPhi* phi) { + DCHECK(phi->IsLoopHeaderPhi()); + // Start with input 1. Input 0 is from the incoming block. + HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index beaff5c..bdbd571 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -327,6 +327,7 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + virtual ParallelMoveResolver* GetMoveResolver() = 0; protected: CodeGenerator(HGraph* graph, @@ -370,7 +371,6 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; - virtual ParallelMoveResolver* GetMoveResolver() = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e4c37de..f56e446 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -3539,8 +3543,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset( kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value()); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e02a1d..0222f93 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -285,6 +285,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -1034,8 +1038,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5ee091f..cfb8702 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2740,17 +2743,12 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); - break; - } + case Primitive::kPrimInt: case Primitive::kPrimLong: { + // Can't have Location::Any() and output SameAsFirstInput() locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2769,6 +2767,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { + DCHECK(first.IsRegister()); Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); @@ -2781,7 +2780,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { __ shrl(first_reg, second_reg); } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); if (op->IsShl()) { __ shll(first_reg, imm); } else if (op->IsShr()) { @@ -2793,14 +2796,29 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2809,6 +2827,30 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), + loc.ToHigh(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToLow(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2820,6 +2862,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2831,6 +2894,30 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), + loc.ToLow(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToHigh(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3909,8 +3996,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 79dec7a..5a5a37b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -166,6 +166,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5ac6866..9d2fc43 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -771,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3864,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 91cd60a..cd427c5 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -65,10 +65,13 @@ void HDeadCodeElimination::RemoveDeadBlocks() { for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (live_blocks.IsBitSet(block->GetBlockId())) { - continue; + // If this block is part of a loop that is being dismantled, we need to + // update its loop information. + block->UpdateLoopInformation(); + } else { + MaybeRecordDeadBlock(block); + block->DisconnectAndDelete(); } - MaybeRecordDeadBlock(block); - block->DisconnectAndDelete(); } // Connect successive blocks created by dead branches. Order does not matter. diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc6..8f69f4d 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, -1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index dc3124b..bb27a94 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -170,7 +170,8 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure the uses of `instruction` are defined in a block of the graph. + // Ensure the uses of `instruction` are defined in a block of the graph, + // and the entry in the use list is consistent. for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); @@ -184,6 +185,27 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { use->GetId(), instruction->GetId())); } + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { + AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + "UseListNode index.", + use->DebugName(), + use->GetId(), + instruction->GetId())); + } + } + + // Ensure the environment uses entries are consistent. + for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); use_it.Advance()) { + HEnvironment* use = use_it.Current()->GetUser(); + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) { + AddError(StringPrintf("Environment user of %s:%d has a wrong " + "UseListNode index.", + instruction->DebugName(), + instruction->GetId())); + } } // Ensure 'instruction' has pointers to its inputs' use entries. @@ -191,7 +213,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); HInstruction* input = input_record.GetInstruction(); HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); - if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + size_t use_index = use_node->GetIndex(); + if ((use_node == nullptr) + || !input->GetUses().Contains(use_node) + || (use_index >= e) + || (use_index != i)) { AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " "at input %u (%s:%d).", instruction->DebugName(), @@ -262,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -271,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. + // Ensure the loop header has only one incoming branch and the remaining + // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has less than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non back edge) blocks.", + id)); + } } } - const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + // Ensure back edges belong to the loop. + size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); } else { - DCHECK_EQ(num_back_edges, 1u); - int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); - if (!loop_blocks.IsBitSet(back_edge_id)) { - AddError(StringPrintf( - "Loop defined by header %d has an invalid back edge %d.", - id, - back_edge_id)); + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 5d3db5c..43fe374 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -374,4 +374,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) } } // namespace art - diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index dbb7cba..c243ef3 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,8 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "code_generator.h" #include "nodes.h" #include "optimization.h" +#include "parallel_move_resolver.h" namespace art { @@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + static void MoveArguments(HInvoke* invoke, + CodeGenerator* codegen, + InvokeDexCallingConventionVisitor* calling_convention_visitor) { + if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + } + + if (invoke->GetNumberOfArguments() == 0) { + // No argument to move. + return; + } + + LocationSummary* locations = invoke->GetLocations(); + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } + protected: IntrinsicVisitor() {} diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 259d554..7f7b450 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -77,28 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) { InvokeDexCallingConventionVisitorARM calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -117,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 9cfa782..ca3de99 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -86,28 +86,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -126,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 62cf3eb..1eef1ef 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -111,28 +111,9 @@ static void MoveFromReturnRegister(Location target, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { InvokeDexCallingConventionVisitorX86 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -155,7 +136,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); @@ -749,7 +730,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7e24dca..1fc5432 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -103,28 +103,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -143,7 +124,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); @@ -623,7 +604,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9..1914339 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 699987c..85c0361 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -191,24 +191,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend cheks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +200,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +210,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. + if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -357,26 +345,26 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion. - // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. + // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } HBasicBlock* HLoopInformation::GetPreHeader() const { - DCHECK_EQ(header_->GetPredecessors().Size(), 2u); return header_->GetDominator(); } @@ -388,6 +376,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. @@ -504,6 +500,16 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_ } } +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -713,6 +719,9 @@ void HPhi::AddInput(HInstruction* input) { void HPhi::RemoveInputAt(size_t index) { RemoveAsUserOfInput(index); inputs_.DeleteAt(index); + for (size_t i = index, e = InputCount(); i < e; ++i) { + InputRecordAt(i).GetUseNode()->SetIndex(i); + } } #define DEFINE_ACCEPT(name, super) \ @@ -961,8 +970,9 @@ void HBasicBlock::DisconnectAndDelete() { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); if (loop_info->IsBackEdge(*this)) { - // This deliberately leaves the loop in an inconsistent state and will - // fail SSAChecker unless the entire loop is removed during the pass. + // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state and will fail SSAChecker unless the + // entire loop is removed during the pass. loop_info->RemoveBackEdge(this); } } @@ -1038,6 +1048,20 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } +void HBasicBlock::UpdateLoopInformation() { + // Check if loop information points to a dismantled loop. If so, replace with + // the loop information of a larger loop which contains this block, or nullptr + // otherwise. We iterate in case the larger loop has been destroyed too. + while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) { + if (IsLoopHeader()) { + HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck(); + DCHECK_EQ(suspend_check->GetBlock(), this); + RemoveInstruction(suspend_check); + } + loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation(); + } +} + void HBasicBlock::MergeWith(HBasicBlock* other) { DCHECK_EQ(GetGraph(), other->GetGraph()); DCHECK(GetDominatedBlocks().Contains(other)); @@ -1059,8 +1083,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(other); if (loop_info->IsBackEdge(*other)) { - loop_info->ClearBackEdges(); - loop_info->AddBackEdge(this); + loop_info->ReplaceBackEdge(other, this); } } @@ -1291,11 +1314,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. + info->ReplaceBackEdge(at, to); } } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3fe23e1..5fc0470 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -397,11 +398,21 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - void ClearBackEdges() { - back_edges_.Reset(); + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; + + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is the header dominates the back edge. bool Populate(); @@ -636,7 +647,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true); bool IsLoopHeader() const { - return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); + return IsInLoop() && (loop_information_->GetHeader() == this); } bool IsLoopPreHeaderFirstPredecessor() const { @@ -655,7 +666,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void SetInLoop(HLoopInformation* info) { if (IsLoopHeader()) { // Nothing to do. This just means `info` is an outer loop. - } else if (loop_information_ == nullptr) { + } else if (!IsInLoop()) { loop_information_ = info; } else if (loop_information_->Contains(*info->GetHeader())) { // Block is currently part of an outer loop. Make it part of this inner loop. @@ -674,6 +685,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { loop_information_ = info; } + // Checks if the loop information points to a valid loop. If the loop has been + // dismantled (does not have a back edge any more), loop information is + // removed or replaced with the information of the first valid outer loop. + void UpdateLoopInformation(); + bool IsInLoop() const { return loop_information_ != nullptr; } // Returns wheter this block dominates the blocked passed as parameter. @@ -727,7 +743,7 @@ class HLoopInformationOutwardIterator : public ValueObject { void Advance() { DCHECK(!Done()); - current_ = current_->GetHeader()->GetDominator()->GetLoopInformation(); + current_ = current_->GetPreHeader()->GetLoopInformation(); } HLoopInformation* Current() const { @@ -840,13 +856,14 @@ class HUseListNode : public ArenaObject<kArenaAllocMisc> { HUseListNode* GetNext() const { return next_; } T GetUser() const { return user_; } size_t GetIndex() const { return index_; } + void SetIndex(size_t index) { index_ = index; } private: HUseListNode(T user, size_t index) : user_(user), index_(index), prev_(nullptr), next_(nullptr) {} T const user_; - const size_t index_; + size_t index_; HUseListNode<T>* prev_; HUseListNode<T>* next_; @@ -1051,7 +1068,9 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } } - void CopyFrom(HEnvironment* env); + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first // input to the loop phi instead. This is for inserting instructions that // require an environment (like HDeoptimization) in the loop pre-header. @@ -1080,7 +1099,7 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { GrowableArray<HUserRecord<HEnvironment*> > vregs_; - friend HInstruction; + friend class HInstruction; DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; @@ -3236,19 +3255,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } uint32_t GetDexPc() const { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of a same loop. + SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a8d006f..2375595 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -768,7 +768,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } else { DCHECK(!current->IsHighInterval()); - int hint = current->FindFirstRegisterHint(free_until); + int hint = current->FindFirstRegisterHint(free_until, liveness_); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; @@ -1101,8 +1101,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { - HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); - HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2); DCHECK(block_from != nullptr); DCHECK(block_to != nullptr); @@ -1111,6 +1111,41 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro return Split(interval, to); } + /* + * Non-linear control flow will force moves at every branch instruction to the new location. + * To avoid having all branches doing the moves, we find the next non-linear position and + * split the interval at this position. Take the following example (block number is the linear + * order position): + * + * B1 + * / \ + * B2 B3 + * \ / + * B4 + * + * B2 needs to split an interval, whose next use is in B4. If we were to split at the + * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval + * is now in the correct location. It makes performance worst if the interval is spilled + * and both B2 and B3 need to reload it before entering B4. + * + * By splitting at B3, we give a chance to the register allocator to allocate the + * interval to the same register as in B1, and therefore avoid doing any + * moves in B3. + */ + if (block_from->GetDominator() != nullptr) { + const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks(); + for (size_t i = 0; i < dominated.Size(); ++i) { + size_t position = dominated.Get(i)->GetLifetimeStart(); + if ((position > from) && (block_to->GetLifetimeStart() > position)) { + // Even if we found a better block, we continue iterating in case + // a dominated block is closer. + // Note that dominated blocks are not sorted in liveness order. + block_to = dominated.Get(i); + DCHECK_NE(block_to, block_from); + } + } + } + // If `to` is in a loop, find the outermost loop header which does not contain `from`. for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { HBasicBlock* header = it.Current()->GetHeader(); @@ -1467,23 +1502,28 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LiveRange* range = current->GetFirstRange(); while (range != nullptr) { - DCHECK(use == nullptr || use->GetPosition() >= range->GetStart()); + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } while (use != nullptr && use->GetPosition() <= range->GetEnd()) { DCHECK(!use->GetIsEnvironment()); DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - LocationSummary* locations = use->GetUser()->GetLocations(); - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + if (!use->IsSynthesized()) { + LocationSummary* locations = use->GetUser()->GetLocations(); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } - } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } use = use->GetNext(); } @@ -1561,7 +1601,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + if (kIsDebugBuild) { + // Following uses can only be synthesized uses. + while (use != nullptr) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index b66e655..2a713cc 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -520,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -534,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -544,7 +544,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b..1c83c4b 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index b674f74..09a6648 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -264,13 +262,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } @@ -322,7 +319,8 @@ static int RegisterOrLowRegister(Location location) { return location.IsPair() ? location.low() : location.reg(); } -int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { +int LiveInterval::FindFirstRegisterHint(size_t* free_until, + const SsaLivenessAnalysis& liveness) const { DCHECK(!IsHighInterval()); if (IsTemp()) return kNoRegister; @@ -336,12 +334,32 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { } } + if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. If one location is a register we return it as a hint. This + // will avoid a move between the two blocks. + HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2); + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1; + // We know positions above GetStart() do not have a location yet. + if (position < GetStart()) { + LiveInterval* existing = GetParent()->GetSiblingAt(position); + if (existing != nullptr + && existing->HasRegister() + && (free_until[existing->GetRegister()] > GetStart())) { + return existing->GetRegister(); + } + } + } + } + UsePosition* use = first_use_; size_t start = GetStart(); size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - if (use_position >= start) { + if (use_position >= start && !use->IsSynthesized()) { HInstruction* user = use->GetUser(); size_t input_index = use->GetInputIndex(); if (user->IsPhi()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index b95276a..b550d8a 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -23,6 +23,7 @@ namespace art { class CodeGenerator; +class SsaLivenessAnalysis; static constexpr int kNoRegister = -1; @@ -112,12 +113,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { is_environment_(is_environment), position_(position), next_(next) { - DCHECK(user->IsPhi() + DCHECK((user == nullptr) + || user->IsPhi() || (GetPosition() == user->GetLifetimePosition() + 1) || (GetPosition() == user->GetLifetimePosition())); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } + static constexpr size_t kNoInput = -1; + size_t GetPosition() const { return position_; } UsePosition* GetNext() const { return next_; } @@ -126,14 +130,16 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { HInstruction* GetUser() const { return user_; } bool GetIsEnvironment() const { return is_environment_; } + bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } void Dump(std::ostream& stream) const { stream << position_; - if (is_environment_) { - stream << " (env)"; - } + } + + HLoopInformation* GetLoopInformation() const { + return user_->GetBlock()->GetLoopInformation(); } UsePosition* Dup(ArenaAllocator* allocator) const { @@ -142,6 +148,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { next_ == nullptr ? nullptr : next_->Dup(allocator)); } + bool RequiresRegister() const { + if (GetIsEnvironment()) return false; + if (IsSynthesized()) return false; + Location location = GetUser()->GetLocations()->InAt(GetInputIndex()); + return location.IsUnallocated() + && (location.GetPolicy() == Location::kRequiresRegister + || location.GetPolicy() == Location::kRequiresFpuRegister); + } + private: HInstruction* const user_; const size_t input_index_; @@ -240,9 +255,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // location of the input just before that instruction (and not potential moves due // to splitting). position = instruction->GetLifetimePosition(); + } else if (!locations->InAt(input_index).IsValid()) { + return; } } + if (!is_environment && instruction->IsInLoop()) { + AddBackEdgeUses(*instruction->GetBlock()); + } + DCHECK(position == instruction->GetLifetimePosition() || position == instruction->GetLifetimePosition() + 1); @@ -306,6 +327,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { DCHECK(instruction->IsPhi()); + if (block->IsInLoop()) { + AddBackEdgeUses(*block); + } first_use_ = new (allocator_) UsePosition( instruction, input_index, false, block->GetLifetimeEnd(), first_use_); } @@ -456,27 +480,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - LocationSummary* locations = defined_by_->GetLocations(); - Location location = locations->Out(); - // This interval is the first interval of the instruction. If the output - // of the instruction requires a register, we return the position of that instruction - // as the first register use. - if (location.IsUnallocated()) { - if ((location.GetPolicy() == Location::kRequiresRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && (locations->InAt(0).IsRegister() - || locations->InAt(0).IsRegisterPair() - || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { - return position; - } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { - return position; - } - } else if (location.IsRegister() || location.IsRegisterPair()) { - return position; - } + + if (IsDefiningPosition(position) && DefinitionRequiresRegister()) { + return position; } UsePosition* use = first_use_; @@ -484,10 +490,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); if (use_position > position) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - if (location.IsUnallocated() - && (location.GetPolicy() == Location::kRequiresRegister - || location.GetPolicy() == Location::kRequiresFpuRegister)) { + if (use->RequiresRegister()) { return use_position; } } @@ -505,18 +508,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - if (defined_by_->GetLocations()->Out().IsValid()) { - return position; - } + if (IsDefiningPosition(position)) { + DCHECK(defined_by_->GetLocations()->Out().IsValid()); + return position; } UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); size_t use_position = use->GetPosition(); - if (use_position > position && location.IsValid()) { + if (use_position > position) { return use_position; } use = use->GetNext(); @@ -664,7 +665,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { stream << " "; } while ((use = use->GetNext()) != nullptr); } - stream << "}, {"; + stream << "}, { "; use = first_env_use_; if (use != nullptr) { do { @@ -690,7 +691,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns the first register hint that is at least free before // the value contained in `free_until`. If none is found, returns // `kNoRegister`. - int FindFirstRegisterHint(size_t* free_until) const; + int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const; // If there is enough at the definition site to find a register (for example // it uses the same input as the first input), returns the register as a hint. @@ -910,6 +911,104 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return range; } + bool DefinitionRequiresRegister() const { + DCHECK(IsParent()); + LocationSummary* locations = defined_by_->GetLocations(); + Location location = locations->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. + if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { + return true; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsFpuRegister() + || locations->InAt(0).IsFpuRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) { + return true; + } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return true; + } + return false; + } + + bool IsDefiningPosition(size_t position) const { + return IsParent() && (position == GetStart()); + } + + bool HasSynthesizeUseAt(size_t position) const { + UsePosition* use = first_use_; + while (use != nullptr) { + size_t use_position = use->GetPosition(); + if ((use_position == position) && use->IsSynthesized()) { + return true; + } + if (use_position > position) break; + use = use->GetNext(); + } + return false; + } + + void AddBackEdgeUses(const HBasicBlock& block_at_use) { + DCHECK(block_at_use.IsInLoop()); + // Add synthesized uses at the back edge of loops to help the register allocator. + // Note that this method is called in decreasing liveness order, to faciliate adding + // uses at the head of the `first_use_` linked list. Because below + // we iterate from inner-most to outer-most, which is in increasing liveness order, + // we need to take extra care of how the `first_use_` linked list is being updated. + UsePosition* first_in_new_list = nullptr; + UsePosition* last_in_new_list = nullptr; + for (HLoopInformationOutwardIterator it(block_at_use); + !it.Done(); + it.Advance()) { + HLoopInformation* current = it.Current(); + if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) { + // This interval is defined in the loop. We can stop going outward. + break; + } + + // We're only adding a synthesized use at the last back edge. Adding syntehsized uses on + // all back edges is not necessary: anything used in the loop will have its use at the + // last back edge. If we want branches in a loop to have better register allocation than + // another branch, then it is the linear order we should change. + size_t back_edge_use_position = current->GetLifetimeEnd(); + if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { + // There was a use already seen in this loop. Therefore the previous call to `AddUse` + // already inserted the backedge use. We can stop going outward. + DCHECK(HasSynthesizeUseAt(back_edge_use_position)); + break; + } + + DCHECK(last_in_new_list == nullptr + || back_edge_use_position > last_in_new_list->GetPosition()); + + UsePosition* new_use = new (allocator_) UsePosition( + nullptr, UsePosition::kNoInput, /* is_environment */ false, + back_edge_use_position, nullptr); + + if (last_in_new_list != nullptr) { + // Going outward. The latest created use needs to point to the new use. + last_in_new_list->SetNext(new_use); + } else { + // This is the inner-most loop. + DCHECK_EQ(current, block_at_use.GetLoopInformation()); + first_in_new_list = new_use; + } + last_in_new_list = new_use; + } + // Link the newly created linked list with `first_use_`. + if (last_in_new_list != nullptr) { + last_in_new_list->SetNext(first_use_); + first_use_ = first_in_new_list; + } + } + ArenaAllocator* const allocator_; // Ranges of this interval. We need a quick access to the last range to test @@ -1022,14 +1121,18 @@ class SsaLivenessAnalysis : public ValueObject { } HBasicBlock* GetBlockFromPosition(size_t index) const { - HInstruction* instruction = GetInstructionFromPosition(index / 2); + HInstruction* instruction = GetInstructionFromPosition(index); if (instruction == nullptr) { // If we are at a block boundary, get the block following. - instruction = GetInstructionFromPosition((index / 2) + 1); + instruction = GetInstructionFromPosition(index + 1); } return instruction->GetBlock(); } + bool IsAtBlockBoundary(size_t index) const { + return GetInstructionFromPosition(index) == nullptr; + } + HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it. DCHECK(temp->IsTemp()); diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 00c241b..4cc9c3e 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [5]\n" - " 1: IntConstant 4 [14, 8, 8]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [5, 8, 8]\n" + " 2: IntConstant 5 [5]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 8, succ: 6, 3\n" - " 5: Phi(0, 14) [12, 6, 6]\n" + "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n" + " 5: Phi(0, 2, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" "BasicBlock 3, pred: 2, succ: 5, 4\n" " 8: Equal(1, 1) [9]\n" " 9: If(8)\n" - "BasicBlock 4, pred: 3, succ: 8\n" + "BasicBlock 4, pred: 3, succ: 2\n" " 10: Goto\n" - "BasicBlock 5, pred: 3, succ: 8\n" + "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" "BasicBlock 6, pred: 2, succ: 7\n" " 12: Return(5)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" - // Synthesized single back edge of loop. - "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, |