Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/code_generator.cc        |  52
 compiler/optimizing/code_generator.h         |  11
 compiler/optimizing/code_generator_arm.cc    |  14
 compiler/optimizing/code_generator_arm.h     |   2
 compiler/optimizing/code_generator_x86.cc    |  33
 compiler/optimizing/code_generator_x86.h     |   2
 compiler/optimizing/code_generator_x86_64.cc |  34
 compiler/optimizing/code_generator_x86_64.h  |   2
 compiler/optimizing/graph_checker.cc         | 113
 compiler/optimizing/graph_checker.h          |   5
 compiler/optimizing/graph_visualizer.cc      |   8
 compiler/optimizing/locations.cc             |   2
 compiler/optimizing/locations.h              |  44
 compiler/optimizing/nodes.cc                 |  12
 compiler/optimizing/nodes.h                  |  15
 compiler/optimizing/register_allocator.cc    |  52
 compiler/optimizing/register_allocator.h     |   3
 compiler/optimizing/ssa_liveness_analysis.h  |  13
 compiler/optimizing/ssa_phi_elimination.cc   |   6
 19 files changed, 376 insertions(+), 47 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2547a29..3231c99 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
                      + GetGraph()->GetNumberOfTemporaries()
                      + 1 /* filler */,
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -111,10 +112,15 @@ size_t CodeGenerator::AllocateFreeRegisterInternal(
   return -1;
 }
 
-void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots) {
+void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
+                                     size_t maximum_number_of_live_registers,
+                                     size_t number_of_out_slots) {
+  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
+      + maximum_number_of_live_registers * GetWordSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
@@ -468,4 +474,48 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
   }
 }
 
+size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
+  return first_register_slot_in_slow_path_ + index * GetWordSize();
+}
+
+void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      SaveCoreRegister(Location::StackSlot(stack_offset), i);
+      // If the register holds an object, update the stack mask.
+      if (locations->RegisterContainsObject(i)) {
+        locations->SetStackBit(stack_offset / kVRegSize);
+      }
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
+void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
 }  // namespace art
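A note on the frame layout this implies: the save area for slow-path live registers sits directly above the out slots and spill slots, and GetStackOffsetOfSavedRegister indexes into it word by word. The sketch below works through the arithmetic with made-up numbers; every constant is illustrative and none is taken from the patch.

    #include <cstddef>

    constexpr size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

    constexpr size_t kVRegSize = 4;        // size of one virtual-register slot
    constexpr size_t kWordSize = 4;        // stand-in for GetWordSize() on a 32-bit target
    constexpr size_t kStackAlignment = 16;

    int main() {
      size_t number_of_spill_slots = 2;
      size_t number_of_out_slots = 3;
      size_t maximum_number_of_live_registers = 2;
      size_t frame_entry_spill_size = 4;   // stand-in for FrameEntrySpillSize()

      // Saved live registers start right above the out and spill slots.
      size_t first_register_slot_in_slow_path =
          (number_of_out_slots + number_of_spill_slots) * kVRegSize;              // 20

      // SaveLiveRegisters stores the count-th live register at this offset.
      size_t offset_of_second_saved_register =
          first_register_slot_in_slow_path + 1 * kWordSize;                       // 24

      size_t frame_size = RoundUp(number_of_spill_slots * kVRegSize              // 8
                                      + number_of_out_slots * kVRegSize          // + 12
                                      + maximum_number_of_live_registers * kWordSize  // + 8
                                      + frame_entry_spill_size,                  // + 4 = 32
                                  kStackAlignment);                              // -> 32
      return (frame_size == 32 && offset_of_second_saved_register == 24) ? 0 : 1;
    }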
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a83d703..55f5d8d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -98,7 +98,9 @@ class CodeGenerator : public ArenaObject {
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
-  void ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots);
+  void ComputeFrameSize(size_t number_of_spill_slots,
+                        size_t maximum_number_of_live_registers,
+                        size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
   Location GetTemporaryLocation(HTemporary* temp) const;
@@ -114,6 +116,8 @@ class CodeGenerator : public ArenaObject {
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   virtual InstructionSet GetInstructionSet() const = 0;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
 
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
 
@@ -128,6 +132,8 @@ class CodeGenerator : public ArenaObject {
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
   void BuildStackMaps(std::vector<uint8_t>* vector);
+  void SaveLiveRegisters(LocationSummary* locations);
+  void RestoreLiveRegisters(LocationSummary* locations);
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -141,6 +147,7 @@ class CodeGenerator : public ArenaObject {
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
         core_spill_mask_(0),
+        first_register_slot_in_slow_path_(0),
        graph_(graph),
        block_labels_(graph->GetArena(), 0),
        pc_infos_(graph->GetArena(), 32),
@@ -166,9 +173,11 @@ class CodeGenerator : public ArenaObject {
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
+  uint32_t first_register_slot_in_slow_path_;
 
  private:
   void InitLocations(HInstruction* instruction);
+  size_t GetStackOffsetOfSavedRegister(size_t index);
 
   HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ad62279..206ed13 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -98,10 +98,12 @@ class SuspendCheckSlowPathARM : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pTestSuspend).Int32Value();
     __ ldr(LR, Address(TR, offset));
     __ blx(LR);
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ b(GetReturnLabel());
   }
 
@@ -182,6 +184,14 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << ArmManagedRegister::FromDRegister(DRegister(reg));
 }
 
+void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ str(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
+void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ ldr(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
@@ -577,7 +587,7 @@ void LocationsBuilderARM::VisitIf(HIf* if_instr) {
     DCHECK(cond->IsCondition());
     HCondition* condition = cond->AsCondition();
     if (condition->NeedsMaterialization()) {
-      locations->SetInAt(0, Location::Any());
+      locations->SetInAt(0, Location::RequiresRegister());
     }
   }
 }
@@ -590,7 +600,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
     DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
     __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
            ShifterOperand(0));
-    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), EQ);
+    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);
   } else {
     // Condition has not been materialized, use its inputs as the comparison and its
     // condition as the branch condition.
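The EQ-to-NE change in VisitIf fixes the branch polarity for materialized conditions: the input register holds the boolean value of the condition (0 or 1), and control must transfer to the true successor exactly when that value is nonzero. A minimal sketch of the intended sequence, reusing the patch's own assembler API; cond_register is a hypothetical stand-in for the input location:

    // cond_register holds the materialized condition: 0 for false, 1 for true.
    __ cmp(cond_register, ShifterOperand(0));
    // Branch to the true successor when the value is nonzero, hence NE.
    // The previous EQ condition sent the *false* case to the true successor.
    __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);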
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 2480960..0902fb8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -132,6 +132,8 @@ class CodeGeneratorARM : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3383cb2..0db4311 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -122,8 +122,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -161,6 +163,14 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
 }
 
+void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+}
+
+void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
@@ -541,14 +551,18 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
     DCHECK(cond->IsCondition());
     HCondition* condition = cond->AsCondition();
     if (condition->NeedsMaterialization()) {
-      // Materialized condition, compare against 0
-      Location lhs = if_instr->GetLocations()->InAt(0);
-      if (lhs.IsRegister()) {
-        __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
-      } else {
-        __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+      // Moves do not affect the eflags register, so if the condition is evaluated
+      // just before the if, we don't need to evaluate it again.
+      if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
+        // Materialized condition, compare against 0
+        Location lhs = if_instr->GetLocations()->InAt(0);
+        if (lhs.IsRegister()) {
+          __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
+        } else {
+          __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+        }
       }
-      __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+      __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
     } else {
       Location lhs = condition->GetLocations()->InAt(0);
       Location rhs = condition->GetLocations()->InAt(1);
@@ -625,6 +639,9 @@ void LocationsBuilderX86::VisitCondition(HCondition* comp) {
 
 void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
+    Register reg = locations->Out().AsX86().AsCpuRegister();
+    // Clear register: setcc only sets the low byte.
+    __ xorl(reg, reg);
     if (locations->InAt(1).IsRegister()) {
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               locations->InAt(1).AsX86().AsCpuRegister());
@@ -636,7 +653,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
       __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
               Address(ESP, locations->InAt(1).GetStackIndex()));
     }
-    __ setb(X86Condition(comp->GetCondition()), locations->Out().AsX86().AsCpuRegister());
+    __ setb(X86Condition(comp->GetCondition()), reg);
   }
 }
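The xorl added to VisitCondition is doing two jobs worth spelling out. First, setcc writes only the low byte of its destination, so the upper 24 bits must already be zero for the materialized value to read back as exactly 0 or 1. Second, xorl itself clobbers eflags, so the clear has to be emitted before the cmpl whose flags setcc consumes, never between them. A sketch of the resulting sequence, with hypothetical operand names and kLess standing in for whatever X86Condition returns:

    Register reg = locations->Out().AsX86().AsCpuRegister();
    __ xorl(reg, reg);                    // zero all 32 bits; clobbers eflags, so it
                                          // must precede the compare
    __ cmpl(lhs_register, rhs_register);  // sets eflags for the condition
    __ setb(kLess, reg);                  // writes 0 or 1 into the low byte only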
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f1be0ad..ffcaf60 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -134,6 +134,8 @@ class CodeGeneratorX86 : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ca03af8..56198af 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -103,8 +103,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -170,6 +172,14 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
 }
 
+void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+}
+
+void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
@@ -424,14 +434,18 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
     DCHECK(cond->IsCondition());
     HCondition* condition = cond->AsCondition();
     if (condition->NeedsMaterialization()) {
-      // Materialized condition, compare against 0.
-      Location lhs = if_instr->GetLocations()->InAt(0);
-      if (lhs.IsRegister()) {
-        __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0));
-      } else {
-        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+      // Moves do not affect the eflags register, so if the condition is evaluated
+      // just before the if, we don't need to evaluate it again.
+      if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
+        // Materialized condition, compare against 0.
+        Location lhs = if_instr->GetLocations()->InAt(0);
+        if (lhs.IsRegister()) {
+          __ cmpl(lhs.AsX86_64().AsCpuRegister(), Immediate(0));
+        } else {
+          __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+        }
       }
-      __ j(kEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+      __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
     } else {
       Location lhs = condition->GetLocations()->InAt(0);
       Location rhs = condition->GetLocations()->InAt(1);
@@ -505,6 +519,9 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
 
 void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
+    CpuRegister reg = locations->Out().AsX86_64().AsCpuRegister();
+    // Clear register: setcc only sets the low byte.
+    __ xorq(reg, reg);
     if (locations->InAt(1).IsRegister()) {
       __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               locations->InAt(1).AsX86_64().AsCpuRegister());
@@ -515,8 +532,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
       __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
     }
-    __ setcc(X86_64Condition(comp->GetCondition()),
-             comp->GetLocations()->Out().AsX86_64().AsCpuRegister());
+    __ setcc(X86_64Condition(comp->GetCondition()), reg);
   }
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 78b60fe..ea21872 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -131,6 +131,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index ad9ed0c..e36b1cd 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -20,6 +20,8 @@
 #include <map>
 #include <sstream>
 
+#include "base/bit_vector-inl.h"
+
 namespace art {
 
 void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
@@ -158,6 +160,73 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
       }
     }
   }
+
+  if (block->IsLoopHeader()) {
+    CheckLoop(block);
+  }
+}
+
+void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
+  int id = loop_header->GetBlockId();
+
+  // Ensure the pre-header block is first in the list of
+  // predecessors of a loop header.
+  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+    std::stringstream error;
+    error << "Loop pre-header is not the first predecessor of the loop header "
+          << id << ".";
+    errors_.Insert(error.str());
+  }
+
+  // Ensure the loop header has only two predecessors and that only the
+  // second one is a back edge.
+  if (loop_header->GetPredecessors().Size() < 2) {
+    std::stringstream error;
+    error << "Loop header " << id << " has less than two predecessors.";
+    errors_.Insert(error.str());
+  } else if (loop_header->GetPredecessors().Size() > 2) {
+    std::stringstream error;
+    error << "Loop header " << id << " has more than two predecessors.";
+    errors_.Insert(error.str());
+  } else {
+    HLoopInformation* loop_information = loop_header->GetLoopInformation();
+    HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
+    if (loop_information->IsBackEdge(first_predecessor)) {
+      std::stringstream error;
+      error << "First predecessor of loop header " << id << " is a back edge.";
+      errors_.Insert(error.str());
+    }
+    HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
+    if (!loop_information->IsBackEdge(second_predecessor)) {
+      std::stringstream error;
+      error << "Second predecessor of loop header " << id
+            << " is not a back edge.";
+      errors_.Insert(error.str());
+    }
+  }
+
+  // Ensure there is only one back edge per loop.
+  size_t num_back_edges =
+      loop_header->GetLoopInformation()->GetBackEdges().Size();
+  if (num_back_edges != 1) {
+    std::stringstream error;
+    error << "Loop defined by header " << id << " has "
+          << num_back_edges << " back edge(s).";
+    errors_.Insert(error.str());
+  }
+
+  // Ensure all blocks in the loop are dominated by the loop header.
+  const ArenaBitVector& loop_blocks =
+      loop_header->GetLoopInformation()->GetBlocks();
+  for (uint32_t i : loop_blocks.Indexes()) {
+    HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i);
+    if (!loop_header->Dominates(loop_block)) {
+      std::stringstream error;
+      error << "Loop block " << loop_block->GetBlockId()
+            << " not dominated by loop header " << id;
+      errors_.Insert(error.str());
+    }
+  }
 }
 
 void SSAChecker::VisitInstruction(HInstruction* instruction) {
@@ -180,4 +249,48 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
   }
 }
 
+void SSAChecker::VisitPhi(HPhi* phi) {
+  VisitInstruction(phi);
+
+  // Ensure the first input of a phi is not itself.
+  if (phi->InputAt(0) == phi) {
+    std::stringstream error;
+    error << "Loop phi " << phi->GetId()
+          << " in block " << phi->GetBlock()->GetBlockId()
+          << " is its own first input.";
+    errors_.Insert(error.str());
+  }
+
+  // Ensure the number of phi inputs is the same as the number of
+  // its predecessors.
+  const GrowableArray<HBasicBlock*>& predecessors =
+      phi->GetBlock()->GetPredecessors();
+  if (phi->InputCount() != predecessors.Size()) {
+    std::stringstream error;
+    error << "Phi " << phi->GetId()
+          << " in block " << phi->GetBlock()->GetBlockId()
+          << " has " << phi->InputCount() << " inputs, but block "
+          << phi->GetBlock()->GetBlockId() << " has "
+          << predecessors.Size() << " predecessors.";
+    errors_.Insert(error.str());
+  } else {
+    // Ensure phi input at index I either comes from the Ith
+    // predecessor or from a block that dominates this predecessor.
+    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+      HInstruction* input = phi->InputAt(i);
+      HBasicBlock* predecessor = predecessors.Get(i);
+      if (!(input->GetBlock() == predecessor
+            || input->GetBlock()->Dominates(predecessor))) {
+        std::stringstream error;
+        error << "Input " << input->GetId() << " at index " << i
+              << " of phi " << phi->GetId()
+              << " from block " << phi->GetBlock()->GetBlockId()
+              << " is not defined in predecessor number " << i
+              << " nor in a block dominating it.";
+        errors_.Insert(error.str());
+      }
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 8ddd399..34a770b 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -67,9 +67,12 @@ class SSAChecker : public GraphChecker {
 
   // Perform SSA form checks on `block`.
   virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+  // Loop-related checks from block `loop_header`.
+  void CheckLoop(HBasicBlock* loop_header);
 
-  // Perform SSA form checks on `instruction`.
+  // Perform SSA form checks on instructions.
   virtual void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  virtual void VisitPhi(HPhi* phi) OVERRIDE;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(SSAChecker);
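Condensing the new SSAChecker rules: a loop header must have exactly two predecessors, the pre-header first and the single back edge second, and each phi input must be defined in (or in a dominator of) the matching predecessor. The two helpers below restate those invariants using the checker's own APIs; they are a hypothetical summary, not part of the patch:

    bool HasCanonicalLoopShape(HBasicBlock* header) {
      HLoopInformation* info = header->GetLoopInformation();
      const GrowableArray<HBasicBlock*>& preds = header->GetPredecessors();
      return preds.Size() == 2                           // pre-header plus back edge only
          && header->IsLoopPreHeaderFirstPredecessor()   // predecessor 0 is the pre-header
          && !info->IsBackEdge(preds.Get(0))             // ... and is not a back edge
          && info->IsBackEdge(preds.Get(1))              // predecessor 1 is the back edge
          && info->GetBackEdges().Size() == 1;           // one back edge per loop
    }

    bool HasWellPlacedInputs(HPhi* phi) {
      const GrowableArray<HBasicBlock*>& preds = phi->GetBlock()->GetPredecessors();
      if (phi->InputAt(0) == phi) return false;          // no phi is its own first input
      if (phi->InputCount() != preds.Size()) return false;
      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
        HBasicBlock* def_block = phi->InputAt(i)->GetBlock();
        if (def_block != preds.Get(i) && !def_block->Dominates(preds.Get(i))) {
          return false;
        }
      }
      return true;
    }

The checker additionally verifies that the header dominates every block recorded in the loop's bit vector; that part is omitted above for brevity.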
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 7f64be4..0fb4737 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -82,6 +82,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
   }
 
   char GetTypeId(Primitive::Type type) {
+    // Note that Primitive::Descriptor would not work for us
+    // because it does not handle reference types (that is kPrimNot).
     switch (type) {
       case Primitive::kPrimBoolean: return 'z';
       case Primitive::kPrimByte: return 'b';
@@ -127,6 +129,12 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
       }
     } else if (location.IsConstant()) {
       output_ << "constant";
+      HConstant* constant = location.GetConstant();
+      if (constant->IsIntConstant()) {
+        output_ << " " << constant->AsIntConstant()->GetValue();
+      } else if (constant->IsLongConstant()) {
+        output_ << " " << constant->AsLongConstant()->GetValue();
+      }
     } else if (location.IsInvalid()) {
       output_ << "invalid";
     } else if (location.IsStackSlot()) {
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index fce97bd..1c36cdf 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -28,7 +28,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
-      live_registers_(0) {
+      live_registers_() {
   inputs_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 041e85b..06623b6 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -266,6 +266,34 @@ class Location : public ValueObject {
   uword value_;
 };
 
+class RegisterSet : public ValueObject {
+ public:
+  RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+
+  void Add(Location loc) {
+    // TODO: floating point registers.
+    core_registers_ |= (1 << loc.reg().RegId());
+  }
+
+  bool ContainsCoreRegister(uint32_t id) {
+    return Contains(core_registers_, id);
+  }
+
+  bool ContainsFloatingPointRegister(uint32_t id) {
+    return Contains(floating_point_registers_, id);
+  }
+
+  static bool Contains(uint32_t register_set, uint32_t reg) {
+    return (register_set & (1 << reg)) != 0;
+  }
+
+ private:
+  uint32_t core_registers_;
+  uint32_t floating_point_registers_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterSet);
+};
+
 /**
  * The code generator computes LocationSummary for each instruction so that
  * the instruction itself knows what code to generate: where to find the inputs
@@ -327,6 +355,8 @@ class LocationSummary : public ArenaObject {
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
+  bool WillCall() const { return call_kind_ == kCall; }
+  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
   bool NeedsSafepoint() const { return CanCall(); }
 
   void SetStackBit(uint32_t index) {
@@ -337,14 +367,22 @@
     register_mask_ |= (1 << reg_id);
   }
 
-  void SetLiveRegister(uint32_t reg_id) {
-    live_registers_ |= (1 << reg_id);
+  bool RegisterContainsObject(uint32_t reg_id) {
+    return RegisterSet::Contains(register_mask_, reg_id);
+  }
+
+  void AddLiveRegister(Location location) {
+    live_registers_.Add(location);
   }
 
   BitVector* GetStackMask() const {
     return stack_mask_;
   }
 
+  RegisterSet* GetLiveRegisters() {
+    return &live_registers_;
+  }
+
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
@@ -359,7 +397,7 @@
   uint32_t register_mask_;
 
   // Registers that are in use at this position.
-  uint32_t live_registers_;
+  RegisterSet live_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
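RegisterSet keeps the same one-bit-per-register encoding the raw uint32_t used, just split by register kind; the static Contains overload exists so LocationSummary::RegisterContainsObject can apply the identical test to register_mask_. A hypothetical usage sketch, with made-up register ids:

    RegisterSet live;
    live.Add(Location::RegisterLocation(ManagedRegister(1)));  // sets bit 1 of core_registers_
    live.Add(Location::RegisterLocation(ManagedRegister(4)));  // sets bit 4

    bool b1 = live.ContainsCoreRegister(1);      // true
    bool b2 = live.ContainsCoreRegister(2);      // false
    bool b3 = RegisterSet::Contains(0x12u, 4);   // true: 0x12 has bits 1 and 4 set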
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 72c5834..09412a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -555,14 +555,22 @@ bool HCondition::NeedsMaterialization() const {
     return true;
   }
 
-  // TODO: should we allow intervening instructions with no side-effect between this condition
-  // and the If instruction?
+  // TODO: if there is no intervening instructions with side-effect between this condition
+  // and the If instruction, we should move the condition just before the If.
   if (GetNext() != user) {
     return true;
   }
 
   return false;
 }
 
+bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
+  HInstruction* previous = if_->GetPrevious();
+  while (previous != nullptr && previous->IsParallelMove()) {
+    previous = previous->GetPrevious();
+  }
+  return previous == this;
+}
+
 bool HInstruction::Equals(HInstruction* other) const {
   if (!InstructionTypeEquals(other)) return false;
   DCHECK_EQ(GetKind(), other->GetKind());
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 47c8eda..be6b355 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -383,6 +383,12 @@ class HBasicBlock : public ArenaObject {
     return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
   }
 
+  bool IsLoopPreHeaderFirstPredecessor() const {
+    DCHECK(IsLoopHeader());
+    DCHECK(!GetPredecessors().IsEmpty());
+    return GetPredecessors().Get(0) == GetLoopInformation()->GetPreHeader();
+  }
+
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
@@ -606,7 +612,7 @@ class HInstruction : public ArenaObject {
   bool IsInLoop() const { return block_->IsInLoop(); }
   bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
 
-  virtual size_t InputCount() const  = 0;
+  virtual size_t InputCount() const = 0;
   virtual HInstruction* InputAt(size_t i) const = 0;
 
   virtual void Accept(HGraphVisitor* visitor) = 0;
@@ -1089,8 +1095,15 @@ class HCondition : public HBinaryOperation {
       : HBinaryOperation(Primitive::kPrimBoolean, first, second) {}
 
   virtual bool IsCommutative() { return true; }
+
+  // For register allocation purposes, returns whether this instruction needs to be
+  // materialized (that is, not just be in the processor flags).
   bool NeedsMaterialization() const;
 
+  // For code generation purposes, returns whether this instruction is just before
+  // `if_`, and disregard moves in between.
+  bool IsBeforeWhenDisregardMoves(HIf* if_) const;
+
   DECLARE_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
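The reason IsBeforeWhenDisregardMoves skips parallel moves rather than demanding strict adjacency: the register allocator may insert HParallelMove instructions between the condition and its HIf user, and those moves compile to instructions that leave eflags untouched, which is exactly the property the x86 and x86-64 VisitIf changes above rely on. An illustrative walk, with hypothetical instruction names:

    // Instruction stream after register allocation:
    //   cond: HLessThan(a, b)     // the compare leaves eflags holding the result
    //   move: HParallelMove(...)  // inserted by the allocator; does not touch eflags
    //   if_:  HIf(cond)
    //
    // The backward walk from if_ skips move and stops at cond:
    HInstruction* previous = if_->GetPrevious();          // move
    while (previous != nullptr && previous->IsParallelMove()) {
      previous = previous->GetPrevious();                 // cond
    }
    // previous == cond, so IsBeforeWhenDisregardMoves(if_) returns true and the
    // code generator may skip re-evaluating the materialized condition.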
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9ba75b8..1ac9b78 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -45,7 +45,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
         number_of_registers_(-1),
         registers_array_(nullptr),
         blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())),
-        reserved_out_slots_(0) {
+        reserved_out_slots_(0),
+        maximum_number_of_live_registers_(0) {
   codegen->SetupBlockedRegisters(blocked_registers_);
   physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters());
   // Always reserve for the current method and the graph's max out registers.
@@ -157,9 +158,34 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
     }
   }
 
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  GrowableArray<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
   if (locations->CanCall()) {
-    codegen_->MarkNotLeaf();
+    if (!instruction->IsSuspendCheck()) {
+      codegen_->MarkNotLeaf();
+    }
     safepoints_.Add(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires to know the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      unhandled.Add(interval);
+    }
+  }
+
+  if (locations->WillCall()) {
     // Block all registers.
     for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
       BlockRegister(Location::RegisterLocation(ManagedRegister(i)),
@@ -176,12 +202,6 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
     }
   }
 
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-  GrowableArray<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
   LiveInterval* current = instruction->GetLiveInterval();
   if (current == nullptr) return;
 
@@ -405,6 +425,14 @@ void RegisterAllocator::LinearScan() {
       }
     }
 
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      maximum_number_of_live_registers_ =
+          std::max(maximum_number_of_live_registers_, active_.Size());
+      continue;
+    }
+
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -930,14 +958,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
       LocationSummary* locations = safepoint->GetLocations();
       if (!current->Covers(position)) continue;
 
-      if (current->GetType() == Primitive::kPrimNot) {
-        DCHECK(current->GetParent()->HasSpillSlot());
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
         locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
       }
 
       switch (source.GetKind()) {
         case Location::kRegister: {
-          locations->SetLiveRegister(source.reg().RegId());
+          locations->AddLiveRegister(source);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg().RegId());
           }
@@ -1020,7 +1047,8 @@ static Location FindLocationAt(LiveInterval* interval, size_t position) {
 }
 
 void RegisterAllocator::Resolve() {
-  codegen_->ComputeFrameSize(spill_slots_.Size(), reserved_out_slots_);
+  codegen_->ComputeFrameSize(
+      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
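The synthesized-interval trick is worth restating: each safepoint that only calls on a slow path contributes a one-position interval, so when linear scan reaches it, whatever sits in active_ is precisely the set of registers live across that safepoint. A schematic trace of the mechanism; positions and counts below are made up:

    // ProcessInstruction, at a SuspendCheck at position 10:
    //   locations->OnlyCallsOnSlowPath() is true
    //   -> synthesized interval [10, 11) is pushed onto unhandled
    //
    // LinearScan, when that interval becomes current:
    //   active_ = { v1, v2, v3 }        // three intervals hold registers here
    //   maximum_number_of_live_registers_ =
    //       std::max(maximum_number_of_live_registers_, active_.Size());  // -> 3
    //   continue;                       // the synthesized interval gets no register
    //
    // Resolve() then passes 3 to ComputeFrameSize, which reserves
    // 3 * GetWordSize() bytes of save area at first_register_slot_in_slow_path_.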
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 7d397e3..3c305c8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -179,6 +179,9 @@ class RegisterAllocator {
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
+  // The maximum live registers at safepoints.
+  size_t maximum_number_of_live_registers_;
+
   FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
 
   DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 33b1f1f..dea6181 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -138,7 +138,8 @@ class LiveInterval : public ArenaObject {
                HInstruction* defined_by = nullptr,
                bool is_fixed = false,
                int reg = kNoRegister,
-               bool is_temp = false)
+               bool is_temp = false,
+               bool is_slow_path_safepoint = false)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
@@ -150,8 +151,14 @@ class LiveInterval : public ArenaObject {
         spill_slot_(kNoSpillSlot),
         is_fixed_(is_fixed),
         is_temp_(is_temp),
+        is_slow_path_safepoint_(is_slow_path_safepoint),
         defined_by_(defined_by) {}
 
+  static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
+    return new (allocator) LiveInterval(
+        allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
+  }
+
   static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
     return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
   }
@@ -163,6 +170,7 @@ class LiveInterval : public ArenaObject {
   }
 
   bool IsFixed() const { return is_fixed_; }
+  bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
 
   void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
     // Set the use within the instruction.
@@ -480,6 +488,9 @@ class LiveInterval : public ArenaObject {
   // Whether the interval is for a temporary.
   const bool is_temp_;
 
+  // Whether the interval is for a safepoint that calls on slow path.
+  const bool is_slow_path_safepoint_;
+
   // The instruction represented by this interval.
   HInstruction* const defined_by_;
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index d541a62..e02a182 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -83,10 +83,6 @@ void SsaDeadPhiElimination::Run() {
   }
 }
 
-static bool LoopPreHeaderIsFirstPredecessor(HBasicBlock* block) {
-  return block->GetPredecessors().Get(0) == block->GetLoopInformation()->GetPreHeader();
-}
-
 void SsaRedundantPhiElimination::Run() {
   // Add all phis in the worklist.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -109,7 +105,7 @@ void SsaRedundantPhiElimination::Run() {
     // A loop phi cannot have itself as the first phi. Note that this
     // check relies on our simplification pass ensuring the pre-header
     // block is first in the list of predecessors of the loop header.
-    DCHECK(!phi->IsLoopHeaderPhi() || LoopPreHeaderIsFirstPredecessor(phi->GetBlock()));
+    DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor());
     DCHECK_NE(phi, candidate);
 
     for (size_t i = 1; i < phi->InputCount(); ++i) {