diff options
-rw-r--r-- | compiler/optimizing/code_generator.h | 16 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 248 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/locations.h | 22 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 15 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.cc | 126 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.h | 24 | ||||
-rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.h | 6 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 30 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 5 | ||||
-rw-r--r-- | test/458-long-to-fpu/expected.txt | 2 | ||||
-rw-r--r-- | test/458-long-to-fpu/info.txt | 2 | ||||
-rw-r--r-- | test/458-long-to-fpu/src/Main.java | 46 | ||||
-rw-r--r-- | test/Android.run-test.mk | 14 |
15 files changed, 424 insertions, 136 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5146afa..b8f4572 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -153,17 +153,13 @@ class CodeGenerator { virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; - virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } - virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - UNUSED(stack_index, reg_id); - UNIMPLEMENTED(FATAL); - UNREACHABLE(); - } + + virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; + // Returns whether we should split long moves in parallel moves. 
+ virtual bool ShouldSplitLongMoves() const { return false; } bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 07d88de..a09ecb8 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -673,8 +673,19 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { source.AsRegisterPairHigh<Register>()); } else if (source.IsFpuRegister()) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else if (source.IsConstant()) { + HConstant* constant = source.GetConstant(); + int64_t value; + if (constant->IsLongConstant()) { + value = constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } + __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value))); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value))); } else { - DCHECK(source.IsDoubleStackSlot()); + DCHECK(source.IsDoubleStackSlot()) << source; EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), @@ -1555,8 +1566,6 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `int-to-byte' instruction. 
if (in.IsRegister()) { __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); - } else if (in.IsStackSlot()) { - __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); @@ -1760,6 +1769,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ addsd(result, temp); // result = double-to-float(result) __ cvtsd2ss(result, result); + // Restore low. + __ addl(low, Immediate(0x80000000)); break; } @@ -1807,6 +1818,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ addsd(result, constant); // result = result + temp __ addsd(result, temp); + // Restore low. + __ addl(low, Immediate(0x80000000)); break; } @@ -1892,10 +1905,15 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { if (second.IsRegisterPair()) { __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); - } else { + } else if (second.IsDoubleStackSlot()) { __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ adcl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); + __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); } break; } @@ -1965,10 +1983,15 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { if (second.IsRegisterPair()) { __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); - } else { + } else if (second.IsDoubleStackSlot()) { __ 
subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ sbbl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value))); + __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); } break; } @@ -1999,12 +2022,6 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { break; case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // TODO: Currently this handles only stack operands: - // - we don't have enough registers because we currently use Quick ABI. - // - by the time we have a working register allocator we will probably change the ABI - // and fix the above. - // - we don't have a way yet to request operands on stack but the base line compiler - // will leave the operands on the stack with Any(). locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); // Needed for imul on 32bits with 64bits output. @@ -2046,39 +2063,83 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } case Primitive::kPrimLong: { - DCHECK(second.IsDoubleStackSlot()); - Register in1_hi = first.AsRegisterPairHigh<Register>(); Register in1_lo = first.AsRegisterPairLow<Register>(); - Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); - Address in2_lo(ESP, second.GetStackIndex()); Register eax = locations->GetTemp(0).AsRegister<Register>(); Register edx = locations->GetTemp(1).AsRegister<Register>(); DCHECK_EQ(EAX, eax); DCHECK_EQ(EDX, edx); - // input: in1 - 64 bits, in2 - 64 bits + // input: in1 - 64 bits, in2 - 64 bits. 
// output: in1 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] // parts: in1.lo = (in1.lo * in2.lo)[31:0] - - __ movl(eax, in2_hi); - // eax <- in1.lo * in2.hi - __ imull(eax, in1_lo); - // in1.hi <- in1.hi * in2.lo - __ imull(in1_hi, in2_lo); - // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo - __ addl(in1_hi, eax); - // move in1_lo to eax to prepare for double precision - __ movl(eax, in1_lo); - // edx:eax <- in1.lo * in2.lo - __ mull(in2_lo); - // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] - __ addl(in1_hi, edx); - // in1.lo <- (in1.lo * in2.lo)[31:0]; - __ movl(in1_lo, eax); + if (second.IsConstant()) { + DCHECK(second.GetConstant()->IsLongConstant()); + + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + + __ movl(eax, high); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, low); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in2_lo to eax to prepare for double precision + __ movl(eax, low); + // edx:eax <- in1.lo * in2.lo + __ mull(in1_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else if (second.IsRegisterPair()) { + Register in2_hi = second.AsRegisterPairHigh<Register>(); + Register in2_lo = second.AsRegisterPairLow<Register>(); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * 
in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } else { + DCHECK(second.IsDoubleStackSlot()) << second; + Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); + Address in2_lo(ESP, second.GetStackIndex()); + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + } break; } @@ -2237,7 +2298,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr } void LocationsBuilderX86::VisitDiv(HDiv* div) { - LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong + LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong) ? LocationSummary::kCall : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); @@ -2306,8 +2367,10 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) + ? 
LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); switch (type) { case Primitive::kPrimInt: { @@ -2646,7 +2709,6 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) { switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // TODO: we set any here but we don't handle constants locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -2674,18 +2736,24 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { case Primitive::kPrimLong: { if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairHigh<Register>(), Address(ESP, right.GetHighStackIndex(kX86WordSize))); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairHigh<Register>(), + Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } __ j(kLess, &less); // Signed compare. __ j(kGreater, &greater); // Signed compare. 
if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>()); - } else { - DCHECK(right.IsDoubleStackSlot()); + } else if (right.IsDoubleStackSlot()) { __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex())); + } else { + DCHECK(right.IsConstant()) << right; + __ cmpl(left.AsRegisterPairLow<Register>(), + Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue()))); } break; } @@ -2770,7 +2838,12 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + // The output overlaps in case of long: we don't want the low move to overwrite + // the object's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap + : Location::kNoOutputOverlap); if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { // Long values can be loaded atomically into an XMM using movsd. 
@@ -2827,6 +2900,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, __ psrlq(temp, Immediate(32)); __ movd(out.AsRegisterPairHigh<Register>(), temp); } else { + DCHECK_NE(base, out.AsRegisterPairLow<Register>()); __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset)); @@ -3064,7 +3138,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in case of long: we don't want the low move to overwrite + // the array's location. + locations->SetOut(Location::RequiresRegister(), + (instruction->GetType() == Primitive::kPrimLong) ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); } void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { @@ -3138,6 +3216,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location out = locations->Out(); + DCHECK_NE(obj, out.AsRegisterPairLow<Register>()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset)); @@ -3569,8 +3648,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } - } else { - DCHECK(constant->IsFloatConstant()); + } else if (constant->IsFloatConstant()) { float value = constant->AsFloatConstant()->GetValue(); Immediate imm(bit_cast<float, int32_t>(value)); if (destination.IsFpuRegister()) { @@ -3583,6 +3661,43 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(ESP, destination.GetStackIndex()), imm); } + } else if (constant->IsLongConstant()) { + int64_t value = constant->AsLongConstant()->GetValue(); + int32_t low_value = Low32Bits(value); + int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + if (destination.IsDoubleStackSlot()) { + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } else { + __ movl(destination.AsRegisterPairLow<Register>(), low); + __ movl(destination.AsRegisterPairHigh<Register>(), high); + } + } else { + DCHECK(constant->IsDoubleConstant()); + double dbl_value = constant->AsDoubleConstant()->GetValue(); + int64_t value = bit_cast<double, int64_t>(dbl_value); + int32_t low_value = Low32Bits(value); + 
int32_t high_value = High32Bits(value); + Immediate low(low_value); + Immediate high(high_value); + if (destination.IsFpuRegister()) { + XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); + if (value == 0) { + // Easy handling of 0.0. + __ xorpd(dest, dest); + } else { + __ pushl(high); + __ pushl(low); + __ movsd(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(8)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ movl(Address(ESP, destination.GetStackIndex()), low); + __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high); + } } } else { LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source; @@ -3650,6 +3765,33 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (destination.IsFpuRegister() && source.IsStackSlot()) { Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = source.AsFpuRegister<XmmRegister>(); + Address stack(ESP, destination.GetStackIndex()); + // Load the double into the high doubleword. + __ movhpd(reg, stack); + + // Store the low double into the destination. + __ movsd(stack, reg); + + // Move the high double to the low double. + __ psrldq(reg, Immediate(8)); + } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) { + // Take advantage of the 16 bytes in the XMM register. + XmmRegister reg = destination.AsFpuRegister<XmmRegister>(); + Address stack(ESP, source.GetStackIndex()); + // Load the double into the high doubleword. + __ movhpd(reg, stack); + + // Store the low double into the destination. + __ movsd(stack, reg); + + // Move the high double to the low double. 
+ __ psrldq(reg, Immediate(8)); + } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize)); } else { LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; } @@ -3951,7 +4093,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); } - } else { + } else if (second.IsDoubleStackSlot()) { if (instruction->IsAnd()) { __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex())); __ andl(first.AsRegisterPairHigh<Register>(), @@ -3966,6 +4108,22 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr __ xorl(first.AsRegisterPairHigh<Register>(), Address(ESP, second.GetHighStackIndex(kX86WordSize))); } + } else { + DCHECK(second.IsConstant()) << second; + int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); + Immediate low(Low32Bits(value)); + Immediate high(High32Bits(value)); + if (instruction->IsAnd()) { + __ andl(first.AsRegisterPairLow<Register>(), low); + __ andl(first.AsRegisterPairHigh<Register>(), high); + } else if (instruction->IsOr()) { + __ orl(first.AsRegisterPairLow<Register>(), low); + __ orl(first.AsRegisterPairHigh<Register>(), high); + } else { + DCHECK(instruction->IsXor()); + __ xorl(first.AsRegisterPairLow<Register>(), low); + __ xorl(first.AsRegisterPairHigh<Register>(), high); + } } } } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f5a9b7d..c5763de 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -245,6 +245,8 @@ class CodeGeneratorX86 : public CodeGenerator { return type 
== Primitive::kPrimLong; } + bool ShouldSplitLongMoves() const OVERRIDE { return true; } + Label* GetFrameEntryLabel() { return &frame_entry_label_; } private: diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 198cc15..566c0da 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -211,15 +211,25 @@ class Location : public ValueObject { } Location ToLow() const { - return IsRegisterPair() - ? Location::RegisterLocation(low()) - : Location::FpuRegisterLocation(low()); + if (IsRegisterPair()) { + return Location::RegisterLocation(low()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(low()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetStackIndex()); + } } Location ToHigh() const { - return IsRegisterPair() - ? Location::RegisterLocation(high()) - : Location::FpuRegisterLocation(high()); + if (IsRegisterPair()) { + return Location::RegisterLocation(high()); + } else if (IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(high()); + } else { + DCHECK(IsDoubleStackSlot()); + return Location::StackSlot(GetHighStackIndex(4)); + } } static uintptr_t EncodeStackIndex(intptr_t stack_index) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 942aa23..d4498a6 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -3310,8 +3310,19 @@ class HParallelMove : public HTemplateInstruction<0> { if (kIsDebugBuild) { if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; + if (moves_.Get(i).GetInstruction() == instruction) { + // Special case the situation where the move is for the spill slot + // of the instruction. 
+ if ((GetPrevious() == instruction) + || ((GetPrevious() == nullptr) + && instruction->IsPhi() + && instruction->GetBlock() == GetBlock())) { + DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + << "Doing parallel moves for the same instruction."; + } else { + DCHECK(false) << "Doing parallel moves for the same instruction."; + } + } } } for (size_t i = 0, e = moves_.Size(); i < e; ++i) { diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3470595..475d98c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -520,7 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_file, dex_compilation_unit, &pass_info_printer); - } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { + } else if (shouldOptimize && can_allocate_registers) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 748ab22..cecc210 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -16,6 +16,7 @@ #include "register_allocator.h" +#include <iostream> #include <sstream> #include "base/bit_vector-inl.h" @@ -32,6 +33,9 @@ static constexpr size_t kDefaultNumberOfSpillSlots = 4; // allocate SRegister. 
static int GetHighForLowRegister(int reg) { return reg + 1; } static bool IsLowRegister(int reg) { return (reg & 1) == 0; } +static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { + return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); +} RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, @@ -70,28 +74,13 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } -bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, +bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, InstructionSet instruction_set) { - if (!Supports(instruction_set)) { - return false; - } - if (instruction_set == kArm64 + return instruction_set == kArm64 || instruction_set == kX86_64 || instruction_set == kArm - || instruction_set == kThumb2) { - return true; - } - for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { - for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); - !it.Done(); - it.Advance()) { - HInstruction* current = it.Current(); - if (instruction_set == kX86 && current->GetType() == Primitive::kPrimLong) { - return false; - } - } - } - return true; + || instruction_set == kX86 + || instruction_set == kThumb2; } static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { @@ -771,8 +760,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { return false; } - if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) { - return false; + if (current->IsLowInterval()) { + // If the high register of this interval is not available, we need to spill. 
+ int high_reg = current->GetHighInterval()->GetRegister(); + if (high_reg == kNoRegister) { + high_reg = GetHighForLowRegister(reg); + } + if (free_until[high_reg] == 0) { + return false; + } } current->SetRegister(reg); @@ -831,16 +827,18 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { return reg; } -bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position, - size_t first_register_use, - size_t* next_use) { +bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); DCHECK(active->HasRegister()); + if (active->IsFixed()) continue; + if (active->IsHighInterval()) continue; + if (first_register_use > next_use[active->GetRegister()]) continue; + // Split the first interval found. - if (first_register_use <= next_use[active->GetRegister()] - && !active->IsLowInterval() - && !active->IsHighInterval()) { + if (!active->IsLowInterval() || IsLowOfUnalignedPairInterval(active)) { LiveInterval* split = Split(active, position); active_.DeleteAt(i); if (split != active) { @@ -921,7 +919,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // When allocating the low part, we made sure the high register was available. DCHECK_LT(first_register_use, next_use[reg]); } else if (current->IsLowInterval()) { - reg = FindAvailableRegisterPair(next_use, current->GetStart()); + reg = FindAvailableRegisterPair(next_use, first_register_use); // We should spill if both registers are not available. 
should_spill = (first_register_use >= next_use[reg]) || (first_register_use >= next_use[GetHighForLowRegister(reg)]); @@ -934,14 +932,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { DCHECK_NE(reg, kNoRegister); if (should_spill) { DCHECK(!current->IsHighInterval()); - bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1)); + bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1)); if (current->IsLowInterval() && is_allocation_at_use_site - && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) { + && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(), + first_register_use, + next_use)) { // If we're allocating a register for `current` because the instruction at // that position requires it, but we think we should spill, then there are - // non-pair intervals blocking the allocation. We split the first - // interval found, and put ourselves first in the `unhandled_` list. + // non-pair intervals or unaligned pair intervals blocking the allocation. + // We split the first interval found, and put ourselves first in the + // `unhandled_` list. LiveInterval* existing = unhandled_->Peek(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); @@ -951,10 +952,15 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); LiveInterval* split = Split(current, first_register_use - 1); - DCHECK_NE(current, split) << "There is not enough registers available for " - << split->GetParent()->GetDefinedBy()->DebugName() << " " - << split->GetParent()->GetDefinedBy()->GetId() - << " at " << first_register_use - 1; + if (current == split) { + DumpInterval(std::cerr, current); + DumpAllIntervals(std::cerr); + // This situation has the potential to infinite loop, so we make it a non-debug CHECK. 
+ CHECK(false) << "There is not enough registers available for " + << split->GetParent()->GetDefinedBy()->DebugName() << " " + << split->GetParent()->GetDefinedBy()->GetId() + << " at " << first_register_use - 1; + } AddSorted(unhandled_, split); } return false; @@ -1203,7 +1209,24 @@ static bool IsValidDestination(Location destination) { || destination.IsDoubleStackSlot(); } -void RegisterAllocator::AddInputMoveFor(HInstruction* user, +void RegisterAllocator::AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const { + if (type == Primitive::kPrimLong + && codegen_->ShouldSplitLongMoves() + // The parallel move resolver knows how to deal with long constants. + && !source.IsConstant()) { + move->AddMove(source.ToLow(), destination.ToLow(), instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr); + } else { + move->AddMove(source, destination, instruction); + } +} + +void RegisterAllocator::AddInputMoveFor(HInstruction* input, + HInstruction* user, Location source, Location destination) const { if (source.Equals(destination)) return; @@ -1222,7 +1245,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, move = previous->AsParallelMove(); } DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition()); - move->AddMove(source, destination, nullptr); + AddMove(move, source, destination, nullptr, input->GetType()); } static bool IsInstructionStart(size_t position) { @@ -1251,8 +1274,16 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, at = liveness_.GetInstructionFromPosition((position + 1) / 2); // Note that parallel moves may have already been inserted, so we explicitly // ask for the first instruction of the block: `GetInstructionFromPosition` does - // not contain the moves. + // not contain the `HParallelMove` instructions. 
at = at->GetBlock()->GetFirstInstruction(); + + if (at->GetLifetimePosition() < position) { + // We may insert moves for split siblings and phi spills at the beginning of the block. + // Since this is a different lifetime position, we need to go to the next instruction. + DCHECK(at->IsParallelMove()); + at = at->GetNext(); + } + if (at->GetLifetimePosition() != position) { DCHECK_GT(at->GetLifetimePosition(), position); move = new (allocator_) HParallelMove(allocator_); @@ -1294,7 +1325,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, } } DCHECK_EQ(move->GetLifetimePosition(), position); - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, @@ -1324,7 +1355,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, } else { move = previous->AsParallelMove(); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, @@ -1336,14 +1367,15 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, HInstruction* first = block->GetFirstInstruction(); HParallelMove* move = first->AsParallelMove(); + size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and input moves. 
- if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) { + if (move == nullptr || move->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(block->GetLifetimeStart()); + move->SetLifetimePosition(position); block->InsertInstructionBefore(move, first); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, @@ -1367,7 +1399,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, move->SetLifetimePosition(position); instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); } - move->AddMove(source, destination, instruction); + AddMove(move, source, destination, instruction, instruction->GetType()); } void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { @@ -1402,7 +1434,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (expected_location.IsUnallocated()) { locations->SetInAt(use->GetInputIndex(), source); } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); } } else { DCHECK(use->GetUser()->IsInvoke()); @@ -1657,7 +1689,7 @@ void RegisterAllocator::Resolve() { Location source = input->GetLiveInterval()->GetLocationAt( predecessor->GetLifetimeEnd() - 1); Location destination = phi->GetLiveInterval()->ToLocation(); - InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination); + InsertParallelMoveAtExitOf(predecessor, phi, source, destination); } } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 579f069..fcc6112 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -66,13 +66,6 @@ class RegisterAllocator { bool 
log_fatal_on_failure); static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); - static bool Supports(InstructionSet instruction_set) { - return instruction_set == kArm - || instruction_set == kArm64 - || instruction_set == kThumb2 - || instruction_set == kX86 - || instruction_set == kX86_64; - } size_t GetNumberOfSpillSlots() const { return int_spill_slots_.Size() @@ -121,12 +114,21 @@ class RegisterAllocator { Location source, Location destination) const; void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; - void AddInputMoveFor(HInstruction* user, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* input, + HInstruction* user, + Location source, + Location destination) const; void InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, Location destination) const; + void AddMove(HParallelMove* move, + Location source, + Location destination, + HInstruction* instruction, + Primitive::Type type) const; + // Helper methods. void AllocateRegistersInternal(); void ProcessInstruction(HInstruction* instruction); @@ -136,9 +138,11 @@ class RegisterAllocator { int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; int FindAvailableRegister(size_t* next_use) const; - // Try splitting an active non-pair interval at the given `position`. + // Try splitting an active non-pair or unaligned pair interval at the given `position`. // Returns whether it was successful at finding such an interval. 
- bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use); + bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, + size_t first_register_use, + size_t* next_use); ArenaAllocator* const allocator_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 9ff2f20..5787f0c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -373,13 +373,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (location.IsUnallocated()) { if ((location.GetPolicy() == Location::kRequiresRegister) || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) { + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { return position; } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) || (location.GetPolicy() == Location::kSameAsFirstInput && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { return position; } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return position; } } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 8f4208b..90170ce 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -451,6 +451,36 @@ void X86Assembler::movsd(XmmRegister dst, XmmRegister src) { } +void X86Assembler::movhpd(XmmRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x16); + EmitOperand(dst, src); +} + + +void X86Assembler::movhpd(const Address& dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x17); + EmitOperand(src, dst); +} + + +void 
X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); +} + + void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 2dde907..4d20db0 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -277,6 +277,11 @@ class X86Assembler FINAL : public Assembler { void psrlq(XmmRegister reg, const Immediate& shift_count); void punpckldq(XmmRegister dst, XmmRegister src); + void movhpd(XmmRegister dst, const Address& src); + void movhpd(const Address& dst, XmmRegister src); + + void psrldq(XmmRegister reg, const Immediate& shift_count); + void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); diff --git a/test/458-long-to-fpu/expected.txt b/test/458-long-to-fpu/expected.txt new file mode 100644 index 0000000..daaac9e --- /dev/null +++ b/test/458-long-to-fpu/expected.txt @@ -0,0 +1,2 @@ +42 +42 diff --git a/test/458-long-to-fpu/info.txt b/test/458-long-to-fpu/info.txt new file mode 100644 index 0000000..7459cfb --- /dev/null +++ b/test/458-long-to-fpu/info.txt @@ -0,0 +1,2 @@ +Regression test for x86's code generator, which had a bug in +the long-to-float and long-to-double implementations. diff --git a/test/458-long-to-fpu/src/Main.java b/test/458-long-to-fpu/src/Main.java new file mode 100644 index 0000000..a8b6e78 --- /dev/null +++ b/test/458-long-to-fpu/src/Main.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println(floatConvert(false));
+    System.out.println(doubleConvert(false));
+  }
+
+  public static long floatConvert(boolean flag) {
+    if (flag) {
+      // Try defeating inlining.
+      floatConvert(false);
+    }
+    long l = myLong;
+    myFloat = (float)l;
+    return l;
+  }
+
+  public static long doubleConvert(boolean flag) {
+    if (flag) {
+      // Try defeating inlining.
+      doubleConvert(false);
+    }
+    long l = myLong;
+    myDouble = (double)l;
+    return l;
+  }
+
+  public static long myLong = 42;
+  public static float myFloat = 2.0f;
+  public static double myDouble = 4.0d;
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index b85ece8..c666d35 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -333,20 +333,6 @@ endif

TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=

-# Tests known to be broken for the optimizing compiler on 32-bit targets due to
-# inability to allocate registers for methods with long values.
-TEST_ART_BROKEN_OPTIMIZING_32_RUN_TESTS := \
- 441-checker-inliner \
- 442-checker-constant-folding \
-
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
- ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
- optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
- $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_32_RUN_TESTS),32)
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_32_RUN_TESTS :=
-
# Known broken tests for the arm64 optimizing compiler backend.
TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := |