author | Mark Mendell <mark.p.mendell@intel.com> | 2015-03-13 13:47:53 -0400
committer | Mark Mendell <mark.p.mendell@intel.com> | 2015-03-13 14:01:43 -0400
commit | 3f6c7f61855172d3d9b7a9221baba76136088e7c (patch)
tree | b61ab89a880ae74f44956425f5c9794d73ef029d /compiler
parent | cc22e3946baf035c8732e9417ab132bfe663aa45 (diff)
[optimizing] Improve x86, x86_64 code
Tweak the generated code to allow more use of constants, along with other
small changes (a brief illustrative sketch follows the sign-off below):
- Use test instead of a compare against 0
- EmitMove of 0.0 should use xorps
- VisitCompare for kPrimLong can use constants
- cmp/add/sub/mul on x86_64 can use constants that fit in an int32_t
- Long bitwise operations on x86 examine the constant's high/low words to optimize
- Use the 3-operand imulq form if the constant fits in an int32_t
Change-Id: I2dd4010fdffa129fe00905b0020590fe95f3f926
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
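
The sketch below is a minimal, hypothetical illustration of the instruction-selection rules behind the first two bullets: preferring test over a compare against zero, and xorps over a constant load when materializing 0.0f. The Emit* helpers and their printf-based output are invented for this example and are not the ART assembler or code-generator API.

```cpp
// Hypothetical sketch only: these Emit* helpers and their printf output are
// invented for illustration and are not the ART assembler/code-generator API.
#include <cstdint>
#include <cstdio>
#include <cstring>

// Compare a 32-bit register against a known constant.
void EmitCompareWithConstant(const char* reg, int32_t value) {
  if (value == 0) {
    // test reg, reg sets the same flags as cmp reg, 0 but needs no
    // immediate byte, so the encoding is shorter.
    std::printf("testl %s, %s\n", reg, reg);
  } else {
    std::printf("cmpl %s, %d\n", reg, value);
  }
}

// Materialize a float constant into an XMM register.
void EmitLoadFloatConstant(const char* xmm, const char* scratch, float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // bit pattern; only +0.0f is all zeros
  if (bits == 0u) {
    // xorps xmm, xmm produces +0.0 without a load and without a core register.
    std::printf("xorps %s, %s\n", xmm, xmm);
  } else {
    std::printf("movl %s, 0x%x\n", scratch, static_cast<unsigned>(bits));
    std::printf("movd %s, %s\n", xmm, scratch);
  }
}

int main() {
  EmitCompareWithConstant("eax", 0);           // testl eax, eax
  EmitCompareWithConstant("eax", 42);          // cmpl eax, 42
  EmitLoadFloatConstant("xmm0", "eax", 0.0f);  // xorps xmm0, xmm0
  EmitLoadFloatConstant("xmm0", "eax", 1.5f);  // movl eax, 0x3fc00000 / movd xmm0, eax
  return 0;
}
```

The same "skip the no-op word" idea drives the x86 long bitwise changes in the diff below: an AND with an all-ones word or an OR/XOR with a zero word is not emitted at all, and an AND with a zero word becomes a register-clearing xorl.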
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 98
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 118
-rw-r--r-- | compiler/optimizing/locations.cc | 13
-rw-r--r-- | compiler/optimizing/locations.h | 1
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 19
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 2
6 files changed, 192 insertions, 59 deletions
```diff
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a09ecb8..a693f85 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2734,26 +2734,45 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
   Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
+      Register left_low = left.AsRegisterPairLow<Register>();
+      Register left_high = left.AsRegisterPairHigh<Register>();
+      int32_t val_low = 0;
+      int32_t val_high = 0;
+      bool right_is_const = false;
+
+      if (right.IsConstant()) {
+        DCHECK(right.GetConstant()->IsLongConstant());
+        right_is_const = true;
+        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+        val_low = Low32Bits(val);
+        val_high = High32Bits(val);
+      }
+
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_high == 0) {
+          __ testl(left_high, left_high);
+        } else {
+          __ cmpl(left_high, Immediate(val_high));
+        }
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairLow<Register>(),
-                Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_low == 0) {
+          __ testl(left_low, left_low);
+        } else {
+          __ cmpl(left_low, Immediate(val_low));
+        }
       }
       break;
     }
@@ -3649,14 +3668,21 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else if (constant->IsFloatConstant()) {
-      float value = constant->AsFloatConstant()->GetValue();
-      Immediate imm(bit_cast<float, int32_t>(value));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        ScratchRegisterScope ensure_scratch(
-            this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-        Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-        __ movl(temp, imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          ScratchRegisterScope ensure_scratch(
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(ESP, destination.GetStackIndex()), imm);
@@ -4111,18 +4137,38 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
     } else {
       DCHECK(second.IsConstant()) << second;
       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-      Immediate low(Low32Bits(value));
-      Immediate high(High32Bits(value));
+      int32_t low_value = Low32Bits(value);
+      int32_t high_value = High32Bits(value);
+      Immediate low(low_value);
+      Immediate high(high_value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
       if (instruction->IsAnd()) {
-        __ andl(first.AsRegisterPairLow<Register>(), low);
-        __ andl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value == 0) {
+          __ xorl(first_low, first_low);
+        } else if (low_value != -1) {
+          __ andl(first_low, low);
+        }
+        if (high_value == 0) {
+          __ xorl(first_high, first_high);
+        } else if (high_value != -1) {
+          __ andl(first_high, high);
+        }
       } else if (instruction->IsOr()) {
-        __ orl(first.AsRegisterPairLow<Register>(), low);
-        __ orl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ orl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ orl(first_high, high);
+        }
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.AsRegisterPairLow<Register>(), low);
-        __ xorl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ xorl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ xorl(first_high, high);
+        }
       }
     }
   }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 07ba95d..6780cbc 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -961,7 +961,7 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -987,7 +987,18 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
-      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        if (value == 0) {
+          __ testq(left_reg, left_reg);
+        } else {
+          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+        }
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1869,17 +1880,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      HInstruction* rhs = add->InputAt(1);
-      bool is_int32_constant = false;
-      if (rhs->IsLongConstant()) {
-        int64_t value = rhs->AsLongConstant()->GetValue();
-        if (static_cast<int32_t>(value) == value) {
-          is_int32_constant = true;
-        }
-      }
-      locations->SetInAt(1,
-          is_int32_constant ? Location::RegisterOrConstant(rhs) :
-                              Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1977,7 +1978,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2011,7 +2012,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }

@@ -2042,8 +2049,13 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+      if (locations->InAt(1).IsConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimFloat:
@@ -2063,9 +2075,9 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(locations->Out()));
       if (second.IsRegister()) {
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
@@ -2079,16 +2091,27 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ imulq(locations->Out().AsRegister<CpuRegister>(),
+                 first.AsRegister<CpuRegister>(),
+                 Immediate(static_cast<int32_t>(value)));
+      } else {
+        DCHECK(first.Equals(locations->Out()));
+        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }

     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }

     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
@@ -3324,20 +3347,35 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
-      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // easy FP 0.0.
+          __ xorps(dest, dest);
+        } else {
+          __ movl(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
-      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      double fp_value = constant->AsDoubleConstant()->GetValue();
+      int64_t value = bit_cast<double, int64_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          __ xorpd(dest, dest);
+        } else {
+          __ movq(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -3677,8 +3715,9 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio
   if (instruction->GetType() == Primitive::kPrimInt) {
     locations->SetInAt(1, Location::Any());
   } else {
-    // Request a register to avoid loading a 64bits constant.
+    // We can handle 32 bit constants.
     locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
@@ -3734,13 +3773,34 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister first_reg = first.AsRegister<CpuRegister>();
+    bool second_is_constant = false;
+    int64_t value = 0;
+    if (second.IsConstant()) {
+      second_is_constant = true;
+      value = second.GetConstant()->AsLongConstant()->GetValue();
+      DCHECK(IsInt<32>(value));
+    }
+
     if (instruction->IsAnd()) {
-      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ andq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else if (instruction->IsOr()) {
-      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ orq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ xorq(first_reg, second.AsRegister<CpuRegister>());
+      }
     }
   }
 }
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@ Location Location::RegisterOrConstant(HInstruction* instruction) {
       : Location::RequiresRegister();
 }

+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+  if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) {
+    return Location::RequiresRegister();
+  }
+
+  // Does the long constant fit in a 32 bit int?
+  int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue();
+
+  return IsInt<32>(value)
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
   return instruction->IsConstant()
       ? Location::ConstantLocation(instruction->AsConstant())
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@ class Location : public ValueObject {
   }

   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);

   // The location of the first input to the instruction will be
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@ void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
 }


+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // orq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(1, Operand(dst), imm);
+}
+
+
 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@ void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {


 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+  imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.

-  EmitRex64(reg, reg);
+  EmitRex64(dst, reg);

   // See whether imm can be represented as a sign-extended 8bit value.
   int64_t v64 = imm.value();
   if (IsInt<8>(v64)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));
     EmitImmediate(imm);
   }
 }
-

 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@ class X86_64Assembler FINAL : public Assembler {
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
+  void orq(CpuRegister dst, const Immediate& imm);

   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@ class X86_64Assembler FINAL : public Assembler {
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
+  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);

   void imull(CpuRegister reg);
   void imull(const Address& address);
```