author    Mark Mendell <mark.p.mendell@intel.com>    2015-03-13 13:47:53 -0400
committer Mark Mendell <mark.p.mendell@intel.com>    2015-03-13 14:01:43 -0400
commit    3f6c7f61855172d3d9b7a9221baba76136088e7c (patch)
tree      b61ab89a880ae74f44956425f5c9794d73ef029d /compiler
parent    cc22e3946baf035c8732e9417ab132bfe663aa45 (diff)
[optimizing] Improve x86, x86_64 code
Tweak the generated code to allow more use of constants and other small changes:

- Use test vs. compare to 0
- EmitMove of 0.0 should use xorps
- VisitCompare kPrimLong can use constants
- cmp/add/sub/mul on x86_64 can use constants if in int32_t range
- long bit operations on x86 examine long constant high/low to optimize
- Use 3 operand imulq if constant is in int32_t range

Change-Id: I2dd4010fdffa129fe00905b0020590fe95f3f926
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
Diffstat (limited to 'compiler')
-rw-r--r--   compiler/optimizing/code_generator_x86.cc     |  98
-rw-r--r--   compiler/optimizing/code_generator_x86_64.cc  | 118
-rw-r--r--   compiler/optimizing/locations.cc              |  13
-rw-r--r--   compiler/optimizing/locations.h               |   1
-rw-r--r--   compiler/utils/x86_64/assembler_x86_64.cc     |  19
-rw-r--r--   compiler/utils/x86_64/assembler_x86_64.h      |   2
6 files changed, 192 insertions, 59 deletions
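One note before the diff: the "test vs. compare to 0" bullet is purely an instruction-selection tweak. When the right-hand side is the constant 0, test reg, reg leaves ZF, SF, OF and CF in the same state cmp reg, 0 would, so every conditional jump that follows behaves identically, and the test encoding is typically a byte shorter. Below is a minimal standalone C++ sketch of that selection rule; it is illustrative only, not part of the commit, and the helper name is made up.

  #include <cstdint>
  #include <cstdio>
  #include <string>

  // Hypothetical helper mirroring the choice the codegen now makes when the
  // right-hand side of a compare is a constant: prefer test when it is 0.
  static std::string CompareAgainstConstant(const std::string& reg, int32_t value) {
    if (value == 0) {
      // test reg, reg sets ZF/SF and clears OF/CF, exactly what cmp reg, 0
      // would produce for the jcc that follows, in fewer bytes.
      return "testl " + reg + ", " + reg;
    }
    return "cmpl " + reg + ", " + std::to_string(value);
  }

  int main() {
    std::printf("%s\n", CompareAgainstConstant("eax", 0).c_str());   // testl eax, eax
    std::printf("%s\n", CompareAgainstConstant("eax", 42).c_str());  // cmpl eax, 42
    return 0;
  }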
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a09ecb8..a693f85 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2734,26 +2734,45 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
Label less, greater, done;
switch (compare->InputAt(0)->GetType()) {
case Primitive::kPrimLong: {
+ Register left_low = left.AsRegisterPairLow<Register>();
+ Register left_high = left.AsRegisterPairHigh<Register>();
+ int32_t val_low = 0;
+ int32_t val_high = 0;
+ bool right_is_const = false;
+
+ if (right.IsConstant()) {
+ DCHECK(right.GetConstant()->IsLongConstant());
+ right_is_const = true;
+ int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+ val_low = Low32Bits(val);
+ val_high = High32Bits(val);
+ }
+
if (right.IsRegisterPair()) {
- __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+ __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
} else if (right.IsDoubleStackSlot()) {
- __ cmpl(left.AsRegisterPairHigh<Register>(),
- Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+ __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
} else {
- DCHECK(right.IsConstant()) << right;
- __ cmpl(left.AsRegisterPairHigh<Register>(),
- Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+ DCHECK(right_is_const) << right;
+ if (val_high == 0) {
+ __ testl(left_high, left_high);
+ } else {
+ __ cmpl(left_high, Immediate(val_high));
+ }
}
__ j(kLess, &less); // Signed compare.
__ j(kGreater, &greater); // Signed compare.
if (right.IsRegisterPair()) {
- __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+ __ cmpl(left_low, right.AsRegisterPairLow<Register>());
} else if (right.IsDoubleStackSlot()) {
- __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+ __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
} else {
- DCHECK(right.IsConstant()) << right;
- __ cmpl(left.AsRegisterPairLow<Register>(),
- Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+ DCHECK(right_is_const) << right;
+ if (val_low == 0) {
+ __ testl(left_low, left_low);
+ } else {
+ __ cmpl(left_low, Immediate(val_low));
+ }
}
break;
}
@@ -3649,14 +3668,21 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
}
} else if (constant->IsFloatConstant()) {
- float value = constant->AsFloatConstant()->GetValue();
- Immediate imm(bit_cast<float, int32_t>(value));
+ float fp_value = constant->AsFloatConstant()->GetValue();
+ int32_t value = bit_cast<float, int32_t>(fp_value);
+ Immediate imm(value);
if (destination.IsFpuRegister()) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp = static_cast<Register>(ensure_scratch.GetRegister());
- __ movl(temp, imm);
- __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+ XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+ if (value == 0) {
+ // Easy handling of 0.0.
+ __ xorps(dest, dest);
+ } else {
+ ScratchRegisterScope ensure_scratch(
+ this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+ Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
+ }
} else {
DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(ESP, destination.GetStackIndex()), imm);
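The EmitMove hunk above keys on the bit pattern of the float constant, not its numeric value. A small self-contained sketch of that check follows; FloatBits is a stand-in for the bit_cast used in the diff, written with memcpy to stay well-defined C++.

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Stand-in for bit_cast<float, int32_t>: reinterpret a float's bits as int32_t.
  static int32_t FloatBits(float f) {
    int32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return bits;
  }

  int main() {
    // +0.0f is the all-zero bit pattern, so xorps xmm, xmm materializes it without
    // borrowing a core register; anything else still goes through movl + movd.
    std::printf("%d\n", FloatBits(0.0f) == 0);   // 1: takes the xorps path
    std::printf("%d\n", FloatBits(-0.0f) == 0);  // 0: sign bit set, loads the bits
    std::printf("%d\n", FloatBits(1.0f) == 0);   // 0: loads the bits
    return 0;
  }

Note that -0.0f deliberately does not take the xorps shortcut: its bit pattern is non-zero, so the check above sends it through the scratch-register path.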
@@ -4111,18 +4137,38 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
} else {
DCHECK(second.IsConstant()) << second;
int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
- Immediate low(Low32Bits(value));
- Immediate high(High32Bits(value));
+ int32_t low_value = Low32Bits(value);
+ int32_t high_value = High32Bits(value);
+ Immediate low(low_value);
+ Immediate high(high_value);
+ Register first_low = first.AsRegisterPairLow<Register>();
+ Register first_high = first.AsRegisterPairHigh<Register>();
if (instruction->IsAnd()) {
- __ andl(first.AsRegisterPairLow<Register>(), low);
- __ andl(first.AsRegisterPairHigh<Register>(), high);
+ if (low_value == 0) {
+ __ xorl(first_low, first_low);
+ } else if (low_value != -1) {
+ __ andl(first_low, low);
+ }
+ if (high_value == 0) {
+ __ xorl(first_high, first_high);
+ } else if (high_value != -1) {
+ __ andl(first_high, high);
+ }
} else if (instruction->IsOr()) {
- __ orl(first.AsRegisterPairLow<Register>(), low);
- __ orl(first.AsRegisterPairHigh<Register>(), high);
+ if (low_value != 0) {
+ __ orl(first_low, low);
+ }
+ if (high_value != 0) {
+ __ orl(first_high, high);
+ }
} else {
DCHECK(instruction->IsXor());
- __ xorl(first.AsRegisterPairLow<Register>(), low);
- __ xorl(first.AsRegisterPairHigh<Register>(), high);
+ if (low_value != 0) {
+ __ xorl(first_low, low);
+ }
+ if (high_value != 0) {
+ __ xorl(first_high, high);
+ }
}
}
}
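On 32-bit x86 a long lives in a register pair, so the HandleBitwiseOperation hunk above looks at each 32-bit half of the constant separately and picks the cheapest instruction per half. The sketch below models that per-half decision for AND; Low32Bits/High32Bits are reimplemented stand-ins for the ART helpers used in the diff, and EmitAndHalf is made up for illustration.

  #include <cstdint>
  #include <cstdio>

  // Illustrative stand-ins for ART's Low32Bits/High32Bits.
  static int32_t Low32Bits(int64_t v)  { return static_cast<int32_t>(v); }
  static int32_t High32Bits(int64_t v) { return static_cast<int32_t>(v >> 32); }

  // Per-half choice for "register-pair &= constant", mirroring the diff:
  // AND with 0 becomes a zeroing xor, AND with -1 is dropped entirely.
  static void EmitAndHalf(const char* reg, int32_t half) {
    if (half == 0) {
      std::printf("xorl %s, %s\n", reg, reg);
    } else if (half != -1) {
      std::printf("andl %s, %d\n", reg, half);
    }
    // half == -1: no instruction needed.
  }

  int main() {
    int64_t constant = INT64_C(0x00000000ffffff00);
    EmitAndHalf("low_reg", Low32Bits(constant));    // andl low_reg, -256
    EmitAndHalf("high_reg", High32Bits(constant));  // xorl high_reg, high_reg
    return 0;
  }

OR and XOR get the analogous treatment in the diff: a zero half simply emits nothing.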
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 07ba95d..6780cbc 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -961,7 +961,7 @@ void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
switch (compare->InputAt(0)->GetType()) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -987,7 +987,18 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
Primitive::Type type = compare->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong: {
- __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+ CpuRegister left_reg = left.AsRegister<CpuRegister>();
+ if (right.IsConstant()) {
+ int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(value));
+ if (value == 0) {
+ __ testq(left_reg, left_reg);
+ } else {
+ __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+ }
+ } else {
+ __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+ }
break;
}
case Primitive::kPrimFloat: {
@@ -1869,17 +1880,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
// We can use a leaq or addq if the constant can fit in an immediate.
- HInstruction* rhs = add->InputAt(1);
- bool is_int32_constant = false;
- if (rhs->IsLongConstant()) {
- int64_t value = rhs->AsLongConstant()->GetValue();
- if (static_cast<int32_t>(value) == value) {
- is_int32_constant = true;
- }
- }
- locations->SetInAt(1,
- is_int32_constant ? Location::RegisterOrConstant(rhs) :
- Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -1977,7 +1978,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2011,7 +2012,13 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
break;
}
case Primitive::kPrimLong: {
- __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (second.IsConstant()) {
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(value));
+ __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ }
break;
}
@@ -2042,8 +2049,13 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+ if (locations->InAt(1).IsConstant()) {
+ // Can use 3 operand multiply.
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
break;
}
case Primitive::kPrimFloat:
@@ -2063,9 +2075,9 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
LocationSummary* locations = mul->GetLocations();
Location first = locations->InAt(0);
Location second = locations->InAt(1);
- DCHECK(first.Equals(locations->Out()));
switch (mul->GetResultType()) {
case Primitive::kPrimInt: {
+ DCHECK(first.Equals(locations->Out()));
if (second.IsRegister()) {
__ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
} else if (second.IsConstant()) {
@@ -2079,16 +2091,27 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
break;
}
case Primitive::kPrimLong: {
- __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (second.IsConstant()) {
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(value));
+ __ imulq(locations->Out().AsRegister<CpuRegister>(),
+ first.AsRegister<CpuRegister>(),
+ Immediate(static_cast<int32_t>(value)));
+ } else {
+ DCHECK(first.Equals(locations->Out()));
+ __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ }
break;
}
case Primitive::kPrimFloat: {
+ DCHECK(first.Equals(locations->Out()));
__ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
break;
}
case Primitive::kPrimDouble: {
+ DCHECK(first.Equals(locations->Out()));
__ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
break;
}
@@ -3324,20 +3347,35 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
} else if (constant->IsFloatConstant()) {
- Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+ float fp_value = constant->AsFloatConstant()->GetValue();
+ int32_t value = bit_cast<float, int32_t>(fp_value);
+ Immediate imm(value);
if (destination.IsFpuRegister()) {
- __ movl(CpuRegister(TMP), imm);
- __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+ XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+ if (value == 0) {
+ // easy FP 0.0.
+ __ xorps(dest, dest);
+ } else {
+ __ movl(CpuRegister(TMP), imm);
+ __ movd(dest, CpuRegister(TMP));
+ }
} else {
DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else {
DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
- Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+ double fp_value = constant->AsDoubleConstant()->GetValue();
+ int64_t value = bit_cast<double, int64_t>(fp_value);
+ Immediate imm(value);
if (destination.IsFpuRegister()) {
- __ movq(CpuRegister(TMP), imm);
- __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+ XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+ if (value == 0) {
+ __ xorpd(dest, dest);
+ } else {
+ __ movq(CpuRegister(TMP), imm);
+ __ movd(dest, CpuRegister(TMP));
+ }
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
__ movq(CpuRegister(TMP), imm);
@@ -3677,8 +3715,9 @@ void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instructio
if (instruction->GetType() == Primitive::kPrimInt) {
locations->SetInAt(1, Location::Any());
} else {
- // Request a register to avoid loading a 64bits constant.
+ // We can handle 32 bit constants.
locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
}
locations->SetOut(Location::SameAsFirstInput());
}
@@ -3734,13 +3773,34 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
}
} else {
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+ CpuRegister first_reg = first.AsRegister<CpuRegister>();
+ bool second_is_constant = false;
+ int64_t value = 0;
+ if (second.IsConstant()) {
+ second_is_constant = true;
+ value = second.GetConstant()->AsLongConstant()->GetValue();
+ DCHECK(IsInt<32>(value));
+ }
+
if (instruction->IsAnd()) {
- __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (second_is_constant) {
+ __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ andq(first_reg, second.AsRegister<CpuRegister>());
+ }
} else if (instruction->IsOr()) {
- __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (second_is_constant) {
+ __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ orq(first_reg, second.AsRegister<CpuRegister>());
+ }
} else {
DCHECK(instruction->IsXor());
- __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (second_is_constant) {
+ __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ xorq(first_reg, second.AsRegister<CpuRegister>());
+ }
}
}
}
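All of the x86_64 immediate forms above (cmpq, subq, imulq, andq, orq, xorq) rely on the hardware sign-extending a 32-bit immediate to 64 bits, which is why each path is guarded by IsInt<32>(value). A small sketch of why that guard is sufficient; SignExtend32To64 is illustrative only.

  #include <cstdint>
  #include <cstdio>

  // x86_64 ALU instructions with an imm32 operand sign-extend it to 64 bits.
  static int64_t SignExtend32To64(int32_t imm) {
    return static_cast<int64_t>(imm);
  }

  int main() {
    int64_t reg = INT64_C(0x123456789abcdef0);
    int64_t constant = -4096;  // IsInt<32>(constant) holds, so it round-trips exactly
    int64_t via_immediate = reg & SignExtend32To64(static_cast<int32_t>(constant));
    int64_t via_register  = reg & constant;
    std::printf("%d\n", via_immediate == via_register);  // 1: the immediate form is safe
    return 0;
  }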
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@ Location Location::RegisterOrConstant(HInstruction* instruction) {
: Location::RequiresRegister();
}
+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+ if (!instruction->IsConstant() || !instruction->AsConstant()->IsLongConstant()) {
+ return Location::RequiresRegister();
+ }
+
+ // Does the long constant fit in a 32 bit int?
+ int64_t value = instruction->AsConstant()->AsLongConstant()->GetValue();
+
+ return IsInt<32>(value)
+ ? Location::ConstantLocation(instruction->AsConstant())
+ : Location::RequiresRegister();
+}
+
Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
return instruction->IsConstant()
? Location::ConstantLocation(instruction->AsConstant())
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@ class Location : public ValueObject {
}
static Location RegisterOrConstant(HInstruction* instruction);
+ static Location RegisterOrInt32LongConstant(HInstruction* instruction);
static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
// The location of the first input to the instruction will be
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@ void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
}
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ CHECK(imm.is_int32()); // orq only supports 32b immediate.
+ EmitRex64(dst);
+ EmitComplex(1, Operand(dst), imm);
+}
+
+
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@ void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+ imulq(reg, reg, imm);
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK(imm.is_int32()); // imulq only supports 32b immediate.
- EmitRex64(reg, reg);
+ EmitRex64(dst, reg);
// See whether imm can be represented as a sign-extended 8bit value.
int64_t v64 = imm.value();
if (IsInt<8>(v64)) {
// Sign-extension works.
EmitUint8(0x6B);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(reg));
EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
} else {
// Not representable, use full immediate.
EmitUint8(0x69);
- EmitOperand(reg.LowBits(), Operand(reg));
+ EmitOperand(dst.LowBits(), Operand(reg));
EmitImmediate(imm);
}
}
-
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitRex64(reg, address);
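The new three-operand imulq reuses the existing immediate-size shortcut: if the immediate survives a sign-extending round trip through 8 bits it is emitted with opcode 0x6B and a single immediate byte, otherwise with 0x69 and a full 32-bit immediate. A tiny sketch of that check; FitsInInt8 is a stand-in for the IsInt<8> used in the diff.

  #include <cstdint>
  #include <cstdio>

  // Stand-in for IsInt<8>(v): can v be encoded as a sign-extended 8-bit immediate?
  static bool FitsInInt8(int64_t v) {
    return static_cast<int64_t>(static_cast<int8_t>(v)) == v;
  }

  int main() {
    std::printf("%d\n", FitsInInt8(100));   // 1: imul r64, r/m64, imm8  (0x6B)
    std::printf("%d\n", FitsInInt8(-128));  // 1: still sign-extends cleanly
    std::printf("%d\n", FitsInInt8(1000));  // 0: imul r64, r/m64, imm32 (0x69)
    return 0;
  }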
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@ class X86_64Assembler FINAL : public Assembler {
void orl(CpuRegister dst, CpuRegister src);
void orl(CpuRegister reg, const Address& address);
void orq(CpuRegister dst, CpuRegister src);
+ void orq(CpuRegister dst, const Immediate& imm);
void xorl(CpuRegister dst, CpuRegister src);
void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@ class X86_64Assembler FINAL : public Assembler {
void imulq(CpuRegister dst, CpuRegister src);
void imulq(CpuRegister reg, const Immediate& imm);
void imulq(CpuRegister reg, const Address& address);
+ void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
void imull(CpuRegister reg);
void imull(const Address& address);