summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--compiler/optimizing/code_generator.cc8
-rw-r--r--compiler/optimizing/code_generator_x86.cc9
-rw-r--r--compiler/optimizing/code_generator_x86.h9
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc11
-rw-r--r--compiler/optimizing/code_generator_x86_64.h9
-rw-r--r--compiler/optimizing/codegen_test.cc18
-rw-r--r--compiler/optimizing/constant_folding_test.cc5
-rw-r--r--compiler/optimizing/dead_code_elimination_test.cc5
-rw-r--r--compiler/optimizing/intrinsics_x86.cc152
-rw-r--r--compiler/optimizing/intrinsics_x86.h3
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc208
-rw-r--r--compiler/optimizing/intrinsics_x86_64.h3
-rw-r--r--compiler/optimizing/linearize_test.cc5
-rw-r--r--compiler/optimizing/live_ranges_test.cc25
-rw-r--r--compiler/optimizing/liveness_test.cc5
-rw-r--r--compiler/optimizing/register_allocator_test.cc65
-rw-r--r--compiler/utils/assembler_test.h61
-rw-r--r--compiler/utils/x86/assembler_x86.cc22
-rw-r--r--compiler/utils/x86/assembler_x86.h3
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc24
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h3
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc8
-rw-r--r--disassembler/disassembler_x86.cc18
-rw-r--r--runtime/arch/x86/instruction_set_features_x86.h2
24 files changed, 629 insertions, 52 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bd6e943..9b1ef17 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -378,10 +378,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
case kMips:
return nullptr;
case kX86: {
- return new x86::CodeGeneratorX86(graph, compiler_options);
+ return new x86::CodeGeneratorX86(graph,
+ *isa_features.AsX86InstructionSetFeatures(),
+ compiler_options);
}
case kX86_64: {
- return new x86_64::CodeGeneratorX86_64(graph, compiler_options);
+ return new x86_64::CodeGeneratorX86_64(graph,
+ *isa_features.AsX86_64InstructionSetFeatures(),
+ compiler_options);
}
default:
return nullptr;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9b7e01c..f79dbc3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -360,7 +360,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32
return GetFloatingPointSpillSlotSize();
}
-CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
@@ -373,7 +375,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features) {
// Use a fake return address register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -1163,7 +1166,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2a26c86..0cc3c65 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -189,7 +189,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
class CodeGeneratorX86 : public CodeGenerator {
public:
- CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86() {}
void GenerateFrameEntry() OVERRIDE;
@@ -275,6 +277,10 @@ class CodeGeneratorX86 : public CodeGenerator {
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+ const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -283,6 +289,7 @@ class CodeGeneratorX86 : public CodeGenerator {
InstructionCodeGeneratorX86 instruction_visitor_;
ParallelMoveResolverX86 move_resolver_;
X86Assembler assembler_;
+ const X86InstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b8940e3..9958451 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -411,7 +411,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
-CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
@@ -425,7 +427,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -1233,7 +1236,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -1294,7 +1297,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4b8f087..375c0b0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -195,7 +195,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
class CodeGeneratorX86_64 : public CodeGenerator {
public:
- CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86_64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -268,6 +270,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+ const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -276,6 +282,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
InstructionCodeGeneratorX86_64 instruction_visitor_;
ParallelMoveResolverX86_64 move_resolver_;
X86_64Assembler assembler_;
+ const X86_64InstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6053ad5..2be117b 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -19,6 +19,8 @@
#include "arch/instruction_set.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
InternalCodeAllocator allocator;
CompilerOptions compiler_options;
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
// We avoid doing a stack overflow check that requires the runtime being setup,
// by making sure the compiler knows the methods we are running are leaf methods.
codegenX86.CompileBaseline(&allocator, true);
@@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenARM, has_result, expected);
}
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
codegenX86_64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kX86_64) {
Run(allocator, codegenX86_64, has_result, expected);
@@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph,
compiler_options);
RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
}
}
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 6853d54..02ad675 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -16,6 +16,7 @@
#include <functional>
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(expected_before, actual_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HConstantFolding(graph).Run();
SSAChecker ssa_checker_cf(&allocator, graph);
ssa_checker_cf.Run();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index a644719..98ae1ec 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
@@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(actual_before, expected_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HDeadCodeElimination(graph).Run();
SSAChecker ssa_checker(&allocator, graph);
ssa_checker.Run();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 384737f..0740471 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86.h"
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int kFloatNaN = 0x7FC00000;
+IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
}
@@ -719,6 +725,148 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
+ HInvoke* invoke,
+ X86Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
+
+// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble,
+// as it needs 64 bit instructions.
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
+ // Do we have instruction support?
+ if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ Register out = locations->Out().AsRegister<Register>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And truncate to an integer.
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
+
void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1191,11 +1339,7 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index e1e8260..4292ec7 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -32,7 +32,7 @@ class X86Assembler;
class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86);
};
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 736cea8..f6fa013 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86_64.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -30,6 +31,11 @@ namespace art {
namespace x86_64 {
+IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}
@@ -614,6 +620,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
+ HInvoke* invoke,
+ X86_64Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
+
+static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out, false);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And truncate to an integer.
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
+ __ movd(inPlusPointFive, out, true);
+
+ // Add in the input.
+ __ addsd(inPlusPointFive, in);
+
+ // And truncate to an integer.
+ __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movq(out, Immediate(kPrimLongMax));
+ // maxLong = long-to-double(out)
+ __ cvtsi2sd(maxLong, out, true);
+
+ // if inPlusPointFive >= maxLong goto done
+ __ comisd(inPlusPointFive, maxLong);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = double-to-long-truncate(input)
+ __ cvttsd2si(out, inPlusPointFive, true);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorq(out, out);
+ __ Bind(&done);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1009,11 +1212,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index dfae7fa..0e0e72c 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -32,7 +32,7 @@ class X86_64Assembler;
class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86_64* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
};
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index f22b7a7..28c5555 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,6 +16,7 @@
#include <fstream>
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index c102c4f..61d6593 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
RemoveSuspendChecks(graph);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 0b0cfde..81250ca 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) {
graph->TryBuildingSsa();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 7c3a035..3951439 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = new (&allocator) HGraph(&allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
GrowableArray<LiveInterval*> intervals(&allocator, 0);
// Test with two intervals of the same range.
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -556,7 +575,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
{
HGraph* graph = BuildDiv(&allocator, &div);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) {
locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 6f8b301..b13edb6 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -123,6 +123,16 @@ class AssemblerTest : public testing::Test {
fmt);
}
+ std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) {
+ return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
+ GetFPRegisters(),
+ GetFPRegisters(),
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ imm_bytes,
+ fmt);
+ }
+
std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
return RepeatTemplatedRegisters<FPReg, Reg>(f,
GetFPRegisters(),
@@ -448,6 +458,57 @@ class AssemblerTest : public testing::Test {
return str;
}
+ template <typename Reg1, typename Reg2>
+ std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&),
+ const std::vector<Reg1*> reg1_registers,
+ const std::vector<Reg2*> reg2_registers,
+ std::string (AssemblerTest::*GetName1)(const Reg1&),
+ std::string (AssemblerTest::*GetName2)(const Reg2&),
+ size_t imm_bytes,
+ std::string fmt) {
+ std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+ WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+
+ std::string str;
+ for (auto reg1 : reg1_registers) {
+ for (auto reg2 : reg2_registers) {
+ for (int64_t imm : imms) {
+ Imm new_imm = CreateImmediate(imm);
+ (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+ std::string base = fmt;
+
+ std::string reg1_string = (this->*GetName1)(*reg1);
+ size_t reg1_index;
+ while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+ base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+ }
+
+ std::string reg2_string = (this->*GetName2)(*reg2);
+ size_t reg2_index;
+ while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+ base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+ }
+
+ size_t imm_index = base.find(IMM_TOKEN);
+ if (imm_index != std::string::npos) {
+ std::ostringstream sreg;
+ sreg << imm;
+ std::string imm_string = sreg.str();
+ base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+ }
+
+ if (str.size() > 0) {
+ str += "\n";
+ }
+ str += base;
+ }
+ }
+ }
+ // Add a newline at the end.
+ str += "\n";
+ return str;
+ }
+
template <RegisterView kRegView>
std::string GetRegName(const Reg& reg) {
std::ostringstream sreg;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5773459..b3a1376 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -695,6 +695,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) {
}
+void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0B);
+ EmitXmmRegisterOperand(dst, src);
+ EmitUint8(imm.value());
+}
+
+
+void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0A);
+ EmitXmmRegisterOperand(dst, src);
+ EmitUint8(imm.value());
+}
+
+
void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6ccf2e3..bdf8843 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler {
void ucomiss(XmmRegister a, XmmRegister b);
void ucomisd(XmmRegister a, XmmRegister b);
+ void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
+
void sqrtsd(XmmRegister dst, XmmRegister src);
void sqrtss(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index bd155ed..e82d90c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -796,6 +796,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
}
+void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+ EmitUint8(imm.value());
+}
+
+
+void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0A);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+ EmitUint8(imm.value());
+}
+
+
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 495f74f..39f781c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -353,6 +353,9 @@ class X86_64Assembler FINAL : public Assembler {
void ucomiss(XmmRegister a, XmmRegister b);
void ucomisd(XmmRegister a, XmmRegister b);
+ void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
+
void sqrtsd(XmmRegister dst, XmmRegister src);
void sqrtss(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00f508b..4402dfc 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -692,6 +692,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd");
}
+TEST_F(AssemblerX86_64Test, Roundss) {
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss");
+}
+
+TEST_F(AssemblerX86_64Test, Roundsd) {
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd");
+}
+
TEST_F(AssemblerX86_64Test, Xorps) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps");
}
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 203488d..a1834e1 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -561,6 +561,24 @@ DISASSEMBLER_ENTRY(cmp,
instr++;
if (prefix[2] == 0x66) {
switch (*instr) {
+ case 0x0A:
+ opcode1 = "roundss";
+ prefix[2] = 0;
+ has_modrm = true;
+ store = true;
+ src_reg_file = SSE;
+ dst_reg_file = SSE;
+ immediate_bytes = 1;
+ break;
+ case 0x0B:
+ opcode1 = "roundsd";
+ prefix[2] = 0;
+ has_modrm = true;
+ store = true;
+ src_reg_file = SSE;
+ dst_reg_file = SSE;
+ immediate_bytes = 1;
+ break;
case 0x14:
opcode1 = "pextrb";
prefix[2] = 0;
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 926fabb..7b61245 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -58,6 +58,8 @@ class X86InstructionSetFeatures : public InstructionSetFeatures {
virtual ~X86InstructionSetFeatures() {}
+ bool HasSSE4_1() const { return has_SSE4_1_; }
+
protected:
// Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
virtual const InstructionSetFeatures*