summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nicolas Geoffray <ngeoffray@google.com> 2015-03-27 09:29:06 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2015-03-27 09:29:07 +0000
commit b3665e3dfdd23cc7a2f17a0b53bb16205bf4151f (patch)
tree d86be714298806cfcd6a16be674573369474e8f7
parent 03910065cd025ecb07781b85c2240be69c202d75 (diff)
parent 09ed1a3125849ec6ac07cb886e3c502e1dcfada2 (diff)
download art-b3665e3dfdd23cc7a2f17a0b53bb16205bf4151f.zip
art-b3665e3dfdd23cc7a2f17a0b53bb16205bf4151f.tar.gz
art-b3665e3dfdd23cc7a2f17a0b53bb16205bf4151f.tar.bz2
Merge "[optimizing] Implement X86 intrinsic support"
-rw-r--r-- compiler/Android.mk 1
-rw-r--r-- compiler/optimizing/code_generator_x86.cc 83
-rw-r--r-- compiler/optimizing/code_generator_x86.h 17
-rw-r--r-- compiler/optimizing/graph_visualizer.cc 2
-rw-r--r-- compiler/optimizing/intrinsics_x86.cc 1180
-rw-r--r-- compiler/optimizing/intrinsics_x86.h 83
-rw-r--r-- compiler/utils/x86/assembler_x86.cc 62
-rw-r--r-- compiler/utils/x86/assembler_x86.h 10
8 files changed, 1401 insertions, 37 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 6b0e6ff..0247c9d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -112,6 +112,7 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/intrinsics.cc \
optimizing/intrinsics_arm.cc \
optimizing/intrinsics_arm64.cc \
+ optimizing/intrinsics_x86.cc \
optimizing/intrinsics_x86_64.cc \
optimizing/licm.cc \
optimizing/locations.cc \
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4414a65..b18cdd5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -19,6 +19,8 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
@@ -60,20 +62,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmReg
#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
-class SlowPathCodeX86 : public SlowPathCode {
- public:
- SlowPathCodeX86() : entry_label_(), exit_label_() {}
-
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
-
- private:
- Label entry_label_;
- Label exit_label_;
-
- DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86);
-};
-
class NullCheckSlowPathX86 : public SlowPathCodeX86 {
public:
explicit NullCheckSlowPathX86(HNullCheck* instruction) : instruction_(instruction) {}
@@ -1140,35 +1128,30 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
-void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-
- // TODO: Implement all kinds of calls:
- // 1) boot -> boot
- // 2) app -> boot
- // 3) app -> app
- //
- // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorX86 intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
- // temp = method;
- codegen_->LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
- // (temp + offset_of_quick_compiled_code)()
- __ call(Address(
- temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
- } else {
- __ call(codegen_->GetFrameEntryLabel());
+void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
}
- DCHECK(!codegen_->IsLeafMethod());
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>());
}
void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -2863,6 +2846,32 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
}
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Register temp) {
+ // TODO: Implement all kinds of calls:
+ // 1) boot -> boot
+ // 2) app -> boot
+ // 3) app -> app
+ //
+ // Currently we implement the app -> app logic, which looks up in the resolve cache.
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(
+ temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ } else {
+ __ call(GetFrameEntryLabel());
+ }
+
+ DCHECK(!IsLeafMethod());
+ RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
Label is_null;
__ testl(value, value);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c5763de..9b4b3db 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -228,6 +228,9 @@ class CodeGeneratorX86 : public CodeGenerator {
// Helper method to move a 64bits value between two locations.
void Move64(Location destination, Location source);
+ // Generate a call to a static or direct method.
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp);
+
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value);
@@ -261,6 +264,20 @@ class CodeGeneratorX86 : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};
+class SlowPathCodeX86 : public SlowPathCode {
+ public:
+ SlowPathCodeX86() : entry_label_(), exit_label_() {}
+
+ Label* GetEntryLabel() { return &entry_label_; }
+ Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Label entry_label_;
+ Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86);
+};
+
} // namespace x86
} // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index cabfa48..49c0d38 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -149,6 +149,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
codegen_.DumpCoreRegister(output_, location.low());
output_ << " and ";
codegen_.DumpCoreRegister(output_, location.high());
+ } else if (location.IsUnallocated()) {
+ output_ << "<U>";
} else {
DCHECK(location.IsDoubleStackSlot());
output_ << "2x" << location.GetStackIndex() << "(sp)";
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
new file mode 100644
index 0000000..bcf947f
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -0,0 +1,1180 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86.h"
+
+#include "code_generator_x86.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86/assembler_x86.h"
+#include "utils/x86/constants_x86.h"
+
+namespace art {
+
+namespace x86 {
+
+static constexpr int kDoubleNaNHigh = 0x7FF80000;
+static constexpr int kDoubleNaNLow = 0x00000000;
+static constexpr int kFloatNaN = 0x7FC00000;
+
+X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
+ return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ LocationSummary* res = invoke->GetLocations();
+ return res != nullptr && res->Intrinsified();
+}
+
+#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
+
+// TODO: target as memory.
+static void MoveFromReturnRegister(Location target,
+ Primitive::Type type,
+ CodeGeneratorX86* codegen) {
+ if (!target.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ Register target_reg = target.AsRegister<Register>();
+ if (target_reg != EAX) {
+ __ movl(target_reg, EAX);
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ Register target_reg_lo = target.AsRegisterPairLow<Register>();
+ Register target_reg_hi = target.AsRegisterPairHigh<Register>();
+ if (target_reg_lo != EAX) {
+ __ movl(target_reg_lo, EAX);
+ }
+ if (target_reg_hi != EDX) {
+ __ movl(target_reg_hi, EDX);
+ }
+ break;
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected void type for valid location " << target;
+ UNREACHABLE();
+
+ case Primitive::kPrimDouble: {
+ XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
+ if (target_reg != XMM0) {
+ __ movsd(target_reg, XMM0);
+ }
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
+ if (target_reg != XMM0) {
+ __ movss(target_reg, XMM0);
+ }
+ break;
+ }
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
+ public:
+ explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
+ : invoke_(invoke) {
+ // The temporary register has to be EAX for x86 invokes.
+ DCHECK_EQ(temp, EAX);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
+ __ Bind(GetEntryLabel());
+
+ SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+ MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
+ } else {
+ UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+ UNREACHABLE();
+ }
+
+ // Copy the result back to the expected output.
+ Location out = invoke_->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+ }
+
+ RestoreLiveRegisters(codegen, invoke_->GetLocations());
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The instruction where this slow path is happening.
+ HInvoke* const invoke_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
+};
+
+#undef __
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ if (is64bit) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ if (is64bit) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ // Need to use the temporary.
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movsd(temp, input.AsFpuRegister<XmmRegister>());
+ __ movd(output.AsRegisterPairLow<Register>(), temp);
+ __ psrlq(temp, Immediate(32));
+ __ movd(output.AsRegisterPairHigh<Register>(), temp);
+ } else {
+ __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
+ }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ // Need to use the temporary.
+ XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ __ movd(temp1, input.AsRegisterPairLow<Register>());
+ __ movd(temp2, input.AsRegisterPairHigh<Register>());
+ __ punpckldq(temp1, temp2);
+ __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
+ } else {
+ __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke, true);
+}
+void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke, true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke, false);
+}
+void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke, false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+ Primitive::Type size,
+ X86Assembler* assembler) {
+ Register out = locations->Out().AsRegister<Register>();
+
+ switch (size) {
+ case Primitive::kPrimShort:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(out);
+ __ sarl(out, Immediate(16));
+ break;
+ case Primitive::kPrimInt:
+ __ bswapl(out);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+ UNREACHABLE();
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+// need is 64b.
+
+static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
+ // TODO: Enable memory operations when the assembler supports them.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+ // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
+ Location output = locations->Out();
+
+ if (output.IsFpuRegister()) {
+ // Build the abs mask (sign bit cleared) on the stack; both paths reserve 16 bytes in total.
+ if (is64bit) {
+ __ subl(ESP, Immediate(8));
+ __ pushl(Immediate(0x7FFFFFFF));
+ __ pushl(Immediate(0xFFFFFFFF));
+ __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+ } else {
+ __ subl(ESP, Immediate(12));
+ __ pushl(Immediate(0x7FFFFFFF));
+ __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+ }
+ __ addl(ESP, Immediate(16));
+ } else {
+ // TODO: update when assembler support is available.
+ UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+// Once assembler support is available, in-memory operations look like this:
+// if (is64bit) {
+// DCHECK(output.IsDoubleStackSlot());
+// __ andl(Address(ESP, output.GetHighStackIndex(kX86WordSize)),
+// Immediate(0x7FFFFFFF));
+// } else {
+// DCHECK(output.IsStackSlot());
+// // Can use and with a literal directly.
+// __ andl(Address(ESP, output.GetStackIndex()), Immediate(0x7FFFFFFF));
+// }
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
+ CreateFloatToFloat(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
+ CreateFloatToFloat(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RegisterLocation(EAX));
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RegisterLocation(EDX));
+}
+
+static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
+ Location output = locations->Out();
+ Register out = output.AsRegister<Register>();
+ DCHECK_EQ(out, EAX);
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ DCHECK_EQ(temp, EDX);
+
+ // Sign extend EAX into EDX.
+ __ cdq();
+
+ // XOR EAX with sign.
+ __ xorl(EAX, EDX);
+
+ // Subtract out sign to correct.
+ __ subl(EAX, EDX);
+
+ // The result is in EAX.
+}
+
+static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ // Compute the sign into the temporary.
+ __ movl(temp, input_hi);
+ __ sarl(temp, Immediate(31));
+
+ // Store the sign into the output.
+ __ movl(output_lo, temp);
+ __ movl(output_hi, temp);
+
+ // XOR the input to the output.
+ __ xorl(output_lo, input_lo);
+ __ xorl(output_hi, input_hi);
+
+ // Subtract the sign.
+ __ subl(output_lo, temp);
+ __ sbbl(output_hi, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
+ CreateAbsIntLocation(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
+ CreateAbsLongLocation(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
+ GenAbsLong(invoke->GetLocations(), GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+ X86Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if NaN jmp Nan_label
+ // if op2 is the min/max jmp op2_label
+ // if out is the min/max jmp done
+ // handle -0/+0 (operands compared equal)
+ // jmp done
+ // Nan_label:
+ // out := NaN; jmp done
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ Label nan, done, op2_label;
+ if (is_double) {
+ __ ucomisd(out, op2);
+ } else {
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Operands compared equal: handle 0.0/-0.0 by merging the bits (OR for min, AND for max).
+ if (is_min) {
+ if (is_double) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (is_double) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling: push the NaN constant, load it into out, then pop the scratch bytes.
+ __ Bind(&nan);
+ if (is_double) {
+ __ pushl(Immediate(kDoubleNaNHigh));
+ __ pushl(Immediate(kDoubleNaNLow));
+ __ movsd(out, Address(ESP, 0));
+ __ addl(ESP, Immediate(8));
+ } else {
+ __ pushl(Immediate(kFloatNaN));
+ __ movss(out, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (is_double) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+ // the second input to be the output (we can simply swap inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+ X86Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ if (is_long) {
+ // Need to perform a subtract to get the sign right.
+ // op1 is already in the same location as the output.
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
+ Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
+
+ // Spare register to compute the subtraction to set condition code.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ // Subtract off op2_low.
+ __ movl(temp, output_lo);
+ __ subl(temp, op2_lo);
+
+ // Now reuse the same temp, together with the borrow, to finish the subtraction of op2_hi.
+ __ movl(temp, output_hi);
+ __ sbbl(temp, op2_hi);
+
+ // Now the condition code is correct.
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
+ __ cmovl(cond, output_lo, op2_lo);
+ __ cmovl(cond, output_hi, op2_hi);
+ } else {
+ Register out = locations->Out().AsRegister<Register>();
+ Register op2 = op2_loc.AsRegister<Register>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is the min/max keep it
+ // else out := op2 (done with a conditional move, no branch)
+ // done.
+
+ __ cmpl(out, op2);
+ Condition cond = is_min ? Condition::kGreater : Condition::kLess;
+ __ cmovl(cond, out, op2);
+ }
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Register to use to perform a long subtract to set cc.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
+ CreateLongLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
+ CreateLongLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+ GetAssembler()->sqrtsd(out, in);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
+ // The inputs plus one temp.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Location of reference to data array
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ // Starting offset within data array
+ const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+ // Start of char data within the array
+ const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+ Register obj = locations->InAt(0).AsRegister<Register>();
+ Register idx = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+ // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+ // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
+ codegen_->AddSlowPath(slow_path);
+
+ X86Assembler* assembler = GetAssembler();
+
+ __ cmpl(idx, Address(obj, count_offset));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+ // Get the actual element.
+ __ movl(temp, idx); // temp := idx.
+ __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
+ __ movl(out, Address(obj, value_offset)); // out := obj.array (out shares obj's register).
+ // out := zero-extended 16-bit char at out + 2*temp + data_offset.
+ __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+// Emits a load of a `size`-typed value from the address held in the low register of input 0
+// into the output location. Byte and short loads are sign-extended; a long is loaded as two
+// 32-bit halves into the output register pair. Any other type is fatal.
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
+  const Register address = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location out_loc = locations->Out();
+  const Address src(address, 0);
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movsxb(out_loc.AsRegister<Register>(), src);
+      break;
+    case Primitive::kPrimShort:
+      __ movsxw(out_loc.AsRegister<Register>(), src);
+      break;
+    case Primitive::kPrimInt:
+      __ movl(out_loc.AsRegister<Register>(), src);
+      break;
+    case Primitive::kPrimLong: {
+      // NOTE(review): the low half is written before the high half is read, so this relies on
+      // the output pair not sharing a register with `address` - confirm in the locations setup.
+      const Address src_high(address, 4);
+      __ movl(out_loc.AsRegisterPairLow<Register>(), src);
+      __ movl(out_loc.AsRegisterPairHigh<Register>(), src_high);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Type not recognized for peek: " << size;
+      UNREACHABLE();
+  }
+}
+
+// Location setup for the MemoryPeekByte intrinsic; delegates to CreateLongToIntLocations.
+void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
+  ArenaAllocator* const arena = arena_;
+  CreateLongToIntLocations(arena, invoke);
+}
+
+// Code generation for the MemoryPeekByte intrinsic: a byte-sized GenPeek.
+void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPeek(summary, Primitive::kPrimByte, GetAssembler());
+}
+
+// Location setup for the MemoryPeekIntNative intrinsic; delegates to CreateLongToIntLocations.
+void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  ArenaAllocator* const arena = arena_;
+  CreateLongToIntLocations(arena, invoke);
+}
+
+// Code generation for the MemoryPeekIntNative intrinsic: an int-sized GenPeek.
+void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPeek(summary, Primitive::kPrimInt, GetAssembler());
+}
+
+// Location setup for the MemoryPeekLongNative intrinsic; delegates to CreateLongToLongLocations.
+void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  ArenaAllocator* const arena = arena_;
+  CreateLongToLongLocations(arena, invoke);
+}
+
+// Code generation for the MemoryPeekLongNative intrinsic: a long-sized GenPeek.
+void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPeek(summary, Primitive::kPrimLong, GetAssembler());
+}
+
+// Location setup for the MemoryPeekShortNative intrinsic; delegates to CreateLongToIntLocations.
+void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  ArenaAllocator* const arena = arena_;
+  CreateLongToIntLocations(arena, invoke);
+}
+
+// Code generation for the MemoryPeekShortNative intrinsic: a short-sized GenPeek.
+void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPeek(summary, Primitive::kPrimShort, GetAssembler());
+}
+
+// Allocates an intrinsified, no-call LocationSummary for a poke-style invoke: input 0 in
+// register(s), input 1 either a constant or a register. For byte-sized pokes input 1 goes
+// through ByteRegisterOrConstant with EDX as the register; other sizes use RegisterOrConstant.
+static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
+                                         HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetInAt(0, Location::RequiresRegister());
+
+  HInstruction* value = invoke->InputAt(1);
+  const bool is_byte = (size == Primitive::kPrimByte);
+  summary->SetInAt(1, is_byte ? Location::ByteRegisterOrConstant(EDX, value)
+                              : Location::RegisterOrConstant(value));
+}
+
+// Emits a store of a `size`-typed value (input 1, constant or register) to the address held in
+// the low register of input 0. A long is stored as two 32-bit halves. Any other type is fatal.
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
+  const Register address = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location value_loc = locations->InAt(1);
+  const bool is_constant = value_loc.IsConstant();
+  const Address dst(address, 0);
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      if (!is_constant) {
+        __ movb(dst, value_loc.AsRegister<ByteRegister>());
+      } else {
+        __ movb(dst, Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
+      }
+      break;
+    case Primitive::kPrimShort:
+      if (!is_constant) {
+        __ movw(dst, value_loc.AsRegister<Register>());
+      } else {
+        __ movw(dst, Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
+      }
+      break;
+    case Primitive::kPrimInt:
+      if (!is_constant) {
+        __ movl(dst, value_loc.AsRegister<Register>());
+      } else {
+        __ movl(dst, Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
+      }
+      break;
+    case Primitive::kPrimLong: {
+      const Address dst_high(address, 4);
+      if (!is_constant) {
+        __ movl(dst, value_loc.AsRegisterPairLow<Register>());
+        __ movl(dst_high, value_loc.AsRegisterPairHigh<Register>());
+      } else {
+        // Split the 64-bit constant into two 32-bit immediates.
+        int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
+        __ movl(dst, Immediate(Low32Bits(value)));
+        __ movl(dst_high, Immediate(High32Bits(value)));
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Type not recognized for poke: " << size;
+      UNREACHABLE();
+  }
+}
+
+// Location setup for the MemoryPokeByte intrinsic (byte-sized value).
+void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
+  const Primitive::Type size = Primitive::kPrimByte;
+  CreateLongIntToVoidLocations(arena_, size, invoke);
+}
+
+// Code generation for the MemoryPokeByte intrinsic: a byte-sized GenPoke.
+void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPoke(summary, Primitive::kPrimByte, GetAssembler());
+}
+
+// Location setup for the MemoryPokeIntNative intrinsic (int-sized value).
+void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  const Primitive::Type size = Primitive::kPrimInt;
+  CreateLongIntToVoidLocations(arena_, size, invoke);
+}
+
+// Code generation for the MemoryPokeIntNative intrinsic: an int-sized GenPoke.
+void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPoke(summary, Primitive::kPrimInt, GetAssembler());
+}
+
+// Location setup for the MemoryPokeLongNative intrinsic (long-sized value).
+void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  const Primitive::Type size = Primitive::kPrimLong;
+  CreateLongIntToVoidLocations(arena_, size, invoke);
+}
+
+// Code generation for the MemoryPokeLongNative intrinsic: a long-sized GenPoke.
+void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPoke(summary, Primitive::kPrimLong, GetAssembler());
+}
+
+// Location setup for the MemoryPokeShortNative intrinsic (short-sized value).
+void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  const Primitive::Type size = Primitive::kPrimShort;
+  CreateLongIntToVoidLocations(arena_, size, invoke);
+}
+
+// Code generation for the MemoryPokeShortNative intrinsic: a short-sized GenPoke.
+void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  LocationSummary* const summary = invoke->GetLocations();
+  GenPoke(summary, Primitive::kPrimShort, GetAssembler());
+}
+
+// Location setup for the ThreadCurrentThread intrinsic: no inputs are constrained, and the
+// output is requested in a register.
+void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* summary =
+      new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetOut(Location::RequiresRegister());
+}
+
+// Code generation for the ThreadCurrentThread intrinsic: one fs-prefixed movl that loads the
+// word at Thread::PeerOffset into the output register.
+void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* summary = invoke->GetLocations();
+  const Register out = summary->Out().AsRegister<Register>();
+  X86Assembler* assembler = GetAssembler();
+  assembler->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
+}
+
+// Emits the load for an Unsafe get of `type` from address base + offset, where base is input 1
+// and offset is the low register of input 2. Int and reference values use one movl. A long uses
+// two movl instructions, or, when `is_volatile`, a single 64-bit load through the XMM temp that
+// is then split into the output pair. Any other type is fatal.
+static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
+                         bool is_volatile, X86Assembler* assembler) {
+  const Register base = locations->InAt(1).AsRegister<Register>();
+  const Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location output = locations->Out();
+  const Address field(base, offset, ScaleFactor::TIMES_1, 0);
+
+  if (type == Primitive::kPrimInt || type == Primitive::kPrimNot) {
+    __ movl(output.AsRegister<Register>(), field);
+    return;
+  }
+
+  if (type == Primitive::kPrimLong) {
+    const Register output_lo = output.AsRegisterPairLow<Register>();
+    const Register output_hi = output.AsRegisterPairHigh<Register>();
+    if (!is_volatile) {
+      // The low half is written before the high half is read, so the output must not share a
+      // register with base or offset.
+      __ movl(output_lo, field);
+      __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
+    } else {
+      // Need to use a XMM to read atomically.
+      const XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      __ movsd(temp, field);
+      __ movd(output_lo, temp);
+      __ psrlq(temp, Immediate(32));
+      __ movd(output_hi, temp);
+    }
+    return;
+  }
+
+  LOG(FATAL) << "Unsupported op size " << type;
+  UNREACHABLE();
+}
+
+// Allocates an intrinsified, no-call LocationSummary for an Unsafe get: input 0 (receiver) has
+// no location, inputs 1 and 2 are in registers, and the output is in register(s).
+//  - long and volatile: an FPU temp is added for the XMM-based read.
+//  - long and not volatile: the output is marked as overlapping the inputs.
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
+                                          bool is_long, bool is_volatile) {
+  LocationSummary* summary =
+      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetInAt(0, Location::NoLocation());  // Unused receiver.
+  summary->SetInAt(1, Location::RequiresRegister());
+  summary->SetInAt(2, Location::RequiresRegister());
+
+  if (is_long && is_volatile) {
+    // Need to use XMM to read volatile.
+    summary->AddTemp(Location::RequiresFpuRegister());
+  }
+
+  const bool output_must_overlap = is_long && !is_volatile;
+  if (output_must_overlap) {
+    summary->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  } else {
+    summary->SetOut(Location::RequiresRegister());
+  }
+}
+
+// Location setup for the UnsafeGet intrinsic (32-bit, non-volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
+  const bool is_long = false;
+  const bool is_volatile = false;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+// Location setup for the UnsafeGetVolatile intrinsic (32-bit, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  const bool is_long = false;
+  const bool is_volatile = true;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+// Location setup for the UnsafeGetLong intrinsic (64-bit, non-volatile).
+//
+// This must pass is_long = true. GenUnsafeGet emits a non-volatile long get as two 32-bit loads
+// and writes the low output register before the second load reads base/offset. Only the
+// (is_long, !is_volatile) path of CreateIntIntIntToIntLocations marks the output with
+// kOutputOverlap, which keeps the output pair from sharing a register with those inputs.
+// Passing false here left the output free to alias base or offset and corrupt the address used
+// for the high word.
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
+  const bool is_long = true;
+  const bool is_volatile = false;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+// Location setup for the UnsafeGetLongVolatile intrinsic (64-bit, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  const bool is_long = true;
+  const bool is_volatile = true;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+// Location setup for the UnsafeGetObject intrinsic (reference, non-volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
+  const bool is_long = false;
+  const bool is_volatile = false;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+// Location setup for the UnsafeGetObjectVolatile intrinsic (reference, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  const bool is_long = false;
+  const bool is_volatile = true;
+  CreateIntIntIntToIntLocations(arena_, invoke, is_long, is_volatile);
+}
+
+
+// Code generation for the UnsafeGet intrinsic: non-volatile int load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, is_volatile, GetAssembler());
+}
+// Code generation for the UnsafeGetVolatile intrinsic: volatile int load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, is_volatile, GetAssembler());
+}
+// Code generation for the UnsafeGetLong intrinsic: non-volatile long load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, is_volatile, GetAssembler());
+}
+// Code generation for the UnsafeGetLongVolatile intrinsic: volatile long load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, is_volatile, GetAssembler());
+}
+// Code generation for the UnsafeGetObject intrinsic: non-volatile reference load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, is_volatile, GetAssembler());
+}
+// Code generation for the UnsafeGetObjectVolatile intrinsic: volatile reference load.
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, is_volatile, GetAssembler());
+}
+
+
+// Allocates an intrinsified, no-call LocationSummary for an Unsafe put: input 0 (receiver) has
+// no location and inputs 1-3 are in registers. Temps depend on what is stored:
+//  - reference: one core register temp plus ECX, both used for card-marking;
+//  - volatile long: two FPU temps;
+//  - anything else: none.
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+                                                       Primitive::Type type,
+                                                       HInvoke* invoke,
+                                                       bool is_volatile) {
+  LocationSummary* summary =
+      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  summary->SetInAt(0, Location::NoLocation());  // Unused receiver.
+  for (int input = 1; input <= 3; ++input) {
+    summary->SetInAt(input, Location::RequiresRegister());
+  }
+
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    summary->AddTemp(Location::RequiresRegister());
+    // Ensure the value is in a byte register.
+    summary->AddTemp(Location::RegisterLocation(ECX));
+    return;
+  }
+
+  if (is_volatile && type == Primitive::kPrimLong) {
+    summary->AddTemp(Location::RequiresFpuRegister());
+    summary->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+// Location setup for the UnsafePut intrinsic (int, non-volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, is_volatile);
+}
+// Location setup for the UnsafePutOrdered intrinsic (int); same locations as a plain put.
+void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, is_volatile);
+}
+// Location setup for the UnsafePutVolatile intrinsic (int, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, is_volatile);
+}
+// Location setup for the UnsafePutObject intrinsic (reference, non-volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, is_volatile);
+}
+// Location setup for the UnsafePutObjectOrdered intrinsic; same locations as a plain put.
+void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, is_volatile);
+}
+// Location setup for the UnsafePutObjectVolatile intrinsic (reference, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, is_volatile);
+}
+// Location setup for the UnsafePutLong intrinsic (long, non-volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, is_volatile);
+}
+// Location setup for the UnsafePutLongOrdered intrinsic; same locations as a plain put.
+void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, is_volatile);
+}
+// Location setup for the UnsafePutLongVolatile intrinsic (long, volatile).
+void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, is_volatile);
+}
+
+// Emits the store for an Unsafe put of `type` to address base + offset, where base is input 1,
+// offset is the low register of input 2 and the value is input 3. Volatile puts are followed by
+// an mfence; reference puts additionally mark the GC card.
+// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
+// memory model.
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorX86* codegen) {
+  X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+  const Register base = locations->InAt(1).AsRegister<Register>();
+  const Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location value_loc = locations->InAt(3);
+  const Address field(base, offset, ScaleFactor::TIMES_1, 0);
+
+  if (type != Primitive::kPrimLong) {
+    __ movl(field, value_loc.AsRegister<Register>());
+  } else {
+    const Register value_lo = value_loc.AsRegisterPairLow<Register>();
+    const Register value_hi = value_loc.AsRegisterPairHigh<Register>();
+    if (!is_volatile) {
+      __ movl(field, value_lo);
+      __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
+    } else {
+      // Combine both halves in one XMM register so the value is written with a single
+      // 64-bit store.
+      const XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      const XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      __ movd(temp1, value_lo);
+      __ movd(temp2, value_hi);
+      __ punpckldq(temp1, temp2);
+      __ movsd(field, temp1);
+    }
+  }
+
+  if (is_volatile) {
+    __ mfence();
+  }
+
+  if (type == Primitive::kPrimNot) {
+    const Register temp = locations->GetTemp(0).AsRegister<Register>();
+    const Register card = locations->GetTemp(1).AsRegister<Register>();
+    codegen->MarkGCCard(temp, card, base, value_loc.AsRegister<Register>());
+  }
+}
+
+// Code generation for the UnsafePut intrinsic: non-volatile int store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutOrdered intrinsic: emitted like a non-volatile int store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutVolatile intrinsic: volatile int store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutObject intrinsic: non-volatile reference store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutObjectOrdered intrinsic: emitted like a non-volatile
+// reference store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutObjectVolatile intrinsic: volatile reference store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutLong intrinsic: non-volatile long store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutLongOrdered intrinsic: emitted like a non-volatile long
+// store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  const bool is_volatile = false;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, is_volatile, codegen_);
+}
+// Code generation for the UnsafePutLongVolatile intrinsic: volatile long store.
+void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  const bool is_volatile = true;
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, is_volatile, codegen_);
+}
+
+// Unimplemented intrinsics.
+
+// Defines Visit<Name> for both IntrinsicLocationsBuilderX86 and IntrinsicCodeGeneratorX86 with
+// an empty body: no LocationSummary is created and no code is emitted for the intrinsic.
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+// Intrinsics with no x86 implementation in this file: each entry expands to a pair of empty
+// visitor methods.
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace x86
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
new file mode 100644
index 0000000..e1e8260
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace x86 {
+
+class CodeGeneratorX86;
+class X86Assembler;
+
+// Intrinsic visitor for x86 that sets up locations: its Visit methods allocate a
+// LocationSummary for the invoke from the given arena.
+class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
+ public:
+ // `arena` is stored and used for every LocationSummary allocated by this builder.
+ explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {}
+
+ // Define visitor methods.
+
+ // Declares one Visit<Name>(HInvoke*) override per entry of INTRINSICS_LIST. The helper macros
+ // are undefined again right after use.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ // Allocator for the LocationSummary objects; set once in the constructor.
+ ArenaAllocator* arena_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86);
+};
+
+// Intrinsic visitor for x86 that emits code: its Visit methods read the invoke's
+// LocationSummary and emit instructions through the code generator's assembler.
+class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor {
+ public:
+ // `codegen` is stored and used for slow paths, null-check recording and card marking.
+ explicit IntrinsicCodeGeneratorX86(CodeGeneratorX86* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+ // Declares one Visit<Name>(HInvoke*) override per entry of INTRINSICS_LIST. The helper macros
+ // are undefined again right after use.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ // Assembler that the Visit methods emit instructions into.
+ X86Assembler* GetAssembler();
+
+ // Allocator used by the Visit methods when creating slow paths.
+ ArenaAllocator* GetAllocator();
+
+ // Owning code generator; set once in the constructor.
+ CodeGeneratorX86* codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86);
+};
+
+} // namespace x86
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 90170ce..5773459 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -146,6 +146,12 @@ void X86Assembler::movl(const Address& dst, Label* lbl) {
EmitLabel(lbl, dst.length_ + 5);
}
+// Emits BSWAP on a 32-bit register: the 0x0F escape byte followed by 0xC8 plus the register
+// number.
+void X86Assembler::bswapl(Register dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t kEscape = 0x0F;
+  const uint8_t kBswapBase = 0xC8;
+  EmitUint8(kEscape);
+  EmitUint8(kBswapBase + dst);
+}
+
void X86Assembler::movzxb(Register dst, ByteRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -725,6 +731,32 @@ void X86Assembler::xorpd(XmmRegister dst, XmmRegister src) {
}
+// Emits ANDPS xmm, xmm: opcode bytes 0F 54 followed by the register-register operand.
+void X86Assembler::andps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t opcode[] = {0x0F, 0x54};
+  for (uint8_t byte : opcode) {
+    EmitUint8(byte);
+  }
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+// Emits ANDPD xmm, xmm: the 0x66 prefix, opcode bytes 0F 54, then the register-register
+// operand.
+void X86Assembler::andpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t opcode[] = {0x66, 0x0F, 0x54};
+  for (uint8_t byte : opcode) {
+    EmitUint8(byte);
+  }
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+// Emits ORPD xmm, xmm: the 0x66 prefix, opcode bytes 0F 56, then the register-register operand.
+void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t opcode[] = {0x66, 0x0F, 0x56};
+  for (uint8_t byte : opcode) {
+    EmitUint8(byte);
+  }
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::xorps(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -733,6 +765,14 @@ void X86Assembler::xorps(XmmRegister dst, const Address& src) {
}
+// Emits ORPS xmm, xmm: opcode bytes 0F 56 followed by the register-register operand.
+void X86Assembler::orps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t opcode[] = {0x0F, 0x56};
+  for (uint8_t byte : opcode) {
+    EmitUint8(byte);
+  }
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::xorps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -741,6 +781,14 @@ void X86Assembler::xorps(XmmRegister dst, XmmRegister src) {
}
+// Emits ANDPS xmm, m128: opcode bytes 0F 54 followed by the operand that encodes `dst` and the
+// memory address `src`.
+void X86Assembler::andps(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t opcode[] = {0x0F, 0x54};
+  for (uint8_t byte : opcode) {
+    EmitUint8(byte);
+  }
+  EmitOperand(dst, src);
+}
+
+
void X86Assembler::andpd(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1090,6 +1138,13 @@ void X86Assembler::subl(Register reg, const Address& address) {
}
+// Emits SUB m32, r32 (opcode 0x29): subtracts `reg` from the 32-bit value at `address`.
+void X86Assembler::subl(const Address& address, Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t kSubMemReg = 0x29;
+  EmitUint8(kSubMemReg);
+  EmitOperand(reg, address);
+}
+
+
void X86Assembler::cdq() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x99);
@@ -1175,6 +1230,13 @@ void X86Assembler::sbbl(Register dst, const Address& address) {
}
+// Emits SBB m32, r32 (opcode 0x19): subtracts `src` plus the carry flag from the 32-bit value
+// at `address`.
+void X86Assembler::sbbl(const Address& address, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t kSbbMemReg = 0x19;
+  EmitUint8(kSbbMemReg);
+  EmitOperand(src, address);
+}
+
+
void X86Assembler::incl(Register reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x40 + reg);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4d20db0..6ccf2e3 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -231,6 +231,8 @@ class X86Assembler FINAL : public Assembler {
void movl(const Address& dst, const Immediate& imm);
void movl(const Address& dst, Label* lbl);
+ void bswapl(Register dst);
+
void movzxb(Register dst, ByteRegister src);
void movzxb(Register dst, const Address& src);
void movsxb(Register dst, ByteRegister src);
@@ -318,7 +320,13 @@ class X86Assembler FINAL : public Assembler {
void xorps(XmmRegister dst, const Address& src);
void xorps(XmmRegister dst, XmmRegister src);
+ void andpd(XmmRegister dst, XmmRegister src);
void andpd(XmmRegister dst, const Address& src);
+ void andps(XmmRegister dst, XmmRegister src);
+ void andps(XmmRegister dst, const Address& src);
+
+ void orpd(XmmRegister dst, XmmRegister src);
+ void orps(XmmRegister dst, XmmRegister src);
void flds(const Address& src);
void fstps(const Address& dst);
@@ -389,6 +397,7 @@ class X86Assembler FINAL : public Assembler {
void subl(Register dst, Register src);
void subl(Register reg, const Immediate& imm);
void subl(Register reg, const Address& address);
+ void subl(const Address& address, Register src);
void cdq();
@@ -407,6 +416,7 @@ class X86Assembler FINAL : public Assembler {
void sbbl(Register dst, Register src);
void sbbl(Register reg, const Immediate& imm);
void sbbl(Register reg, const Address& address);
+ void sbbl(const Address& address, Register src);
void incl(Register reg);
void incl(const Address& address);