diff options
author | Andreas Gampe <agampe@google.com> | 2014-12-29 17:43:08 -0800 |
---|---|---|
committer | Andreas Gampe <agampe@google.com> | 2015-01-15 10:21:11 -0800 |
commit | 71fb52fee246b7d511f520febbd73dc7a9bbca79 (patch) | |
tree | 444d91e910433aaf887bbdada28dfaa3160bebc2 | |
parent | 420457e6040184a6e1639a4c84fcc8e237bd8a3d (diff) | |
download | art-71fb52fee246b7d511f520febbd73dc7a9bbca79.zip art-71fb52fee246b7d511f520febbd73dc7a9bbca79.tar.gz art-71fb52fee246b7d511f520febbd73dc7a9bbca79.tar.bz2 |
ART: Optimizing compiler intrinsics
Add intrinsics infrastructure to the optimizing compiler.
Add almost all intrinsics supported by Quick to the x86-64 backend.
Further intrinsics require more assembler support.
Change-Id: I48de9b44c82886bb298d16e74e12a9506b8e8807
24 files changed, 1898 insertions, 75 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index db338f0..1f4a36c 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -98,6 +98,8 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/gvn.cc \ optimizing/inliner.cc \ optimizing/instruction_simplifier.cc \ + optimizing/intrinsics.cc \ + optimizing/intrinsics_x86_64.cc \ optimizing/locations.cc \ optimizing/nodes.cc \ optimizing/optimization.cc \ diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 3039852..84c0d93 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -293,9 +293,9 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } } INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0), - INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0), + INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, kIntrinsicFlagToFloatingPoint), INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0), - INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0), + INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint), INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32), INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64), diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f6ca6c7..9c2facb 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, HInvoke* invoke = nullptr; if (optimized_invoke_type == kVirtual) { invoke = new (arena_) HInvokeVirtual( - arena_, number_of_arguments, return_type, dex_pc, table_index); + arena_, number_of_arguments, return_type, dex_pc, method_idx, 
table_index); } else if (optimized_invoke_type == kInterface) { invoke = new (arena_) HInvokeInterface( arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1862061..c4ba0fd 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1190,7 +1190,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); // temp = temp[index_in_cache] __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())); + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7b19f44..6d2c3de 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1975,7 +1975,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir // Make sure that ArtMethod* is passed in W0 as per the calling convention DCHECK(temp.Is(w0)); size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() + - invoke->GetIndexInDexCache() * kHeapRefSize; + invoke->GetDexMethodIndex() * kHeapRefSize; // TODO: Implement all kinds of calls: // 1) boot -> boot diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 04e36cc..1a0df44 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1135,7 +1135,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec // temp = temp->dex_cache_resolved_methods_; __ 
movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); // (temp + offset_of_quick_compiled_code)() __ call(Address( temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5fc24f7..3d7f122 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -18,6 +18,8 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_x86_64.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> -class SlowPathCodeX86_64 : public SlowPathCode { - public: - SlowPathCodeX86_64() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); -}; - class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {} @@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) { return kEqual; } +void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + CpuRegister temp) { + // All registers are assumed to be correctly set up. 
+ + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); + + DCHECK(!IsLeafMethod()); + RecordPcInfo(invoke, invoke->GetDexPc()); +} + void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); } @@ -1123,30 +1136,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } -void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorX86_64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); +void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } - DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + codegen_->GenerateStaticOrDirectCall( + invoke, + invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -1182,10 +1196,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 343fba3..c501568 100644 --- 
a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -36,6 +36,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); +static constexpr bool kCoalescedImplicitNullCheck = false; + class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { public: InvokeDexCallingConvention() : CallingConvention( @@ -67,7 +69,20 @@ class InvokeDexCallingConventionVisitor { }; class CodeGeneratorX86_64; -class SlowPathCodeX86_64; + +class SlowPathCodeX86_64 : public SlowPathCode { + public: + SlowPathCodeX86_64() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64); +}; class ParallelMoveResolverX86_64 : public ParallelMoveResolver { public: @@ -226,6 +241,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { return false; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 493d93f..532167c 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -44,10 +44,10 @@ void HInliner::Run() { instr_it.Advance()) { HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect(); if (current != nullptr) { - if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) { + if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) { if (kIsDebugBuild) { std::string callee_name = - PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile()); + PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); bool should_inline = callee_name.find("$inline$") != std::string::npos; CHECK(!should_inline) << "Could not inline " << callee_name; } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc new file mode 100644 index 0000000..fe0e7f2 --- /dev/null +++ b/compiler/optimizing/intrinsics.cc @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics.h" + +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "driver/compiler_driver.h" +#include "invoke_type.h" +#include "nodes.h" +#include "quick/inline_method_analyser.h" + +namespace art { + +// Function that returns whether an intrinsic is static/direct or virtual. +static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kInterface; // Non-sensical for intrinsic. +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + return IsStatic; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kInterface; +} + + + +static Primitive::Type GetType(uint64_t data, bool is_op_size) { + if (is_op_size) { + switch (static_cast<OpSize>(data)) { + case kSignedByte: + return Primitive::Type::kPrimByte; + case kSignedHalf: + return Primitive::Type::kPrimShort; + case k32: + return Primitive::Type::kPrimInt; + case k64: + return Primitive::Type::kPrimLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << data; + UNREACHABLE(); + } + } else { + if ((data & kIntrinsicFlagIsLong) != 0) { + return Primitive::Type::kPrimLong; + } + if ((data & kIntrinsicFlagIsObject) != 0) { + return Primitive::Type::kPrimNot; + } + return Primitive::Type::kPrimInt; + } +} + +static Intrinsics GetIntrinsic(InlineMethod method) { + switch (method.opcode) { + // Floating-point conversions. + case kIntrinsicDoubleCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble; + case kIntrinsicFloatCvt: + return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ? + Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat; + + // Bit manipulations. 
+ case kIntrinsicReverseBits: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverse; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverse; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicReverseBytes: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimShort: + return Intrinsics::kShortReverseBytes; + case Primitive::Type::kPrimInt: + return Intrinsics::kIntegerReverseBytes; + case Primitive::Type::kPrimLong: + return Intrinsics::kLongReverseBytes; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Abs. + case kIntrinsicAbsDouble: + return Intrinsics::kMathAbsDouble; + case kIntrinsicAbsFloat: + return Intrinsics::kMathAbsFloat; + case kIntrinsicAbsInt: + return Intrinsics::kMathAbsInt; + case kIntrinsicAbsLong: + return Intrinsics::kMathAbsLong; + + // Min/max. + case kIntrinsicMinMaxDouble: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble; + case kIntrinsicMinMaxFloat: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat; + case kIntrinsicMinMaxInt: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt; + case kIntrinsicMinMaxLong: + return ((method.d.data & kIntrinsicFlagMin) == 0) ? + Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong; + + // Misc math. + case kIntrinsicSqrt: + return Intrinsics::kMathSqrt; + case kIntrinsicCeil: + return Intrinsics::kMathCeil; + case kIntrinsicFloor: + return Intrinsics::kMathFloor; + case kIntrinsicRint: + return Intrinsics::kMathRint; + case kIntrinsicRoundDouble: + return Intrinsics::kMathRoundDouble; + case kIntrinsicRoundFloat: + return Intrinsics::kMathRoundFloat; + + // System.arraycopy. 
+ case kIntrinsicSystemArrayCopyCharArray: + return Intrinsics::kSystemArrayCopyChar; + + // Thread.currentThread. + case kIntrinsicCurrentThread: + return Intrinsics::kThreadCurrentThread; + + // Memory.peek. + case kIntrinsicPeek: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPeekByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPeekShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPeekIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPeekLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // Memory.poke. + case kIntrinsicPoke: + switch (GetType(method.d.data, true)) { + case Primitive::Type::kPrimByte: + return Intrinsics::kMemoryPokeByte; + case Primitive::Type::kPrimShort: + return Intrinsics::kMemoryPokeShortNative; + case Primitive::Type::kPrimInt: + return Intrinsics::kMemoryPokeIntNative; + case Primitive::Type::kPrimLong: + return Intrinsics::kMemoryPokeLongNative; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + + // String. + case kIntrinsicCharAt: + return Intrinsics::kStringCharAt; + case kIntrinsicCompareTo: + return Intrinsics::kStringCompareTo; + case kIntrinsicIsEmptyOrLength: + return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ? + Intrinsics::kStringLength : Intrinsics::kStringIsEmpty; + case kIntrinsicIndexOf: + return ((method.d.data & kIntrinsicFlagBase0) == 0) ? 
+ Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; + + case kIntrinsicCas: + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimNot: + return Intrinsics::kUnsafeCASObject; + case Primitive::Type::kPrimInt: + return Intrinsics::kUnsafeCASInt; + case Primitive::Type::kPrimLong: + return Intrinsics::kUnsafeCASLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + case kIntrinsicUnsafeGet: { + const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet; + case Primitive::Type::kPrimLong: + return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + case kIntrinsicUnsafePut: { + enum Sync { kNoSync, kVolatile, kOrdered }; + const Sync sync = + ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile : + ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? 
kOrdered : + kNoSync; + switch (GetType(method.d.data, false)) { + case Primitive::Type::kPrimInt: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePut; + case kVolatile: + return Intrinsics::kUnsafePutVolatile; + case kOrdered: + return Intrinsics::kUnsafePutOrdered; + } + break; + case Primitive::Type::kPrimLong: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutLong; + case kVolatile: + return Intrinsics::kUnsafePutLongVolatile; + case kOrdered: + return Intrinsics::kUnsafePutLongOrdered; + } + break; + case Primitive::Type::kPrimNot: + switch (sync) { + case kNoSync: + return Intrinsics::kUnsafePutObject; + case kVolatile: + return Intrinsics::kUnsafePutObjectVolatile; + case kOrdered: + return Intrinsics::kUnsafePutObjectOrdered; + } + break; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } + break; + } + + // Virtual cases. + + case kIntrinsicReferenceGetReferent: + return Intrinsics::kReferenceGetReferent; + + // Quick inliner cases. Remove after refactoring. They are here so that we can use the + // compiler to warn on missing cases. + + case kInlineOpNop: + case kInlineOpReturnArg: + case kInlineOpNonWideConst: + case kInlineOpIGet: + case kInlineOpIPut: + return Intrinsics::kNone; + + // No default case to make the compiler warn on missing cases. + } + return Intrinsics::kNone; +} + +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { + // The DexFileMethodInliner should have checked whether the methods are agreeing with + // what we expect, i.e., static methods are called as such. Add another check here for + // our expectations: + // Whenever the intrinsic is marked as static-or-direct, report an error if we find an + // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual + // functions that will perform a check inline. If the precise type is known, however, + // the instruction will be sharpened to an InvokeStaticOrDirect. 
+ InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); + InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ? + invoke->AsInvokeStaticOrDirect()->GetInvokeType() : + invoke->IsInvokeVirtual() ? kVirtual : kSuper; + switch (intrinsic_type) { + case kStatic: + return (invoke_type == kStatic); + case kDirect: + return (invoke_type == kDirect); + case kVirtual: + // Call might be devirtualized. + return (invoke_type == kVirtual || invoke_type == kDirect); + + default: + return false; + } +} + +// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod. +void IntrinsicsRecognizer::Run() { + DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_); + DCHECK(inliner != nullptr); + + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + inst_it.Advance()) { + HInstruction* inst = inst_it.Current(); + if (inst->IsInvoke()) { + HInvoke* invoke = inst->AsInvoke(); + InlineMethod method; + if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { + Intrinsics intrinsic = GetIntrinsic(method); + + if (intrinsic != Intrinsics::kNone) { + if (!CheckInvokeType(intrinsic, invoke)) { + LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " + << intrinsic << " for " + << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_); + } else { + invoke->SetIntrinsic(intrinsic); + } + } + } + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { + switch (intrinsic) { + case Intrinsics::kNone: + os << "No intrinsic."; + break; +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + os << # Name; \ + break; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef STATIC_INTRINSICS_LIST +#undef VIRTUAL_INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return os; +} + +} // namespace 
art + diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h new file mode 100644 index 0000000..29cc8ef --- /dev/null +++ b/compiler/optimizing/intrinsics.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CompilerDriver; +class DexFile; + +// Recognize intrinsics from HInvoke nodes. +class IntrinsicsRecognizer : public HOptimization { + public: + IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver) + : HOptimization(graph, true, "intrinsics_recognition"), + dex_file_(dex_file), driver_(driver) {} + + void Run() OVERRIDE; + + private: + const DexFile* dex_file_; + CompilerDriver* driver_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); +}; + +class IntrinsicVisitor : public ValueObject { + public: + virtual ~IntrinsicVisitor() {} + + // Dispatch logic. + + void Dispatch(HInvoke* invoke) { + switch (invoke->GetIntrinsic()) { + case Intrinsics::kNone: + return; +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + case Intrinsics::k ## Name: \ + Visit ## Name(invoke); \ + return; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Do not put a default case. 
That way the compiler will complain if we missed a case. + } + } + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ + } +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + protected: + IntrinsicVisitor() {} + + private: + DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_ diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h new file mode 100644 index 0000000..29ca20c --- /dev/null +++ b/compiler/optimizing/intrinsics_list.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ + +// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected +// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual). 
+ +#define INTRINSICS_LIST(V) \ + V(DoubleDoubleToRawLongBits, kStatic) \ + V(DoubleLongBitsToDouble, kStatic) \ + V(FloatFloatToRawIntBits, kStatic) \ + V(FloatIntBitsToFloat, kStatic) \ + V(IntegerReverse, kStatic) \ + V(IntegerReverseBytes, kStatic) \ + V(LongReverse, kStatic) \ + V(LongReverseBytes, kStatic) \ + V(ShortReverseBytes, kStatic) \ + V(MathAbsDouble, kStatic) \ + V(MathAbsFloat, kStatic) \ + V(MathAbsLong, kStatic) \ + V(MathAbsInt, kStatic) \ + V(MathMinDoubleDouble, kStatic) \ + V(MathMinFloatFloat, kStatic) \ + V(MathMinLongLong, kStatic) \ + V(MathMinIntInt, kStatic) \ + V(MathMaxDoubleDouble, kStatic) \ + V(MathMaxFloatFloat, kStatic) \ + V(MathMaxLongLong, kStatic) \ + V(MathMaxIntInt, kStatic) \ + V(MathSqrt, kStatic) \ + V(MathCeil, kStatic) \ + V(MathFloor, kStatic) \ + V(MathRint, kStatic) \ + V(MathRoundDouble, kStatic) \ + V(MathRoundFloat, kStatic) \ + V(SystemArrayCopyChar, kStatic) \ + V(ThreadCurrentThread, kStatic) \ + V(MemoryPeekByte, kStatic) \ + V(MemoryPeekIntNative, kStatic) \ + V(MemoryPeekLongNative, kStatic) \ + V(MemoryPeekShortNative, kStatic) \ + V(MemoryPokeByte, kStatic) \ + V(MemoryPokeIntNative, kStatic) \ + V(MemoryPokeLongNative, kStatic) \ + V(MemoryPokeShortNative, kStatic) \ + V(StringCharAt, kDirect) \ + V(StringCompareTo, kDirect) \ + V(StringIsEmpty, kDirect) \ + V(StringIndexOf, kDirect) \ + V(StringIndexOfAfter, kDirect) \ + V(StringLength, kDirect) \ + V(UnsafeCASInt, kDirect) \ + V(UnsafeCASLong, kDirect) \ + V(UnsafeCASObject, kDirect) \ + V(UnsafeGet, kDirect) \ + V(UnsafeGetVolatile, kDirect) \ + V(UnsafeGetLong, kDirect) \ + V(UnsafeGetLongVolatile, kDirect) \ + V(UnsafePut, kDirect) \ + V(UnsafePutOrdered, kDirect) \ + V(UnsafePutVolatile, kDirect) \ + V(UnsafePutObject, kDirect) \ + V(UnsafePutObjectOrdered, kDirect) \ + V(UnsafePutObjectVolatile, kDirect) \ + V(UnsafePutLong, kDirect) \ + V(UnsafePutLongOrdered, kDirect) \ + V(UnsafePutLongVolatile, kDirect) \ + \ + V(ReferenceGetReferent, 
kVirtual) + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ +#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc new file mode 100644 index 0000000..c1f4c94 --- /dev/null +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -0,0 +1,984 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_x86_64.h" + +#include "code_generator_x86_64.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/x86_64/assembler_x86_64.h" +#include "utils/x86_64/constants_x86_64.h" + +namespace art { + +namespace x86_64 { + +static constexpr bool kIntrinsified = true; + +X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { + return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + const LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())-> + +// TODO: trg as memory. 
+static void MoveFromReturnRegister(Location trg, + Primitive::Type type, + CodeGeneratorX86_64* codegen) { + if (!trg.IsValid()) { + DCHECK(type == Primitive::kPrimVoid); + return; + } + + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + CpuRegister trg_reg = trg.AsRegister<CpuRegister>(); + if (trg_reg.AsRegister() != RAX) { + __ movl(trg_reg, CpuRegister(RAX)); + } + break; + } + case Primitive::kPrimLong: { + CpuRegister trg_reg = trg.AsRegister<CpuRegister>(); + if (trg_reg.AsRegister() != RAX) { + __ movq(trg_reg, CpuRegister(RAX)); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected void type for valid location " << trg; + UNREACHABLE(); + + case Primitive::kPrimDouble: { + XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>(); + if (trg_reg.AsFloatRegister() != XMM0) { + __ movsd(trg_reg, XmmRegister(XMM0)); + } + break; + } + case Primitive::kPrimFloat: { + XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>(); + if (trg_reg.AsFloatRegister() != XMM0) { + __ movss(trg_reg, XmmRegister(XMM0)); + } + break; + } + } +} + +static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { + if (invoke->InputCount() == 0) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + InvokeDexCallingConventionVisitor calling_convention_visitor; + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. 
+ HParallelMove parallel_move(arena); + + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr)); + } + + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); +} + +// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified +// call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored! +class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. 
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ jmp(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64); +}; + +#undef __ +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit); +} + +void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); 
+} +void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type size, + X86_64Assembler* assembler) { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + switch (size) { + case Primitive::kPrimShort: + // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. 
+ __ bswapl(out); + __ sarl(out, Immediate(16)); + break; + case Primitive::kPrimInt: + __ bswapl(out); + break; + case Primitive::kPrimLong: + __ bswapq(out); + break; + default: + LOG(FATAL) << "Unexpected size for reverse-bytes: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + + +// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we +// need is 64b. + +static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { + // TODO: Enable memory operations when the assembler supports them. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + // TODO: Allow x86 to work with memory. This requires assembler support, see below. + // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); // Immediate constant. + locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. 
+} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location output = locations->Out(); + CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (output.IsFpuRegister()) { + // In-register + XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + + if (is64bit) { + __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ movd(xmm_temp, cpu_temp); + __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); + } else { + __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); + __ movd(xmm_temp, cpu_temp); + __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); + } + } else { + // TODO: update when assembler support is available. + UNIMPLEMENTED(FATAL) << "Needs assembler support."; +// Once assembler support is available, in-memory operations look like this: +// if (is64bit) { +// DCHECK(output.IsDoubleStackSlot()); +// // No 64b and with literal. +// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); +// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp); +// } else { +// DCHECK(output.IsStackSlot()); +// // Can use and with a literal directly. 
+// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF))); +// } + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { + CreateFloatToFloatPlusTemps(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { + CreateFloatToFloatPlusTemps(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { + Location output = locations->Out(); + CpuRegister out = output.AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (is64bit) { + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. + __ addq(out, mask); + __ xorq(out, mask); + } else { + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. 
+ __ addl(out, mask); + __ xorl(out, mask); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, + X86_64Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + Label nan, done, op2_label; + if (is_double) { + __ ucomisd(out, op2); + } else { + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (is_double) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (is_double) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. 
+ __ Bind(&nan); + CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. + if (is_double) { + __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + } else { + __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + } + __ movd(out, cpu_temp, is_double); + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (is_double) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); // Immediate constant. 
+} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPPlusTempLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, + X86_64Assembler* assembler) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (is_long) { + __ cmpq(out, op2); + } else { + __ cmpl(out, op2); + } + + __ cmov(is_min ? 
Condition::kGreater : Condition::kLess, out, op2, is_long); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, true, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, false, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, true, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void 
IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + + GetAssembler()->sqrtsd(out, in); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { + // The inputs plus one temp. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + // Starting offset within data array + const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value(); + // Start of char data with array_ + const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); + + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location temp_loc = locations->GetTemp(0); + CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); + + // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we + // move to (coalesced) implicit checks, we have to do a null check below. + DCHECK(!kCoalescedImplicitNullCheck); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. 
+ // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). + + SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + + X86_64Assembler* assembler = GetAssembler(); + + __ cmpl(idx, Address(obj, count_offset)); + __ j(kAboveEqual, slow_path->GetEntryLabel()); + + // Get the actual element. + __ movl(temp, idx); // temp := idx. + __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx. + __ movl(out, Address(obj, value_offset)); // obj := obj.array. + // out = out[2*temp]. + __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset)); + + __ Bind(slow_path->GetExitLabel()); +} + +static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { + CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. + // x86 allows unaligned access. We do not have to check the input or use specific instructions + // to avoid a SIGBUS. 
+ switch (size) { + case Primitive::kPrimByte: + __ movsxb(out, Address(address, 0)); + break; + case Primitive::kPrimShort: + __ movsxw(out, Address(address, 0)); + break; + case Primitive::kPrimInt: + __ movl(out, Address(address, 0)); + break; + case Primitive::kPrimLong: + __ movq(out, Address(address, 0)); + break; + default: + LOG(FATAL) << "Type not recognized for peek: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { + GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { + CpuRegister 
address = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); + // x86 allows unaligned access. We do not have to check the input or use specific instructions + // to avoid a SIGBUS. + switch (size) { + case Primitive::kPrimByte: + __ movb(Address(address, 0), value); + break; + case Primitive::kPrimShort: + __ movw(Address(address, 0), value); + break; + case Primitive::kPrimInt: + __ movl(Address(address, 0), value); + break; + case Primitive::kPrimLong: + __ movq(Address(address, 0), value); + break; + default: + LOG(FATAL) << "Type not recognized for poke: " << size; + UNREACHABLE(); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { + GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + 
LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { + CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); + GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true)); +} + +static void GenUnsafeGet(LocationSummary* locations, bool is_long, + bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) { + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister trg = locations->Out().AsRegister<CpuRegister>(); + + if (is_long) { + __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } else { + // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object + // pointer will entail an unpack operation. + __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0)); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void 
IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler()); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler()); +} + +static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, + Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); +} + +// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 +// memory model. 
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, + CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } else { + __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); + } + + if (is_volatile) { + __ mfence(); + } + + if (type == Primitive::kPrimNot) { + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void 
IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(MathFloor) +UNIMPLEMENTED_INTRINSIC(MathCeil) +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h new file mode 100644 index 0000000..c1fa99c --- /dev/null +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace x86_64 { + +class CodeGeneratorX86_64; +class X86_64Assembler; + +class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); +}; + +class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {} + + // Define visitor methods. 
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + X86_64Assembler* GetAssembler(); + + ArenaAllocator* GetArena(); + + CodeGeneratorX86_64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64); +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ed5e260..9f2f9ec 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -20,7 +20,9 @@ namespace art { -LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) +LocationSummary::LocationSummary(HInstruction* instruction, + CallKind call_kind, + bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), environment_(instruction->GetBlock()->GetGraph()->GetArena(), @@ -29,7 +31,8 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind) call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), - live_registers_() { + live_registers_(), + intrinsified_(intrinsified) { inputs_.SetSize(instruction->InputCount()); for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 7df99d4..d41b3ae 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -463,7 +463,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { kCall }; - LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall); + LocationSummary(HInstruction* instruction, + CallKind call_kind = kNoCall, + bool intrinsified = false); void SetInAt(uint32_t at, Location location) { 
DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid()); @@ -574,6 +576,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return output_overlaps_; } + bool Intrinsified() const { + return intrinsified_; + } + private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; @@ -593,6 +599,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { // Registers that are in use at this position. RegisterSet live_registers_; + // Whether these are locations for an intrinsified call. + const bool intrinsified_; + ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint); ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint); DISALLOW_COPY_AND_ASSIGN(LocationSummary); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b98bc70..4838e1a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1580,19 +1580,18 @@ class HLongConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HLongConstant); }; +enum class Intrinsics { +#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name, +#include "intrinsics_list.h" + kNone, + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS +}; +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); + class HInvoke : public HInstruction { public: - HInvoke(ArenaAllocator* arena, - uint32_t number_of_arguments, - Primitive::Type return_type, - uint32_t dex_pc) - : HInstruction(SideEffects::All()), - inputs_(arena, number_of_arguments), - return_type_(return_type), - dex_pc_(dex_pc) { - inputs_.SetSize(number_of_arguments); - } - virtual size_t InputCount() const { return inputs_.Size(); } virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } @@ -1612,12 +1611,38 @@ class HInvoke : public HInstruction { uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexMethodIndex() const { return dex_method_index_; } + + Intrinsics GetIntrinsic() { + return 
intrinsic_; + } + + void SetIntrinsic(Intrinsics intrinsic) { + intrinsic_ = intrinsic; + } + DECLARE_INSTRUCTION(Invoke); protected: + HInvoke(ArenaAllocator* arena, + uint32_t number_of_arguments, + Primitive::Type return_type, + uint32_t dex_pc, + uint32_t dex_method_index) + : HInstruction(SideEffects::All()), + inputs_(arena, number_of_arguments), + return_type_(return_type), + dex_pc_(dex_pc), + dex_method_index_(dex_method_index), + intrinsic_(Intrinsics::kNone) { + inputs_.SetSize(number_of_arguments); + } + GrowableArray<HInstruction*> inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; + const uint32_t dex_method_index_; + Intrinsics intrinsic_; private: DISALLOW_COPY_AND_ASSIGN(HInvoke); @@ -1629,19 +1654,16 @@ class HInvokeStaticOrDirect : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, - uint32_t index_in_dex_cache, + uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - index_in_dex_cache_(index_in_dex_cache), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), invoke_type_(invoke_type) {} - uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; } InvokeType GetInvokeType() const { return invoke_type_; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); private: - const uint32_t index_in_dex_cache_; const InvokeType invoke_type_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); @@ -1653,8 +1675,9 @@ class HInvokeVirtual : public HInvoke { uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, + uint32_t dex_method_index, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} uint32_t GetVTableIndex() const { return vtable_index_; } @@ -1675,8 +1698,7 @@ class HInvokeInterface : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, 
uint32_t imt_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc), - dex_method_index_(dex_method_index), + : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} uint32_t GetImtIndex() const { return imt_index_; } @@ -1685,7 +1707,6 @@ class HInvokeInterface : public HInvoke { DECLARE_INSTRUCTION(InvokeInterface); private: - const uint32_t dex_method_index_; const uint32_t imt_index_; DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index eaecbb0..6056373 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -25,6 +25,7 @@ #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" @@ -32,6 +33,7 @@ #include "gvn.h" #include "inliner.h" #include "instruction_simplifier.h" +#include "intrinsics.h" #include "jni/quick/jni_compiler.h" #include "mirror/art_method-inl.h" #include "nodes.h" @@ -215,9 +217,12 @@ static void RunOptimizations(HGraph* graph, BoundsCheckElimination bce(graph); InstructionSimplifier simplify2(graph); + IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); + HOptimization* optimizations[] = { &redundant_phi, &dead_phi, + &intrinsics, &dce, &fold, &simplify1, diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index d2f4f9b..1d155f9 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1217,10 +1217,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { Location expected_location = locations->InAt(use->GetInputIndex()); - if 
(expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(use->GetUser(), source, expected_location); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } } use = use->GetNext(); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index d843a72..c7414a1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -184,6 +184,20 @@ void X86_64Assembler::movl(const Address& dst, const Immediate& imm) { EmitImmediate(imm); } + +void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) { + cmov(c, dst, src, true); +} + +void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x40 + c); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + + void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalByteRegNormalizingRex32(dst, src); @@ -369,19 +383,26 @@ void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) { void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) { + movd(dst, src, true); +} + +void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { + movd(dst, src, true); +} + +void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool 
is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitRex64(dst, src); + EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex()); EmitUint8(0x0F); EmitUint8(0x6E); EmitOperand(dst.LowBits(), Operand(src)); } - -void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) { +void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitRex64(src, dst); + EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex()); EmitUint8(0x0F); EmitUint8(0x7E); EmitOperand(src.LowBits(), Operand(dst)); @@ -826,6 +847,39 @@ void X86_64Assembler::andpd(XmmRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x54); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x54); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x56); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x56); + EmitXmmRegisterOperand(dst.LowBits(), src); +} void X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1757,6 +1811,20 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) { EmitUint8(0xC0 + dst.LowBits()); } +void 
X86_64Assembler::bswapl(CpuRegister dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, false, false, false, dst.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0xC8 + dst.LowBits()); +} + +void X86_64Assembler::bswapq(CpuRegister dst) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex(false, true, false, false, dst.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0xC8 + dst.LowBits()); +} + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index ac8bc9a..5c8d608 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -276,6 +276,9 @@ class X86_64Assembler FINAL : public Assembler { void movl(const Address& dst, CpuRegister src); void movl(const Address& dst, const Immediate& imm); + void cmov(Condition c, CpuRegister dst, CpuRegister src); // This is the 64b version. + void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit); + void movzxb(CpuRegister dst, CpuRegister src); void movzxb(CpuRegister dst, const Address& src); void movsxb(CpuRegister dst, CpuRegister src); @@ -303,8 +306,10 @@ class X86_64Assembler FINAL : public Assembler { void movsxd(CpuRegister dst, CpuRegister src); void movsxd(CpuRegister dst, const Address& src); - void movd(XmmRegister dst, CpuRegister src); - void movd(CpuRegister dst, XmmRegister src); + void movd(XmmRegister dst, CpuRegister src); // Note: this is the r64 version, formally movq. + void movd(CpuRegister dst, XmmRegister src); // Note: this is the r64 version, formally movq. 
+ void movd(XmmRegister dst, CpuRegister src, bool is64bit); + void movd(CpuRegister dst, XmmRegister src, bool is64bit); void addss(XmmRegister dst, XmmRegister src); void addss(XmmRegister dst, const Address& src); @@ -360,6 +365,11 @@ class X86_64Assembler FINAL : public Assembler { void xorps(XmmRegister dst, XmmRegister src); void andpd(XmmRegister dst, const Address& src); + void andpd(XmmRegister dst, XmmRegister src); + void andps(XmmRegister dst, XmmRegister src); + + void orpd(XmmRegister dst, XmmRegister src); + void orps(XmmRegister dst, XmmRegister src); void flds(const Address& src); void fstps(const Address& dst); @@ -504,6 +514,9 @@ class X86_64Assembler FINAL : public Assembler { void setcc(Condition condition, CpuRegister dst); + void bswapl(CpuRegister dst); + void bswapq(CpuRegister dst); + // // Macros for High-level operations. // diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index e93f45c..6df4144 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -689,6 +689,22 @@ TEST_F(AssemblerX86_64Test, Xorpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } +TEST_F(AssemblerX86_64Test, Andps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andps, "andps %{reg2}, %{reg1}"), "andps"); +} + +TEST_F(AssemblerX86_64Test, Andpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd"); +} + +TEST_F(AssemblerX86_64Test, Orps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps"); +} + +TEST_F(AssemblerX86_64Test, Orpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd"); +} + // X87 std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, @@ -757,6 +773,14 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) { // MISC // ////////// +TEST_F(AssemblerX86_64Test, Bswapl) { + 
DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl"); +} + +TEST_F(AssemblerX86_64Test, Bswapq) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq"); +} + std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { // From Condition diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h index 72b696b..3463025 100644 --- a/runtime/quick/inline_method_analyser.h +++ b/runtime/quick/inline_method_analyser.h @@ -103,6 +103,9 @@ enum IntrinsicFlags { kIntrinsicFlagIsObject = 4, // kIntrinsicUnsafePut kIntrinsicFlagIsOrdered = 8, + + // kIntrinsicDoubleCvt, kIntrinsicFloatCvt. + kIntrinsicFlagToFloatingPoint = kIntrinsicFlagMin, }; struct InlineIGetIPutData { diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java index 56972ff..862fe06 100644 --- a/test/082-inline-execute/src/Main.java +++ b/test/082-inline-execute/src/Main.java @@ -119,6 +119,9 @@ public class Main { } } + // Break up the charAt tests. 
The optimizing compiler doesn't optimize methods with try-catch yet, + // so we need to separate out the tests that are expected to throw exception + public static void test_String_charAt() { String testStr = "Now is the time"; @@ -127,6 +130,12 @@ public class Main { Assert.assertEquals(' ', testStr.charAt(10)); Assert.assertEquals('e', testStr.charAt(testStr.length()-1)); + test_String_charAtExc(); + test_String_charAtExc2(); + } + + private static void test_String_charAtExc() { + String testStr = "Now is the time"; try { testStr.charAt(-1); Assert.fail(); @@ -146,6 +155,19 @@ public class Main { } } + private static void test_String_charAtExc2() { + try { + test_String_charAtExc3(); + Assert.fail(); + } catch (StringIndexOutOfBoundsException expected) { + } + } + + private static void test_String_charAtExc3() { + String testStr = "Now is the time"; + Assert.assertEquals('N', testStr.charAt(-1)); + } + static int start; private static int[] negIndex = { -100000 }; public static void test_String_indexOf() { |