93 files changed, 5178 insertions, 2461 deletions
@@ -67,8 +67,13 @@ ifdef TARGET_2ND_ARCH rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex endif +ifneq ($(TMPDIR),) + rm -rf $(TMPDIR)/$(USER)/test-*/dalvik-cache/* + rm -rf $(TMPDIR)/android-data/dalvik-cache/* +else rm -rf /tmp/$(USER)/test-*/dalvik-cache/* rm -rf /tmp/android-data/dalvik-cache/* +endif .PHONY: clean-oat-target clean-oat-target: @@ -309,14 +314,15 @@ else .PHONY: oat-target-$(1) oat-target-$(1): $$(OUT_OAT_FILE) -$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD) +$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD_DEPENDENCY) @mkdir -p $$(dir $$@) $(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \ --boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \ --dex-location=/$(1) --oat-file=$$@ \ --instruction-set=$(DEX2OAT_TARGET_ARCH) \ --instruction-set-features=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \ - --android-root=$(PRODUCT_OUT)/system --include-patch-information + --android-root=$(PRODUCT_OUT)/system --include-patch-information \ + --runtime-arg -Xnorelocate endif diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 6e27190..17c478c 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -91,6 +91,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \ runtime/entrypoints_order_test.cc \ runtime/exception_test.cc \ + runtime/gc/accounting/card_table_test.cc \ runtime/gc/accounting/space_bitmap_test.cc \ runtime/gc/heap_test.cc \ runtime/gc/space/dlmalloc_space_base_test.cc \ @@ -113,6 +114,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/monitor_pool_test.cc \ runtime/monitor_test.cc \ runtime/parsed_options_test.cc \ + runtime/proxy_test.cc \ runtime/reference_table_test.cc \ runtime/thread_pool_test.cc \ runtime/transaction_test.cc \ @@ -123,7 +125,6 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ COMPILER_GTEST_COMMON_SRC_FILES := \ runtime/jni_internal_test.cc \ - runtime/proxy_test.cc \ runtime/reflection_test.cc \ compiler/dex/global_value_numbering_test.cc \ compiler/dex/local_value_numbering_test.cc \ diff --git a/build/Android.oat.mk b/build/Android.oat.mk index cd6b13a..10936a4 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -26,7 +26,7 @@ include art/build/Android.common_path.mk # Use dex2oat debug version for better error reporting # $(1): 2ND_ or undefined, 2ND_ for 32-bit host builds. 
define create-core-oat-host-rules -$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD) +$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY) @echo "host dex2oat: $$@ ($$?)" @mkdir -p $$(dir $$@) $$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \ @@ -49,7 +49,7 @@ $(eval $(call create-core-oat-host-rules,2ND_)) endif define create-core-oat-target-rules -$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD) +$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY) @echo "target dex2oat: $$@ ($$?)" @mkdir -p $$(dir $$@) $$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \ diff --git a/compiler/compilers.cc b/compiler/compilers.cc index 250924a..5cf846f 100644 --- a/compiler/compilers.cc +++ b/compiler/compilers.cc @@ -38,9 +38,6 @@ extern "C" art::CompiledMethod* ArtQuickJniCompileMethod(art::CompilerDriver* dr uint32_t access_flags, uint32_t method_idx, const art::DexFile& dex_file); -// Hack for CFI CIE initialization -extern std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64); - void QuickCompiler::Init() const { ArtInitQuickCompilerContext(GetCompilerDriver()); } @@ -126,17 +123,6 @@ Backend* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_ return mir_to_lir; } -std::vector<uint8_t>* QuickCompiler::GetCallFrameInformationInitialization( - const CompilerDriver& driver) const { - if (driver.GetInstructionSet() == kX86) { - return X86CFIInitialization(false); - } - if (driver.GetInstructionSet() == kX86_64) { - return X86CFIInitialization(true); - } - return nullptr; -} - CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, diff --git a/compiler/compilers.h b/compiler/compilers.h index 2c231e1..151bf6f 100644 --- a/compiler/compilers.h +++ b/compiler/compilers.h @@ -56,17 +56,6 @@ class QuickCompiler : public Compiler { void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE {} - /* - * @brief Generate and return Dwarf CFI initialization, if supported by the - * backend. - * @param driver CompilerDriver for this compile. - * @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF - * information. - * @note This is used for backtrace information in generated code. - */ - std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver) const - OVERRIDE; - private: DISALLOW_COPY_AND_ASSIGN(QuickCompiler); }; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 8d0a5a3..6aee563 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -69,6 +69,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = { MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) : reg_location_(NULL), + block_id_map_(std::less<unsigned int>(), arena->Adapter()), cu_(cu), ssa_base_vregs_(NULL), ssa_subscripts_(NULL), @@ -101,11 +102,14 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) num_blocks_(0), current_code_item_(NULL), dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc), + m_units_(arena->Adapter()), + method_stack_(arena->Adapter()), current_method_(kInvalidEntry), current_offset_(kInvalidEntry), def_count_(0), opcode_count_(NULL), num_ssa_regs_(0), + extended_basic_blocks_(arena->Adapter()), method_sreg_(0), attributes_(METHOD_IS_LEAF), // Start with leaf assumption, change on encountering invoke. 
checkstats_(NULL), diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 768ae21..491d72e 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -27,6 +27,7 @@ #include "mir_method_info.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" +#include "utils/arena_containers.h" #include "utils/scoped_arena_containers.h" #include "reg_location.h" #include "reg_storage.h" @@ -1051,8 +1052,8 @@ class MIRGraph { std::set<uint32_t> catches_; // TODO: make these private. - RegLocation* reg_location_; // Map SSA names to location. - SafeMap<unsigned int, unsigned int> block_id_map_; // Block collapse lookup cache. + RegLocation* reg_location_; // Map SSA names to location. + ArenaSafeMap<unsigned int, unsigned int> block_id_map_; // Block collapse lookup cache. static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst]; static const uint32_t analysis_attributes_[kMirOpLast]; @@ -1171,15 +1172,15 @@ class MIRGraph { unsigned int num_blocks_; const DexFile::CodeItem* current_code_item_; GrowableArray<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. - std::vector<DexCompilationUnit*> m_units_; // List of methods included in this graph + ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset) - std::vector<MIRLocation> method_stack_; // Include stack + ArenaVector<MIRLocation> method_stack_; // Include stack int current_method_; DexOffset current_offset_; // Offset in code units int def_count_; // Used to estimate size of ssa name storage. int* opcode_count_; // Dex opcode coverage stats. int num_ssa_regs_; // Number of names following SSA transformation. - std::vector<BasicBlockId> extended_basic_blocks_; // Heads of block "traces". + ArenaVector<BasicBlockId> extended_basic_blocks_; // Heads of block "traces". int method_sreg_; unsigned int attributes_; Checkstats* checkstats_; diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 5059c5f..b133991 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -43,8 +43,7 @@ namespace art { * add rARM_PC, r_disp ; This is the branch from which we compute displacement * cbnz r_idx, lp */ -void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, - RegLocation rl_src) { +void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); @@ -92,8 +91,7 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, } -void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, - RegLocation rl_src) { +void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index e0b8ec6..072acbe 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -84,6 +84,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; // Required for target - Dalvik-level generators. 
+ void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, @@ -92,12 +94,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); - void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -112,16 +108,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); - void GenNotLong(RegLocation rl_dest, RegLocation rl_src); - void GenNegLong(RegLocation rl_dest, RegLocation rl_src); - void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div); RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -144,8 +130,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); // Required for target - single operation generators. 
LIR* OpUnconditionalBranch(LIR* target); @@ -201,6 +187,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { size_t GetInstructionOffset(LIR* lir); private: + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int value); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index dd14ed9..6711ab3 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1039,15 +1039,6 @@ bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { #endif } -void ArmMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { - LOG(FATAL) << "Unexpected use GenNotLong()"; -} - -void ArmMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div) { - LOG(FATAL) << "Unexpected use GenDivRemLong()"; -} - void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); @@ -1173,29 +1164,23 @@ void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, StoreValueWide(rl_dest, rl_result); } -void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenAddLong for Arm"; -} - -void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenSubLong for Arm"; -} - -void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenAndLong for Arm"; -} +void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + switch (opcode) { + case Instruction::MUL_LONG: + case Instruction::MUL_LONG_2ADDR: + GenMulLong(opcode, rl_dest, rl_src1, rl_src2); + return; + case Instruction::NEG_LONG: + GenNegLong(rl_dest, rl_src2); + return; -void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenOrLong for Arm"; -} + default: + break; + } -void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of genXoLong for Arm"; + // Fallback for all other ops. + Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); } /* diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 90cb156..a449cbd 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -267,6 +267,8 @@ enum ArmOpcode { kA64Fcvtzs2xf, // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0]. kA64Fcvt2Ss, // fcvt [0001111000100010110000] rn[9-5] rd[4-0]. kA64Fcvt2sS, // fcvt [0001111001100010010000] rn[9-5] rd[4-0]. + kA64Fcvtms2ws, // fcvtms [0001111000110000000000] rn[9-5] rd[4-0]. + kA64Fcvtms2xS, // fcvtms [1001111001110000000000] rn[9-5] rd[4-0]. kA64Fdiv3fff, // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0]. kA64Fmax3fff, // fmax[000111100s1] rm[20-16] [010010] rn[9-5] rd[4-0]. kA64Fmin3fff, // fmin[000111100s1] rm[20-16] [010110] rn[9-5] rd[4-0]. 
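The two fcvtms entries added above, together with the frintp/frintm/frintn entries in the next hunk, back the new Math.ceil/floor/rint/round intrinsics that show up later in this diff (fp_arm64.cc). The round case leans on the Java definition round(x) = floor(x + 0.5): one fadd plus one fcvtms (convert to signed integer, rounding toward minus infinity) is enough. A rough host-side sketch of that identity, in plain C++ rather than ART code, and ignoring fcvtms NaN/overflow saturation:

// Host-side illustration (not ART code) of the semantics the new inline relies on.
#include <cassert>
#include <cmath>
#include <cstdint>

// Emulates FCVTMS: convert to a signed integer, rounding toward minus infinity.
static int64_t fcvtms(double x) {
  return static_cast<int64_t>(std::floor(x));
}

// round(x) lowered the way the diff's GenInlinedRound does it: add 0.5, then fcvtms.
static int64_t round_via_fcvtms(double x) {
  return fcvtms(x + 0.5);
}

int main() {
  assert(round_via_fcvtms(2.5) == 3);
  assert(round_via_fcvtms(-2.5) == -2);   // half-way cases round up, like Math.round
  assert(round_via_fcvtms(-2.51) == -3);
  return 0;
}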
@@ -278,6 +280,9 @@ enum ArmOpcode { kA64Fmov2xS, // fmov[1001111001101111000000] rn[9-5] rd[4-0]. kA64Fmul3fff, // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0]. kA64Fneg2ff, // fneg[000111100s100001010000] rn[9-5] rd[4-0]. + kA64Frintp2ff, // frintp [000111100s100100110000] rn[9-5] rd[4-0]. + kA64Frintm2ff, // frintm [000111100s100101010000] rn[9-5] rd[4-0]. + kA64Frintn2ff, // frintn [000111100s100100010000] rn[9-5] rd[4-0]. kA64Frintz2ff, // frintz [000111100s100101110000] rn[9-5] rd[4-0]. kA64Fsqrt2ff, // fsqrt[000111100s100001110000] rn[9-5] rd[4-0]. kA64Fsub3fff, // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index c46be53..15c89f2 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -260,6 +260,14 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fcvt", "!0s, !1S", kFixupNone), + ENCODING_MAP(kA64Fcvtms2ws, NO_VARIANTS(0x1e300000), + kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fcvtms", "!0w, !1s", kFixupNone), + ENCODING_MAP(kA64Fcvtms2xS, NO_VARIANTS(0x9e700000), + kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fcvtms", "!0x, !1S", kFixupNone), ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, @@ -304,6 +312,18 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fneg", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Frintp2ff), FLOAT_VARIANTS(0x1e24c000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "frintp", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Frintm2ff), FLOAT_VARIANTS(0x1e254000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "frintm", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Frintn2ff), FLOAT_VARIANTS(0x1e244000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "frintn", "!0f, !1f", kFixupNone), ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000), kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, @@ -521,7 +541,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { ENCODING_MAP(WIDE(kA64StpPre4ffXD), CUSTOM_VARIANTS(0x2d800000, 0x6d800000), kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, - "stp", "!0r, !1f, [!2X, #!3D]!!", kFixupNone), + "stp", "!0f, !1f, [!2X, #!3D]!!", kFixupNone), ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000), kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6fa8a4a..7c5c4fa 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -43,8 +43,7 @@ namespace art { * br r_base * quit: */ -void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t 
table_offset, - RegLocation rl_src) { +void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); @@ -96,8 +95,7 @@ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, } -void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, - RegLocation rl_src) { +void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 18f2a29..2cd24c6 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -59,332 +59,340 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { bool initialized_; }; - public: - Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); - - // Required for target - codegen helpers. - bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, - RegLocation rl_dest, int lit) OVERRIDE; - bool SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, - RegLocation rl_dest, int64_t lit); - bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, - RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; - bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div, - RegLocation rl_src, RegLocation rl_dest, int64_t lit); - bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; - LIR* CheckSuspendUsingLoad() OVERRIDE; - RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE; - LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size, VolatileKind is_volatile) OVERRIDE; - LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest, - VolatileKind is_volatile) - OVERRIDE; - LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, - OpSize size) OVERRIDE; - LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale) - OVERRIDE; - LIR* LoadConstantNoClobber(RegStorage r_dest, int value); - LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size, VolatileKind is_volatile) OVERRIDE; - LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, - VolatileKind is_volatile) OVERRIDE; - LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, - OpSize size) OVERRIDE; - LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) - OVERRIDE; - void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE; - LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, - int offset, int check_value, LIR* target, LIR** compare) OVERRIDE; - - // Required for target - register utilities. 
- RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; - RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE { - if (wide_kind == kWide || wide_kind == kRef) { - return As64BitReg(TargetReg(symbolic_reg)); - } else { - return Check32BitReg(TargetReg(symbolic_reg)); - } - } - RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE { + public: + Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + // Required for target - codegen helpers. + bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, + RegLocation rl_dest, int lit) OVERRIDE; + bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, + RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; + bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div, + RegLocation rl_src, RegLocation rl_dest, int64_t lit); + bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; + LIR* CheckSuspendUsingLoad() OVERRIDE; + RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE; + LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size, VolatileKind is_volatile) OVERRIDE; + LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest, + VolatileKind is_volatile) OVERRIDE; + LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, + OpSize size) OVERRIDE; + LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale) + OVERRIDE; + LIR* LoadConstantNoClobber(RegStorage r_dest, int value) OVERRIDE; + LIR* LoadConstantWide(RegStorage r_dest, int64_t value) OVERRIDE; + LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size, + VolatileKind is_volatile) OVERRIDE; + LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, VolatileKind is_volatile) + OVERRIDE; + LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, + OpSize size) OVERRIDE; + LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) OVERRIDE; + void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE; + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, + int offset, int check_value, LIR* target, LIR** compare) OVERRIDE; + + // Required for target - register utilities. + RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; + RegStorage TargetReg(SpecialTargetRegister symbolic_reg, WideKind wide_kind) OVERRIDE { + if (wide_kind == kWide || wide_kind == kRef) { return As64BitReg(TargetReg(symbolic_reg)); + } else { + return Check32BitReg(TargetReg(symbolic_reg)); } - RegStorage GetArgMappingToPhysicalReg(int arg_num); - RegLocation GetReturnAlt(); - RegLocation GetReturnWideAlt(); - RegLocation LocCReturn(); - RegLocation LocCReturnRef(); - RegLocation LocCReturnDouble(); - RegLocation LocCReturnFloat(); - RegLocation LocCReturnWide(); - ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; - void AdjustSpillMask(); - void ClobberCallerSave(); - void FreeCallTemps(); - void LockCallTemps(); - void CompilerInitializeRegAlloc(); - - // Required for target - miscellaneous. 
- void AssembleLIR(); - uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset); - int AssignInsnOffsets(); - void AssignOffsets(); - uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); - void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; - void SetupTargetResourceMasks(LIR* lir, uint64_t flags, - ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; - const char* GetTargetInstFmt(int opcode); - const char* GetTargetInstName(int opcode); - std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - ResourceMask GetPCUseDefEncoding() const OVERRIDE; - uint64_t GetTargetInstFlags(int opcode); - size_t GetInsnSize(LIR* lir) OVERRIDE; - bool IsUnconditionalBranch(LIR* lir); - - // Get the register class for load/store of a field. - RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; - - // Required for target - Dalvik-level generators. - void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation lr_shift); - void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2); - void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale); - void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_src, int scale, bool card_mark); - void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_shift); - void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); - bool GenInlinedReverseBits(CallInfo* info, OpSize size); - bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE; - bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE; - bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); - bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long); - bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double); - bool GenInlinedSqrt(CallInfo* info); - bool GenInlinedPeek(CallInfo* info, OpSize size); - bool GenInlinedPoke(CallInfo* info, OpSize size); - bool GenInlinedAbsLong(CallInfo* info); - void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); - void GenNotLong(RegLocation rl_dest, RegLocation rl_src); - void GenNegLong(RegLocation rl_dest, RegLocation rl_src); - void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenDivRemLong(Instruction::Code opcode, RegLocation 
rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div); - RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); - RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); - void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenDivZeroCheckWide(RegStorage reg); - void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); - void GenExitSequence(); - void GenSpecialExitSequence(); - void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); - void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); - void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); - void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; - void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, - int32_t true_val, int32_t false_val, RegStorage rs_dest, - int dest_reg_class) OVERRIDE; - // Helper used in the above two. - void GenSelect(int32_t left, int32_t right, ConditionCode code, RegStorage rs_dest, - int result_reg_class); - - bool GenMemBarrier(MemBarrierKind barrier_kind); - void GenMonitorEnter(int opt_flags, RegLocation rl_src); - void GenMonitorExit(int opt_flags, RegLocation rl_src); - void GenMoveException(RegLocation rl_dest); - void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, - int first_bit, int second_bit); - void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); - void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); - - // Spill core and FP registers. Returns the SP difference: either spill size, or whole - // frame size. - int SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size); - - // Unspill core and FP registers. - void UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size); - - // Required for target - single operation generators. 
- LIR* OpUnconditionalBranch(LIR* target); - LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); - LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target); - LIR* OpCondBranch(ConditionCode cc, LIR* target); - LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target); - LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src); - LIR* OpIT(ConditionCode cond, const char* guide); - void OpEndIT(LIR* it); - LIR* OpMem(OpKind op, RegStorage r_base, int disp); - LIR* OpPcRelLoad(RegStorage reg, LIR* target); - LIR* OpReg(OpKind op, RegStorage r_dest_src); - void OpRegCopy(RegStorage r_dest, RegStorage r_src); - LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); - LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value); - LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); - LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); - LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); - LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); - LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); - LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value); - LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); - LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); - LIR* OpTestSuspend(LIR* target); - LIR* OpVldm(RegStorage r_base, int count); - LIR* OpVstm(RegStorage r_base, int count); - void OpRegCopyWide(RegStorage dest, RegStorage src); - - LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size); - LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); - LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, - int shift); - LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, - A64RegExtEncodings ext, uint8_t amount); - LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); - LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, - A64RegExtEncodings ext, uint8_t amount); - static const ArmEncodingMap EncodingMap[kA64Last]; - int EncodeShift(int code, int amount); - int EncodeExtend(int extend_type, int amount); - bool IsExtendEncoding(int encoded_value); - int EncodeLogicalImmediate(bool is_wide, uint64_t value); - uint64_t DecodeLogicalImmediate(bool is_wide, int value); - - ArmConditionCode ArmConditionEncoding(ConditionCode code); - bool InexpensiveConstantInt(int32_t value); - bool InexpensiveConstantFloat(int32_t value); - bool InexpensiveConstantLong(int64_t value); - bool InexpensiveConstantDouble(int64_t value); - - void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + } + RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE { + return As64BitReg(TargetReg(symbolic_reg)); + } + RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; + RegLocation GetReturnAlt() OVERRIDE; + RegLocation GetReturnWideAlt() OVERRIDE; + RegLocation LocCReturn() OVERRIDE; 
+ RegLocation LocCReturnRef() OVERRIDE; + RegLocation LocCReturnDouble() OVERRIDE; + RegLocation LocCReturnFloat() OVERRIDE; + RegLocation LocCReturnWide() OVERRIDE; + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; + void AdjustSpillMask() OVERRIDE; + void ClobberCallerSave() OVERRIDE; + void FreeCallTemps() OVERRIDE; + void LockCallTemps() OVERRIDE; + void CompilerInitializeRegAlloc() OVERRIDE; + + // Required for target - miscellaneous. + void AssembleLIR() OVERRIDE; + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; + const char* GetTargetInstFmt(int opcode) OVERRIDE; + const char* GetTargetInstName(int opcode) OVERRIDE; + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) OVERRIDE; + ResourceMask GetPCUseDefEncoding() const OVERRIDE; + uint64_t GetTargetInstFlags(int opcode) OVERRIDE; + size_t GetInsnSize(LIR* lir) OVERRIDE; + bool IsUnconditionalBranch(LIR* lir) OVERRIDE; + + // Get the register class for load/store of a field. + RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; + + // Required for target - Dalvik-level generators. + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation lr_shift) OVERRIDE; + void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_dest, int scale) OVERRIDE; + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale, bool card_mark) OVERRIDE; + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_shift) OVERRIDE; + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) OVERRIDE; + bool GenInlinedReverseBits(CallInfo* info, OpSize size) OVERRIDE; + bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE; + bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE; + bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) OVERRIDE; + bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) OVERRIDE; + bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) OVERRIDE; + bool GenInlinedSqrt(CallInfo* info) OVERRIDE; + bool GenInlinedCeil(CallInfo* info) OVERRIDE; + bool GenInlinedFloor(CallInfo* info) OVERRIDE; + bool GenInlinedRint(CallInfo* info) OVERRIDE; + bool GenInlinedRound(CallInfo* info, bool is_double) OVERRIDE; + bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE; + bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE; + bool GenInlinedAbsLong(CallInfo* info) OVERRIDE; + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; + void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; + RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div) + 
OVERRIDE; + RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) + OVERRIDE; + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE; + void GenDivZeroCheckWide(RegStorage reg) OVERRIDE; + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; + void GenExitSequence() OVERRIDE; + void GenSpecialExitSequence() OVERRIDE; + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src) OVERRIDE; + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE; + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE; + void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; + void GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, + int32_t true_val, int32_t false_val, RegStorage rs_dest, + int dest_reg_class) OVERRIDE; + + bool GenMemBarrier(MemBarrierKind barrier_kind) OVERRIDE; + void GenMonitorEnter(int opt_flags, RegLocation rl_src) OVERRIDE; + void GenMonitorExit(int opt_flags, RegLocation rl_src) OVERRIDE; + void GenMoveException(RegLocation rl_dest) OVERRIDE; + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, + int first_bit, int second_bit) OVERRIDE; + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; + void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; + void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; + + // Required for target - single operation generators. + LIR* OpUnconditionalBranch(LIR* target) OVERRIDE; + LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) OVERRIDE; + LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) OVERRIDE; + LIR* OpCondBranch(ConditionCode cc, LIR* target) OVERRIDE; + LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) OVERRIDE; + LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE; + LIR* OpIT(ConditionCode cond, const char* guide) OVERRIDE; + void OpEndIT(LIR* it) OVERRIDE; + LIR* OpMem(OpKind op, RegStorage r_base, int disp) OVERRIDE; + LIR* OpPcRelLoad(RegStorage reg, LIR* target) OVERRIDE; + LIR* OpReg(OpKind op, RegStorage r_dest_src) OVERRIDE; + void OpRegCopy(RegStorage r_dest, RegStorage r_src) OVERRIDE; + LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) OVERRIDE; + LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) OVERRIDE; + LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OVERRIDE; + LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) OVERRIDE; + LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) OVERRIDE; + LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) OVERRIDE; + LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) OVERRIDE; + LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) OVERRIDE; + LIR* OpTestSuspend(LIR* target) OVERRIDE; + LIR* OpVldm(RegStorage r_base, int count) OVERRIDE; + LIR* OpVstm(RegStorage r_base, int count) OVERRIDE; + void OpRegCopyWide(RegStorage dest, RegStorage src) OVERRIDE; + + bool InexpensiveConstantInt(int32_t value) OVERRIDE; + bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE; + bool InexpensiveConstantFloat(int32_t value) OVERRIDE; + 
bool InexpensiveConstantLong(int64_t value) OVERRIDE; + bool InexpensiveConstantDouble(int64_t value) OVERRIDE; + + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; + + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - InToRegStorageMapping in_to_reg_storage_mapping_; + bool skip_this) OVERRIDE; - bool WideGPRsAreAliases() OVERRIDE { - return true; // 64b architecture. - } - bool WideFPRsAreAliases() OVERRIDE { - return true; // 64b architecture. - } - size_t GetInstructionOffset(LIR* lir); - - LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; - - private: - /** - * @brief Given register xNN (dNN), returns register wNN (sNN). - * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2). - * @return A Solo32 with the same register number as the @p reg (e.g. @c w1 or @c s2). - * @see As64BitReg - */ - RegStorage As32BitReg(RegStorage reg) { - DCHECK(!reg.IsPair()); - if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { - if (kFailOnSizeError) { - LOG(FATAL) << "Expected 64b register"; - } else { - LOG(WARNING) << "Expected 64b register"; - return reg; - } + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this) OVERRIDE; + + bool WideGPRsAreAliases() OVERRIDE { + return true; // 64b architecture. + } + bool WideFPRsAreAliases() OVERRIDE { + return true; // 64b architecture. + } + + size_t GetInstructionOffset(LIR* lir) OVERRIDE; + + LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; + + private: + /** + * @brief Given register xNN (dNN), returns register wNN (sNN). + * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2). + * @return A Solo32 with the same register number as the @p reg (e.g. @c w1 or @c s2). 
+ * @see As64BitReg + */ + RegStorage As32BitReg(RegStorage reg) { + DCHECK(!reg.IsPair()); + if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Expected 64b register"; + } else { + LOG(WARNING) << "Expected 64b register"; + return reg; } - RegStorage ret_val = RegStorage(RegStorage::k32BitSolo, - reg.GetRawBits() & RegStorage::kRegTypeMask); - DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask) - ->GetReg().GetReg(), - ret_val.GetReg()); - return ret_val; } + RegStorage ret_val = RegStorage(RegStorage::k32BitSolo, + reg.GetRawBits() & RegStorage::kRegTypeMask); + DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask) + ->GetReg().GetReg(), + ret_val.GetReg()); + return ret_val; + } - RegStorage Check32BitReg(RegStorage reg) { - if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { - if (kFailOnSizeError) { - LOG(FATAL) << "Checked for 32b register"; - } else { - LOG(WARNING) << "Checked for 32b register"; - return As32BitReg(reg); - } + RegStorage Check32BitReg(RegStorage reg) { + if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Checked for 32b register"; + } else { + LOG(WARNING) << "Checked for 32b register"; + return As32BitReg(reg); } - return reg; } + return reg; + } - /** - * @brief Given register wNN (sNN), returns register xNN (dNN). - * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2). - * @return A Solo64 with the same register number as the @p reg (e.g. @c x1 or @c d2). - * @see As32BitReg - */ - RegStorage As64BitReg(RegStorage reg) { - DCHECK(!reg.IsPair()); - if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { - if (kFailOnSizeError) { - LOG(FATAL) << "Expected 32b register"; - } else { - LOG(WARNING) << "Expected 32b register"; - return reg; - } + /** + * @brief Given register wNN (sNN), returns register xNN (dNN). + * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2). + * @return A Solo64 with the same register number as the @p reg (e.g. @c x1 or @c d2). 
+ * @see As32BitReg + */ + RegStorage As64BitReg(RegStorage reg) { + DCHECK(!reg.IsPair()); + if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Expected 32b register"; + } else { + LOG(WARNING) << "Expected 32b register"; + return reg; } - RegStorage ret_val = RegStorage(RegStorage::k64BitSolo, - reg.GetRawBits() & RegStorage::kRegTypeMask); - DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask) - ->GetReg().GetReg(), - ret_val.GetReg()); - return ret_val; } + RegStorage ret_val = RegStorage(RegStorage::k64BitSolo, + reg.GetRawBits() & RegStorage::kRegTypeMask); + DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask) + ->GetReg().GetReg(), + ret_val.GetReg()); + return ret_val; + } - RegStorage Check64BitReg(RegStorage reg) { - if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { - if (kFailOnSizeError) { - LOG(FATAL) << "Checked for 64b register"; - } else { - LOG(WARNING) << "Checked for 64b register"; - return As64BitReg(reg); - } + RegStorage Check64BitReg(RegStorage reg) { + if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) { + if (kFailOnSizeError) { + LOG(FATAL) << "Checked for 64b register"; + } else { + LOG(WARNING) << "Checked for 64b register"; + return As64BitReg(reg); } - return reg; } + return reg; + } + + int32_t EncodeImmSingle(uint32_t bits); + int32_t EncodeImmDouble(uint64_t bits); + LIR* LoadFPConstantValue(RegStorage r_dest, int32_t value); + LIR* LoadFPConstantValueWide(RegStorage r_dest, int64_t value); + void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void AssignDataOffsets(); + RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + bool is_div, bool check_zero); + RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div); + size_t GetLoadStoreSize(LIR* lir); + + bool SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, + RegLocation rl_dest, int64_t lit); + + uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset); + int AssignInsnOffsets(); + void AssignOffsets(); + uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); + + // Spill core and FP registers. Returns the SP difference: either spill size, or whole + // frame size. + int SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size); + + // Unspill core and FP registers. 
+ void UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size); + + void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + + LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value); + LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value); + + LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); + LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, + int shift); + int EncodeShift(int code, int amount); + + LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, + A64RegExtEncodings ext, uint8_t amount); + LIR* OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, + A64RegExtEncodings ext, uint8_t amount); + int EncodeExtend(int extend_type, int amount); + bool IsExtendEncoding(int encoded_value); + + LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size); + LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); + + int EncodeLogicalImmediate(bool is_wide, uint64_t value); + uint64_t DecodeLogicalImmediate(bool is_wide, int value); + ArmConditionCode ArmConditionEncoding(ConditionCode code); + + // Helper used in the two GenSelect variants. + void GenSelect(int32_t left, int32_t right, ConditionCode code, RegStorage rs_dest, + int result_reg_class); + + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); - LIR* LoadFPConstantValue(RegStorage r_dest, int32_t value); - LIR* LoadFPConstantValueWide(RegStorage r_dest, int64_t value); - void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); - void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); - void AssignDataOffsets(); - RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, - bool is_div, bool check_zero); - RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div); - size_t GetLoadStoreSize(LIR* lir); + InToRegStorageMapping in_to_reg_storage_mapping_; + static const ArmEncodingMap EncodingMap[kA64Last]; }; } // namespace art diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index ed13c04..d0b2636 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -17,6 +17,7 @@ #include "arm64_lir.h" #include "codegen_arm64.h" #include "dex/quick/mir_to_lir-inl.h" +#include "utils.h" namespace art { @@ -386,6 +387,52 @@ bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { return true; } +bool Arm64Mir2Lir::GenInlinedCeil(CallInfo* info) { + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); + rl_src = LoadValueWide(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(FWIDE(kA64Frintp2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); + return true; +} + +bool Arm64Mir2Lir::GenInlinedFloor(CallInfo* info) { + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); + rl_src = LoadValueWide(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(FWIDE(kA64Frintm2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); + 
return true; +} + +bool Arm64Mir2Lir::GenInlinedRint(CallInfo* info) { + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); + rl_src = LoadValueWide(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(FWIDE(kA64Frintn2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); + return true; +} + +bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) { + int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f)); + ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0); + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info); + rl_src = (is_double) ? LoadValueWide(rl_src, kFPReg) : LoadValue(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage r_tmp = (is_double) ? AllocTempDouble() : AllocTempSingle(); + // 0.5f and 0.5d are encoded in the same way. + NewLIR2(kA64Fmov2fI | wide, r_tmp.GetReg(), encoded_imm); + NewLIR3(kA64Fadd3fff | wide, rl_src.reg.GetReg(), rl_src.reg.GetReg(), r_tmp.GetReg()); + NewLIR2((is_double) ? kA64Fcvtms2xS : kA64Fcvtms2ws, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result); + return true; +} + bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) { DCHECK_EQ(cu_->instruction_set, kArm64); int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 9403d5e..147fee8 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -931,34 +931,52 @@ void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2); -} - -void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2); -} - -void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2); -} - -void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2); -} - -void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2); -} - -void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2); +void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + switch (opcode) { + case Instruction::NOT_LONG: + GenNotLong(rl_dest, rl_src2); + return; + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2); + return; + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2); + return; + case Instruction::MUL_LONG: + case Instruction::MUL_LONG_2ADDR: + GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2); + return; + case Instruction::DIV_LONG: + case 
Instruction::DIV_LONG_2ADDR: + GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); + return; + case Instruction::REM_LONG: + case Instruction::REM_LONG_2ADDR: + GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); + return; + case Instruction::AND_LONG_2ADDR: + case Instruction::AND_LONG: + GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2); + return; + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2); + return; + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2); + return; + case Instruction::NEG_LONG: { + GenNegLong(rl_dest, rl_src2); + return; + } + default: + LOG(FATAL) << "Invalid long arith op"; + return; + } } /* @@ -1192,22 +1210,7 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) { - if (!rl_src2.is_const) { - return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); - } - } else { - // Associativity. - if (!rl_src2.is_const) { - DCHECK(rl_src1.is_const); - std::swap(rl_src1, rl_src2); - } - } - DCHECK(rl_src2.is_const); - OpKind op = kOpBkpt; - int64_t val = mir_graph_->ConstantValueWide(rl_src2); - switch (opcode) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: @@ -1233,6 +1236,20 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_de LOG(FATAL) << "Unexpected opcode"; } + if (op == kOpSub) { + if (!rl_src2.is_const) { + return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + } + } else { + // Associativity. + if (!rl_src2.is_const) { + DCHECK(rl_src1.is_const); + std::swap(rl_src1, rl_src2); + } + } + DCHECK(rl_src2.is_const); + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val); diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 5131bd8..5326e74 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -23,7 +23,7 @@ namespace art { /* This file contains codegen for the A64 ISA. */ -static int32_t EncodeImmSingle(uint32_t bits) { +int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) { /* * Valid values will have the form: * @@ -55,7 +55,7 @@ static int32_t EncodeImmSingle(uint32_t bits) { return (bit7 | bit6 | bit5_to_0); } -static int32_t EncodeImmDouble(uint64_t bits) { +int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) { /* * Valid values will have the form: * @@ -269,8 +269,47 @@ int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) { return (n << 12 | imm_r << 6 | imm_s); } +// Maximum number of instructions to use for encoding the immediate. +static const int max_num_ops_per_const_load = 2; + +/** + * @brief Return the number of fast halfwords in the given uint64_t integer. + * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The + * number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for + * a more accurate description. + * @param value The input 64-bit integer. 
+ * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is + * the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits + * set (0xffff). Additionally (retval & 0x8) is set when m > n. + */ +static int GetNumFastHalfWords(uint64_t value) { + unsigned int num_0000_halfwords = 0; + unsigned int num_ffff_halfwords = 0; + for (int shift = 0; shift < 64; shift += 16) { + uint16_t halfword = static_cast<uint16_t>(value >> shift); + if (halfword == 0) + num_0000_halfwords++; + else if (halfword == UINT16_C(0xffff)) + num_ffff_halfwords++; + } + if (num_0000_halfwords >= num_ffff_halfwords) { + DCHECK_LE(num_0000_halfwords, 4U); + return num_0000_halfwords; + } else { + DCHECK_LE(num_ffff_halfwords, 4U); + return num_ffff_halfwords | 0x8; + } +} + +// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a +// constant is considered for promotion. If the constant is "inexpensive" then the promotion +// algorithm will give it a low priority for promotion, even when it is referenced many times in +// the code. + bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) { - return false; // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); + // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool). + // We therefore return true and give it a low priority for promotion. + return true; } bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) { @@ -278,13 +317,70 @@ bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) { } bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) { - return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value)); + int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7); + if (num_slow_halfwords <= max_num_ops_per_const_load) { + return true; + } + return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0); } bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) { return EncodeImmDouble(value) >= 0; } +// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use +// when one of the operands is an immediate (e.g. register version or immediate version of add). + +bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) { + switch (opcode) { + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + // The code below is consistent with the implementation of OpRegRegImm(). 
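// For example, 4095 (0xfff) and 0x5000 pass the check below (an unsigned 12-bit immediate, optionally shifted left by 12), while 0x1001 fails and the constant is treated as expensive.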
+ { + int32_t abs_value = std::abs(value); + if (abs_value < 0x1000) { + return true; + } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) { + return true; + } + return false; + } + case Instruction::SHL_INT: + case Instruction::SHL_INT_2ADDR: + case Instruction::SHR_INT: + case Instruction::SHR_INT_2ADDR: + case Instruction::USHR_INT: + case Instruction::USHR_INT_2ADDR: + return true; + case Instruction::AND_INT: + case Instruction::AND_INT_2ADDR: + case Instruction::AND_INT_LIT16: + case Instruction::AND_INT_LIT8: + case Instruction::OR_INT: + case Instruction::OR_INT_2ADDR: + case Instruction::OR_INT_LIT16: + case Instruction::OR_INT_LIT8: + case Instruction::XOR_INT: + case Instruction::XOR_INT_2ADDR: + case Instruction::XOR_INT_LIT16: + case Instruction::XOR_INT_LIT8: + if (value == 0 || value == INT32_C(-1)) { + return true; + } + return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0); + default: + return false; + } +} + /* * Load a immediate using one single instruction when possible; otherwise * use a pair of movz and movk instructions. @@ -358,9 +454,6 @@ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { // TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide(). LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { - // Maximum number of instructions to use for encoding the immediate. - const int max_num_ops = 2; - if (r_dest.IsFloat()) { return LoadFPConstantValueWide(r_dest, value); } @@ -378,19 +471,12 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { } // At least one in value's halfwords is not 0x0, nor 0xffff: find out how many. - int num_0000_halfwords = 0; - int num_ffff_halfwords = 0; uint64_t uvalue = static_cast<uint64_t>(value); - for (int shift = 0; shift < 64; shift += 16) { - uint16_t halfword = static_cast<uint16_t>(uvalue >> shift); - if (halfword == 0) - num_0000_halfwords++; - else if (halfword == UINT16_C(0xffff)) - num_ffff_halfwords++; - } - int num_fast_halfwords = std::max(num_0000_halfwords, num_ffff_halfwords); + int num_fast_halfwords = GetNumFastHalfWords(uvalue); + int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7); + bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0; - if (num_fast_halfwords < 3) { + if (num_slow_halfwords > 1) { // A single movz/movn is not enough. Try the logical immediate route. int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value); if (log_imm >= 0) { @@ -398,19 +484,19 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { } } - if (num_fast_halfwords >= 4 - max_num_ops) { + if (num_slow_halfwords <= max_num_ops_per_const_load) { // We can encode the number using a movz/movn followed by one or more movk. ArmOpcode op; uint16_t background; LIR* res = nullptr; // Decide whether to use a movz or a movn. - if (num_0000_halfwords >= num_ffff_halfwords) { - op = WIDE(kA64Movz3rdM); - background = 0; - } else { + if (more_ffff_halfwords) { op = WIDE(kA64Movn3rdM); background = 0xffff; + } else { + op = WIDE(kA64Movz3rdM); + background = 0; } // Emit the first instruction (movz, movn). @@ -726,7 +812,7 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1 int64_t abs_value = (neg) ? -value : value; ArmOpcode opcode = kA64Brk1d; ArmOpcode alt_opcode = kA64Brk1d; - int32_t log_imm = -1; + bool is_logical = false; bool is_wide = r_dest.Is64Bit(); ArmOpcode wide = (is_wide) ? 
WIDE(0) : UNWIDE(0); int info = 0; @@ -761,65 +847,89 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1 opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT; return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1); } else { - log_imm = -1; alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre; info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0); } break; - // case kOpRsub: - // opcode = kThumb2RsubRRI8M; - // alt_opcode = kThumb2RsubRRR; - // break; case kOpAdc: - log_imm = -1; alt_opcode = kA64Adc3rrr; break; case kOpSbc: - log_imm = -1; alt_opcode = kA64Sbc3rrr; break; case kOpOr: - log_imm = EncodeLogicalImmediate(is_wide, value); + is_logical = true; opcode = kA64Orr3Rrl; alt_opcode = kA64Orr4rrro; break; case kOpAnd: - log_imm = EncodeLogicalImmediate(is_wide, value); + is_logical = true; opcode = kA64And3Rrl; alt_opcode = kA64And4rrro; break; case kOpXor: - log_imm = EncodeLogicalImmediate(is_wide, value); + is_logical = true; opcode = kA64Eor3Rrl; alt_opcode = kA64Eor4rrro; break; case kOpMul: // TUNING: power of 2, shift & add - log_imm = -1; alt_opcode = kA64Mul3rrr; break; default: LOG(FATAL) << "Bad opcode: " << op; } - if (log_imm >= 0) { - return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm); - } else { - RegStorage r_scratch; - if (is_wide) { - r_scratch = AllocTempWide(); - LoadConstantWide(r_scratch, value); + if (is_logical) { + int log_imm = EncodeLogicalImmediate(is_wide, value); + if (log_imm >= 0) { + return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm); } else { - r_scratch = AllocTemp(); - LoadConstant(r_scratch, value); + // When the immediate is either 0 or ~0, the logical operation can be trivially reduced + // to a - possibly negated - assignment. + if (value == 0) { + switch (op) { + case kOpOr: + case kOpXor: + // Or/Xor by zero reduces to an assignment. + return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg()); + default: + // And by zero reduces to a `mov rdest, xzr'. + DCHECK(op == kOpAnd); + return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr); + } + } else if (value == INT64_C(-1) + || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) { + switch (op) { + case kOpAnd: + // And by -1 reduces to an assignment. + return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg()); + case kOpXor: + // Xor by -1 reduces to an `mvn rdest, rsrc'. + return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg()); + default: + // Or by -1 reduces to a `mvn rdest, xzr'. + DCHECK(op == kOpOr); + return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? 
rxzr : rwzr); + } + } } - if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) - res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info); - else - res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); - FreeTemp(r_scratch); - return res; } + + RegStorage r_scratch; + if (is_wide) { + r_scratch = AllocTempWide(); + LoadConstantWide(r_scratch, value); + } else { + r_scratch = AllocTemp(); + LoadConstant(r_scratch, value); + } + if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) + res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info); + else + res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); + FreeTemp(r_scratch); + return res; } LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 463f277..9f60427 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -394,6 +394,18 @@ LIR* Mir2Lir::ScanLiteralPoolMethod(LIR* data_target, const MethodReference& met return nullptr; } +/* Search the existing constants in the literal pool for an exact class match */ +LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) { + while (data_target) { + if (static_cast<uint32_t>(data_target->operands[0]) == type_idx && + UnwrapPointer(data_target->operands[1]) == &dex_file) { + return data_target; + } + data_target = data_target->next; + } + return nullptr; +} + /* * The following are building blocks to insert constants into the pool or * instruction streams. @@ -492,10 +504,13 @@ void Mir2Lir::InstallLiteralPools() { data_lir = class_literal_list_; while (data_lir != NULL) { uint32_t target_method_idx = data_lir->operands[0]; + const DexFile* class_dex_file = + reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx, cu_->method_idx, target_method_idx, + class_dex_file, code_buffer_.size()); const DexFile::TypeId& target_method_id = cu_->dex_file->GetTypeId(target_method_idx); // unique value based on target to ensure code deduplication works @@ -983,6 +998,8 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena estimated_native_code_size_(0), reg_pool_(NULL), live_sreg_(0), + core_vmap_table_(mir_graph->GetArena()->Adapter()), + fp_vmap_table_(mir_graph->GetArena()->Adapter()), num_core_spills_(0), num_fp_spills_(0), frame_size_(0), @@ -1220,12 +1237,14 @@ void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target); } -void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) { +void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, + SpecialTargetRegister symbolic_reg) { // Use the literal pool and a PC-relative load from a data word. - LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0); + LIR* data_target = ScanLiteralPoolClass(class_literal_list_, dex_file, type_idx); if (data_target == nullptr) { data_target = AddWordData(&class_literal_list_, type_idx); } + data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file)); // Loads a Class pointer, which is a reference as it lives in the heap. 
LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target); AppendLIR(load_pc_rel); diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 0e46c96..dbceaff 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -48,7 +48,12 @@ static constexpr bool kIntrinsicIsStatic[] = { true, // kIntrinsicMinMaxFloat true, // kIntrinsicMinMaxDouble true, // kIntrinsicSqrt - false, // kIntrinsicGet + true, // kIntrinsicCeil + true, // kIntrinsicFloor + true, // kIntrinsicRint + true, // kIntrinsicRoundFloat + true, // kIntrinsicRoundDouble + false, // kIntrinsicReferenceGet false, // kIntrinsicCharAt false, // kIntrinsicCompareTo false, // kIntrinsicIsEmptyOrLength @@ -75,7 +80,12 @@ COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxLong], MinMaxLong_must_be_stat COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], MinMaxFloat_must_be_static); COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], MinMaxDouble_must_be_static); COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicSqrt], Sqrt_must_be_static); -COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicGet], Get_must_not_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicCeil], Ceil_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicFloor], Floor_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRint], Rint_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundFloat], RoundFloat_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundDouble], RoundDouble_must_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicReferenceGet], Get_must_not_be_static); COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCharAt], CharAt_must_not_be_static); COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCompareTo], CompareTo_must_not_be_static); COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], IsEmptyOrLength_must_not_be_static); @@ -155,7 +165,11 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = { "max", // kNameCacheMax "min", // kNameCacheMin "sqrt", // kNameCacheSqrt - "get", // kNameCacheGet + "ceil", // kNameCacheCeil + "floor", // kNameCacheFloor + "rint", // kNameCacheRint + "round", // kNameCacheRound + "get", // kNameCacheReferenceGet "charAt", // kNameCacheCharAt "compareTo", // kNameCacheCompareTo "isEmpty", // kNameCacheIsEmpty @@ -314,7 +328,18 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods INTRINSIC(JavaLangMath, Sqrt, D_D, kIntrinsicSqrt, 0), INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0), - INTRINSIC(JavaLangRefReference, Get, _Object, kIntrinsicGet, 0), + INTRINSIC(JavaLangMath, Ceil, D_D, kIntrinsicCeil, 0), + INTRINSIC(JavaLangStrictMath, Ceil, D_D, kIntrinsicCeil, 0), + INTRINSIC(JavaLangMath, Floor, D_D, kIntrinsicFloor, 0), + INTRINSIC(JavaLangStrictMath, Floor, D_D, kIntrinsicFloor, 0), + INTRINSIC(JavaLangMath, Rint, D_D, kIntrinsicRint, 0), + INTRINSIC(JavaLangStrictMath, Rint, D_D, kIntrinsicRint, 0), + INTRINSIC(JavaLangMath, Round, F_I, kIntrinsicRoundFloat, 0), + INTRINSIC(JavaLangStrictMath, Round, F_I, kIntrinsicRoundFloat, 0), + INTRINSIC(JavaLangMath, Round, D_J, kIntrinsicRoundDouble, 0), + INTRINSIC(JavaLangStrictMath, Round, D_J, kIntrinsicRoundDouble, 0), + + INTRINSIC(JavaLangRefReference, ReferenceGet, _Object, kIntrinsicReferenceGet, 0), INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0), INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0), @@ -436,8 
+461,18 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */); case kIntrinsicSqrt: return backend->GenInlinedSqrt(info); - case kIntrinsicGet: - return backend->GenInlinedGet(info); + case kIntrinsicCeil: + return backend->GenInlinedCeil(info); + case kIntrinsicFloor: + return backend->GenInlinedFloor(info); + case kIntrinsicRint: + return backend->GenInlinedRint(info); + case kIntrinsicRoundFloat: + return backend->GenInlinedRound(info, false /* is_double */); + case kIntrinsicRoundDouble: + return backend->GenInlinedRound(info, true /* is_double */); + case kIntrinsicReferenceGet: + return backend->GenInlinedReferenceGet(info); case kIntrinsicCharAt: return backend->GenInlinedCharAt(info); case kIntrinsicCompareTo: diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index cb8c165..b875e2b 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -141,7 +141,11 @@ class DexFileMethodInliner { kNameCacheMax, kNameCacheMin, kNameCacheSqrt, - kNameCacheGet, + kNameCacheCeil, + kNameCacheFloor, + kNameCacheRint, + kNameCacheRound, + kNameCacheReferenceGet, kNameCacheCharAt, kNameCacheCompareTo, kNameCacheIsEmpty, diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index aae9155..3f22913 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -256,7 +256,7 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_temp = UpdateLoc(rl_src2); int32_t constant_value = mir_graph_->ConstantValue(rl_src2); if ((rl_temp.location == kLocDalvikFrame) && - InexpensiveConstantInt(constant_value)) { + InexpensiveConstantInt(constant_value, opcode)) { // OK - convert this to a compare immediate and branch OpCmpImmBranch(cond, rl_src1.reg, mir_graph_->ConstantValue(rl_src2), taken); return; @@ -361,7 +361,7 @@ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, &direct_type_ptr, &is_finalizable)) { // The fast path. if (!use_direct_type_ptr) { - LoadClassType(type_idx, kArg0); + LoadClassType(*dex_file, type_idx, kArg0); CallRuntimeHelperRegMethodRegLocation(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide), rl_src, true); } else { @@ -961,7 +961,7 @@ void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) { !is_finalizable) { // The fast path. 
if (!use_direct_type_ptr) { - LoadClassType(type_idx, kArg0); + LoadClassType(*dex_file, type_idx, kArg0); if (!is_type_initialized) { CallRuntimeHelperRegMethod(kQuickAllocObjectResolved, TargetReg(kArg0, kRef), true); } else { @@ -1808,10 +1808,6 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, switch (opcode) { case Instruction::NOT_LONG: - if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) { - GenNotLong(rl_dest, rl_src2); - return; - } rl_src2 = LoadValueWide(rl_src2, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); // Check for destructive overlap @@ -1829,39 +1825,22 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, return; case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: - if (cu_->instruction_set != kThumb2) { - GenAddLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } first_op = kOpAdd; second_op = kOpAdc; break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: - if (cu_->instruction_set != kThumb2) { - GenSubLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } first_op = kOpSub; second_op = kOpSbc; break; case Instruction::MUL_LONG: case Instruction::MUL_LONG_2ADDR: - if (cu_->instruction_set != kMips) { - GenMulLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } else { - call_out = true; - TargetReg(kRet0, kNotWide).GetReg(); - target = kQuickLmul; - } + call_out = true; + ret_reg = TargetReg(kRet0, kNotWide).GetReg(); + target = kQuickLmul; break; case Instruction::DIV_LONG: case Instruction::DIV_LONG_2ADDR: - if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) { - GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); - return; - } call_out = true; check_zero = true; ret_reg = TargetReg(kRet0, kNotWide).GetReg(); @@ -1869,10 +1848,6 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, break; case Instruction::REM_LONG: case Instruction::REM_LONG_2ADDR: - if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) { - GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); - return; - } call_out = true; check_zero = true; target = kQuickLmod; @@ -1882,37 +1857,19 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, break; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 || - cu_->instruction_set == kArm64) { - return GenAndLong(opcode, rl_dest, rl_src1, rl_src2); - } first_op = kOpAnd; second_op = kOpAnd; break; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 || - cu_->instruction_set == kArm64) { - GenOrLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } first_op = kOpOr; second_op = kOpOr; break; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 || - cu_->instruction_set == kArm64) { - GenXorLong(opcode, rl_dest, rl_src1, rl_src2); - return; - } first_op = kOpXor; second_op = kOpXor; break; - case Instruction::NEG_LONG: { - GenNegLong(rl_dest, rl_src2); - return; - } default: LOG(FATAL) << "Invalid long arith op"; } @@ -2051,4 +2008,92 @@ void Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) { StoreValueWide(rl_dest, rl_result); } +void Mir2Lir::GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; 
+ const uint16_t entries = table[1]; + // Chained cmp-and-branch. + const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]); + int32_t current_key = as_int32[0]; + const int32_t* targets = &as_int32[1]; + rl_src = LoadValue(rl_src, kCoreReg); + int i = 0; + for (; i < entries; i++, current_key++) { + if (!InexpensiveConstantInt(current_key, Instruction::Code::IF_EQ)) { + // Switch to using a temp and add. + break; + } + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpImmBranch(kCondEq, rl_src.reg, current_key, &block_label_list_[case_block->id]); + } + if (i < entries) { + // The rest do not seem to be inexpensive. Try to allocate a temp and use add. + RegStorage key_temp = AllocTypedTemp(false, kCoreReg, false); + if (key_temp.Valid()) { + LoadConstantNoClobber(key_temp, current_key); + for (; i < entries - 1; i++, current_key++) { + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]); + OpRegImm(kOpAdd, key_temp, 1); // Increment key. + } + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]); + } else { + // No free temp, just finish the old loop. + for (; i < entries; i++, current_key++) { + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpImmBranch(kCondEq, rl_src.reg, current_key, &block_label_list_[case_block->id]); + } + } + } +} + +void Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + + const uint16_t entries = table[1]; + if (entries <= kSmallSwitchThreshold) { + GenSmallPackedSwitch(mir, table_offset, rl_src); + } else { + // Use the backend-specific implementation. + GenLargePackedSwitch(mir, table_offset, rl_src); + } +} + +void Mir2Lir::GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + const uint16_t entries = table[1]; + // Chained cmp-and-branch. + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; + rl_src = LoadValue(rl_src, kCoreReg); + for (int i = 0; i < entries; i++) { + int key = keys[i]; + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]); + } +} + +void Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + + const uint16_t entries = table[1]; + if (entries <= kSmallSwitchThreshold) { + GenSmallSparseSwitch(mir, table_offset, rl_src); + } else { + // Use the backend-specific implementation. 
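// With kSmallSwitchThreshold at 5, a sparse switch with six or more entries reaches this path and is lowered by the target-specific GenLargeSparseSwitch (e.g. a <key, displacement> table on x86).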
+ GenLargeSparseSwitch(mir, table_offset, rl_src); + } +} + } // namespace art diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 5fc6996..3cfc9a6 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -25,10 +25,8 @@ #include "mirror/class-inl.h" #include "mirror/dex_cache.h" #include "mirror/object_array-inl.h" -#include "mirror/reference-inl.h" #include "mirror/string.h" #include "mir_to_lir-inl.h" -#include "scoped_thread_state_change.h" #include "x86/codegen_x86.h" namespace art { @@ -1129,57 +1127,32 @@ RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) { return res; } -bool Mir2Lir::GenInlinedGet(CallInfo* info) { +bool Mir2Lir::GenInlinedReferenceGet(CallInfo* info) { if (cu_->instruction_set == kMips) { // TODO - add Mips implementation return false; } - // the refrence class is stored in the image dex file which might not be the same as the cu's - // dex file. Query the reference class for the image dex file then reset to starting dex file - // in after loading class type. - uint16_t type_idx = 0; - const DexFile* ref_dex_file = nullptr; - { - ScopedObjectAccess soa(Thread::Current()); - type_idx = mirror::Reference::GetJavaLangRefReference()->GetDexTypeIndex(); - ref_dex_file = mirror::Reference::GetJavaLangRefReference()->GetDexCache()->GetDexFile(); - } - CHECK(LIKELY(ref_dex_file != nullptr)); - - // address is either static within the image file, or needs to be patched up after compilation. - bool unused_type_initialized; bool use_direct_type_ptr; uintptr_t direct_type_ptr; - bool is_finalizable; - const DexFile* old_dex = cu_->dex_file; - cu_->dex_file = ref_dex_file; + ClassReference ref; + if (!cu_->compiler_driver->CanEmbedReferenceTypeInCode(&ref, + &use_direct_type_ptr, &direct_type_ptr)) { + return false; + } + RegStorage reg_class = TargetReg(kArg1, kRef); Clobber(reg_class); LockTemp(reg_class); - if (!cu_->compiler_driver->CanEmbedTypeInCode(*ref_dex_file, type_idx, &unused_type_initialized, - &use_direct_type_ptr, &direct_type_ptr, - &is_finalizable) || is_finalizable) { - cu_->dex_file = old_dex; - // address is not known and post-compile patch is not possible, cannot insert intrinsic. - return false; - } if (use_direct_type_ptr) { LoadConstant(reg_class, direct_type_ptr); } else { - LoadClassType(type_idx, kArg1); + uint16_t type_idx = ref.first->GetClassDef(ref.second).class_idx_; + LoadClassType(*ref.first, type_idx, kArg1); } - cu_->dex_file = old_dex; - // get the offset for flags in reference class. 
- uint32_t slow_path_flag_offset = 0; - uint32_t disable_flag_offset = 0; - { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference(); - slow_path_flag_offset = reference_class->GetSlowPathFlagOffset().Uint32Value(); - disable_flag_offset = reference_class->GetDisableIntrinsicFlagOffset().Uint32Value(); - } + uint32_t slow_path_flag_offset = cu_->compiler_driver->GetReferenceSlowFlagOffset(); + uint32_t disable_flag_offset = cu_->compiler_driver->GetReferenceDisableFlagOffset(); CHECK(slow_path_flag_offset && disable_flag_offset && (slow_path_flag_offset != disable_flag_offset)); @@ -1427,6 +1400,22 @@ bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) { return false; } +bool Mir2Lir::GenInlinedCeil(CallInfo* info) { + return false; +} + +bool Mir2Lir::GenInlinedFloor(CallInfo* info) { + return false; +} + +bool Mir2Lir::GenInlinedRint(CallInfo* info) { + return false; +} + +bool Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) { + return false; +} + bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) { if (cu_->instruction_set == kMips) { // TODO - add Mips implementation diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index eec2b32..e0f4691 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -200,7 +200,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { /* Initialize alias list */ alias_list.clear(); ResourceMask alias_reg_list_mask = kEncodeNone; - if (!this_mem_mask.Intersects(kEncodeLiteral)) { + if (!this_mem_mask.Intersects(kEncodeMem) && !this_mem_mask.Intersects(kEncodeLiteral)) { alias_list.push_back(dest_reg_id); SetupRegMask(&alias_reg_list_mask, dest_reg_id); } @@ -248,7 +248,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { bool is_check_lir_load = check_flags & IS_LOAD; bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id); - if (alias_mem_mask.Equals(kEncodeLiteral)) { + if (!alias_mem_mask.Intersects(kEncodeMem) && alias_mem_mask.Equals(kEncodeLiteral)) { DCHECK(check_flags & IS_LOAD); /* Same value && same register type */ if (reg_compatible && (this_lir->target == check_lir->target)) { diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index 9adddf0..4577a4c 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -61,8 +61,7 @@ bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, * done: * */ -void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, - RegLocation rl_src) { +void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); @@ -139,8 +138,7 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, * jr rRA * done: */ -void MipsMir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, - RegLocation rl_src) { +void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 4bd2748..43cbde7 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ 
b/compiler/dex/quick/mips/codegen_mips.h @@ -92,12 +92,6 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); - void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -112,16 +106,8 @@ class MipsMir2Lir FINAL : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); - void GenNotLong(RegLocation rl_dest, RegLocation rl_src); - void GenNegLong(RegLocation rl_dest, RegLocation rl_src); - void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div); + void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -142,8 +128,8 @@ class MipsMir2Lir FINAL : public Mir2Lir { int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special); // Required for target - single operation generators. 
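The packed/sparse switch declarations above become GenLarge* variants because the shared Mir2Lir layer (see the gen_common.cc and mir_to_lir.h hunks elsewhere in this change) now chooses between a chained compare-and-branch for small switches and the backend's table-based code for large ones. A rough standalone sketch of that dispatch, where EmitCompareBranch, EmitJumpTable and GenPackedSwitchSketch are made-up stand-ins and only kSmallSwitchThreshold mirrors the real constant, might look like:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for the real LIR emitters; only the dispatch shape matters here.
static void EmitCompareBranch(int32_t key, int32_t target) {
  std::printf("  cmp key %d, branch to %d\n", key, target);
}
static void EmitJumpTable(const std::vector<int32_t>& targets) {
  std::printf("  jump table with %zu entries\n", targets.size());
}

static constexpr uint16_t kSmallSwitchThreshold = 5;  // Mirrors the constant added to mir_to_lir.h.

// Packed switch sketch: case keys are consecutive, starting at first_key.
static void GenPackedSwitchSketch(int32_t first_key, const std::vector<int32_t>& targets) {
  if (targets.size() <= static_cast<size_t>(kSmallSwitchThreshold)) {
    // Small switch: a chain of compare-and-branch instructions, one per case.
    for (size_t i = 0; i < targets.size(); ++i) {
      EmitCompareBranch(first_key + static_cast<int32_t>(i), targets[i]);
    }
  } else {
    // Large switch: defer to the backend's table-based lowering.
    EmitJumpTable(targets);
  }
}

int main() {
  GenPackedSwitchSketch(10, {0x100, 0x110, 0x120});    // three cases: chained compares
  GenPackedSwitchSketch(0, {1, 2, 3, 4, 5, 6, 7, 8});  // eight cases: jump table
  return 0;
}

The actual GenSmallPackedSwitch in gen_common.cc is a bit more careful: once a case key is no longer an encodable immediate it loads the key into a temp register and increments it between compares, falling back to immediate compares only when no temp is free.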
@@ -196,6 +182,12 @@ class MipsMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; private: + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void ConvertShortToLongBranch(LIR* lir); RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, bool check_zero); diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index d727615..ea56989 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -392,11 +392,6 @@ void MipsMir2Lir::OpEndIT(LIR* it) { } -void MipsMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenMulLong for Mips"; -} - void MipsMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { rl_src1 = LoadValueWide(rl_src1, kCoreReg); @@ -441,13 +436,27 @@ void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, StoreValueWide(rl_dest, rl_result); } -void MipsMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { - LOG(FATAL) << "Unexpected use GenNotLong()"; -} +void MipsMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + switch (opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + GenAddLong(opcode, rl_dest, rl_src1, rl_src2); + return; + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + GenSubLong(opcode, rl_dest, rl_src1, rl_src2); + return; + case Instruction::NEG_LONG: + GenNegLong(rl_dest, rl_src2); + return; + + default: + break; + } -void MipsMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div) { - LOG(FATAL) << "Unexpected use GenDivRemLong()"; + // Fallback for all other ops. 
+ Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); } void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { @@ -470,22 +479,6 @@ void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } -void MipsMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenAndLong for Mips"; -} - -void MipsMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenOrLong for Mips"; -} - -void MipsMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenXorLong for Mips"; -} - /* * Generate array load */ diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 4d8b91e..e519011 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -926,11 +926,11 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::XOR_INT: case Instruction::XOR_INT_2ADDR: if (rl_src[0].is_const && - InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]))) { + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]), opcode)) { GenArithOpIntLit(opcode, rl_dest, rl_src[1], mir_graph_->ConstantValue(rl_src[0].orig_sreg)); } else if (rl_src[1].is_const && - InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) { + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) { GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1].orig_sreg)); } else { @@ -951,7 +951,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::USHR_INT: case Instruction::USHR_INT_2ADDR: if (rl_src[1].is_const && - InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) { + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) { GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1])); } else { GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index b19942d..2221bb5 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -32,6 +32,7 @@ #include "safe_map.h" #include "utils/array_ref.h" #include "utils/arena_allocator.h" +#include "utils/arena_containers.h" #include "utils/growable_array.h" #include "utils/stack_checks.h" @@ -228,6 +229,9 @@ class Mir2Lir : public Backend { static constexpr bool kFailOnSizeError = true && kIsDebugBuild; static constexpr bool kReportSizeError = true && kIsDebugBuild; + // TODO: If necessary, this could be made target-dependent. + static constexpr uint16_t kSmallSwitchThreshold = 5; + /* * Auxiliary information describing the location of data embedded in the Dalvik * byte code stream. 
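The next hunk declares ScanLiteralPoolClass, which pairs with the codegen_util.cc change earlier in this diff: class literals are now keyed by both the type index and the owning DexFile, so two classes with the same type_idx in different dex files no longer share a pool entry. A minimal sketch of that dual-key lookup, where LiteralNode and its fields are simplified stand-ins rather than the real LIR layout, could be:

#include <cstdint>

struct DexFile;  // Opaque here; only its address is used as a key.

// Simplified stand-in for the backend's literal list.
struct LiteralNode {
  uint32_t type_idx;        // operands[0] in the real code.
  const DexFile* dex_file;  // operands[1] (wrapped pointer) in the real code.
  LiteralNode* next;
};

// Returns the node matching both keys, or nullptr: an entry only matches when the
// type index and the defining dex file agree, mirroring ScanLiteralPoolClass.
const LiteralNode* ScanClassLiteral(const LiteralNode* head,
                                    const DexFile& dex_file, uint32_t type_idx) {
  for (const LiteralNode* node = head; node != nullptr; node = node->next) {
    if (node->type_idx == type_idx && node->dex_file == &dex_file) {
      return node;
    }
  }
  return nullptr;
}

A cache hit therefore requires both fields to match, and LoadClassType records the wrapped DexFile pointer in operands[1], as shown in the codegen_util.cc hunk.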
@@ -681,6 +685,7 @@ class Mir2Lir : public Backend { LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta); LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi); LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method); + LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx); LIR* AddWordData(LIR* *constant_list_p, int value); LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi); void ProcessSwitchTables(); @@ -867,8 +872,8 @@ class Mir2Lir : public Backend { RegLocation rl_src1, RegLocation rl_shift); void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int lit); - void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2); + virtual void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src); virtual void GenSuspendTest(int opt_flags); virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target); @@ -954,7 +959,7 @@ class Mir2Lir : public Backend { */ RegLocation InlineTargetWide(CallInfo* info); - bool GenInlinedGet(CallInfo* info); + bool GenInlinedReferenceGet(CallInfo* info); virtual bool GenInlinedCharAt(CallInfo* info); bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size); @@ -965,6 +970,10 @@ class Mir2Lir : public Backend { virtual bool GenInlinedAbsDouble(CallInfo* info) = 0; bool GenInlinedFloatCvt(CallInfo* info); bool GenInlinedDoubleCvt(CallInfo* info); + virtual bool GenInlinedCeil(CallInfo* info); + virtual bool GenInlinedFloor(CallInfo* info); + virtual bool GenInlinedRint(CallInfo* info); + virtual bool GenInlinedRound(CallInfo* info, bool is_double); virtual bool GenInlinedArrayCopyCharArray(CallInfo* info); virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based); bool GenInlinedStringCompareTo(CallInfo* info); @@ -1105,11 +1114,13 @@ class Mir2Lir : public Backend { /* * @brief Load the Class* of a Dex Class type into the register. + * @param dex DexFile that contains the class type. * @param type How the method will be invoked. * @param register that will contain the code address. * @note register will be passed to TargetReg to get physical register. */ - virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg); + virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx, + SpecialTargetRegister symbolic_reg); // Routines that work for the generic case, but may be overriden by target. /* @@ -1246,15 +1257,6 @@ class Mir2Lir : public Backend { // Required for target - Dalvik-level generators. 
virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; - virtual void GenMulLong(Instruction::Code, - RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; - virtual void GenAddLong(Instruction::Code, - RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; - virtual void GenAndLong(Instruction::Code, - RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; virtual void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; @@ -1282,16 +1284,6 @@ class Mir2Lir : public Backend { virtual bool GenInlinedSqrt(CallInfo* info) = 0; virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0; virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0; - virtual void GenNotLong(RegLocation rl_dest, RegLocation rl_src) = 0; - virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0; - virtual void GenOrLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; - virtual void GenSubLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; - virtual void GenXorLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) = 0; - virtual void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div) = 0; virtual RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div) = 0; virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, @@ -1369,8 +1361,19 @@ class Mir2Lir : public Backend { int first_bit, int second_bit) = 0; virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0; - virtual void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; - virtual void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; + + // Create code for switch statements. Will decide between short and long versions below. + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + + // Potentially backend-specific versions of switch instructions for shorter switch statements. + // The default implementation will create a chained compare-and-branch. + virtual void GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + virtual void GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + // Backend-specific versions of switch instructions for longer switch statements. + virtual void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; + virtual void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; + virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) = 0; virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, @@ -1441,6 +1444,9 @@ class Mir2Lir : public Backend { virtual bool InexpensiveConstantFloat(int32_t value) = 0; virtual bool InexpensiveConstantLong(int64_t value) = 0; virtual bool InexpensiveConstantDouble(int64_t value) = 0; + virtual bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) { + return InexpensiveConstantInt(value); + } // May be optimized by targets. 
virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src); @@ -1711,8 +1717,8 @@ class Mir2Lir : public Backend { CodeBuffer code_buffer_; // The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix. std::vector<uint8_t> encoded_mapping_table_; - std::vector<uint32_t> core_vmap_table_; - std::vector<uint32_t> fp_vmap_table_; + ArenaVector<uint32_t> core_vmap_table_; + ArenaVector<uint32_t> fp_vmap_table_; std::vector<uint8_t> native_gc_map_; int num_core_spills_; int num_fp_spills_; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 45244e1..be966e1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -1171,12 +1171,13 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num } else { counts[p_map_idx].count += use_count; } - } else if (!IsInexpensiveConstant(loc)) { + } else { if (loc.wide && WideGPRsAreAliases()) { - // Longs and doubles can be counted together. i++; } - counts[p_map_idx].count += use_count; + if (!IsInexpensiveConstant(loc)) { + counts[p_map_idx].count += use_count; + } } } } @@ -1185,9 +1186,10 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num static int SortCounts(const void *val1, const void *val2) { const Mir2Lir::RefCounts* op1 = reinterpret_cast<const Mir2Lir::RefCounts*>(val1); const Mir2Lir::RefCounts* op2 = reinterpret_cast<const Mir2Lir::RefCounts*>(val2); - // Note that we fall back to sorting on reg so we get stable output - // on differing qsort implementations (such as on host and target or - // between local host and build servers). + // Note that we fall back to sorting on reg so we get stable output on differing qsort + // implementations (such as on host and target or between local host and build servers). + // Note also that if a wide val1 and a non-wide val2 have the same count, then val1 always + // ``loses'' (as STARTING_WIDE_SREG is or-ed in val1->s_reg). return (op1->count == op2->count) ? (op1->s_reg - op2->s_reg) : (op1->count < op2->count ? 1 : -1); @@ -1230,8 +1232,8 @@ void Mir2Lir::DoPromotion() { * TUNING: replace with linear scan once we have the ability * to describe register live ranges for GC. */ - size_t core_reg_count_size = cu_->target64 ? num_regs * 2 : num_regs; - size_t fp_reg_count_size = num_regs * 2; + size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2; + size_t fp_reg_count_size = WideFPRsAreAliases() ? num_regs : num_regs * 2; RefCounts *core_regs = static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size, kArenaAllocRegAlloc)); @@ -1261,7 +1263,6 @@ void Mir2Lir::DoPromotion() { // Sum use counts of SSA regs by original Dalvik vreg. 
CountRefs(core_regs, fp_regs, num_regs); - // Sort the count arrays qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts); qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts); diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index efd9079..8ebe55c 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -35,16 +35,16 @@ const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = { rm32_i32, rm32_i32_modrm, \ rm32_i8, rm32_i8_modrm) \ { kX86 ## opname ## 8MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0, true }, #opname "8MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 8AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0, true}, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 8AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0, true }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ { kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0, 0, 0, true }, #opname "8TR", "fs:[!0d],!1r" }, \ { kX86 ## opname ## 8RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RR", "!0r,!1r" }, \ { kX86 ## opname ## 8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## 8RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ { kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RT", "!0r,fs:[!1d]" }, \ { kX86 ## opname ## 8RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1, true }, #opname "8RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 8MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 8AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 8MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, false}, #opname "8MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 8AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, false}, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, false}, #opname "8TI", "fs:[!0d],!1d" }, \ \ { kX86 ## opname ## 16MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "16MR", "[!0r+!1d],!2r" }, \ { kX86 ## opname ## 16AR, 
kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ @@ -170,9 +170,9 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov8RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, { kX86Mov8RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RT", "!0r,fs:[!1d]" }, { kX86Mov8RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB0, 0, 0, 0, 0, 1, true }, "Mov8RI", "!0r,!1d" }, - { kX86Mov8MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8MI", "[!0r+!1d],!2d" }, - { kX86Mov8AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8TI", "fs:[!0d],!1d" }, + { kX86Mov8MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8MI", "[!0r+!1d],!2d" }, + { kX86Mov8AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, false}, "Mov8TI", "fs:[!0d],!1d" }, { kX86Mov16MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov16MR", "[!0r+!1d],!2r" }, { kX86Mov16AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" }, @@ -286,7 +286,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" }, { kX86Test64RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" }, - { kX86Test32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!1d]" }, + { kX86Test32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RM", "!0r,[!1r+!2d]" }, #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \ reg, reg_kind, reg_flags, \ @@ -407,9 +407,9 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Haddpd, 0x66, 0x7C, REG_DEF0_USE0), EXT_0F_ENCODING_MAP(Haddps, 0xF2, 0x7C, REG_DEF0_USE0), - { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" }, + { kX86PextrbRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" }, { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" }, - { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" }, + { kX86PextrdRRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" }, { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrbMRI", "[!0r+!1d],!2r,!3d" }, { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrwMRI", 
"[!0r+!1d],!2r,!3d" }, { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "kX86PextrdMRI", "[!0r+!1d],!2r,!3d" }, @@ -478,7 +478,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86MovsxdRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRM", "!0r,[!1r+!2d]" }, { kX86MovsxdRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true }, "Set8R", "!1c !0r" }, + { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true }, "Set8R", "!1c !0r" }, { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8M", "!2c [!0r+!1d]" }, { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 15aae9e..f5f8671 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -27,8 +27,7 @@ namespace art { * The sparse table in the literal pool is an array of <key,displacement> * pairs. */ -void X86Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, - RegLocation rl_src) { +void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); @@ -61,8 +60,7 @@ void X86Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, * jmp r_start_of_method * done: */ -void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, - RegLocation rl_src) { +void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 40621b1..d3ed48d 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -167,26 +167,12 @@ class X86Mir2Lir : public Mir2Lir { bool GenInlinedCharAt(CallInfo* info) OVERRIDE; // Long instructions. 
+ void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) OVERRIDE; void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE; void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift) OVERRIDE; - void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenNotLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; - void GenNegLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; - void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) OVERRIDE; - void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div) OVERRIDE; void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE; void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, @@ -260,8 +246,8 @@ class X86Mir2Lir : public Mir2Lir { int first_bit, int second_bit) OVERRIDE; void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; - void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; - void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; + void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; + void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; /** * @brief Implement instanceof a final class with x86 specific code. @@ -333,11 +319,13 @@ class X86Mir2Lir : public Mir2Lir { /* * @brief Load the Class* of a Dex Class type into the register. + * @param dex DexFile that contains the class type. * @param type How the method will be invoked. * @param register that will contain the code address. * @note register will be passed to TargetReg to get physical register. */ - void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE; + void LoadClassType(const DexFile& dex_file, uint32_t type_idx, + SpecialTargetRegister symbolic_reg) OVERRIDE; void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; @@ -369,12 +357,6 @@ class X86Mir2Lir : public Mir2Lir { void InstallLiteralPools() OVERRIDE; /* - * @brief Generate the debug_frame CFI information. - * @returns pointer to vector containing CFE information - */ - static std::vector<uint8_t>* ReturnCommonCallFrameInformation(bool is_x86_64); - - /* * @brief Generate the debug_frame FDE information. * @returns pointer to vector containing CFE information */ @@ -827,6 +809,16 @@ class X86Mir2Lir : public Mir2Lir { void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset); + // Try to do a long multiplication where rl_src2 is a constant. This simplified setup might fail, + // in which case false will be returned. 
+ bool GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val); + void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); + void SpillCoreRegs(); void UnSpillCoreRegs(); void UnSpillFPRegs(); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 057639c..fdc46e2 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -1283,91 +1283,113 @@ void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int } } -void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - // All memory accesses below reference dalvik regs. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - - if (cu_->target64) { - if (rl_src1.is_const) { - std::swap(rl_src1, rl_src2); - } - // Are we multiplying by a constant? - if (rl_src2.is_const) { - int64_t val = mir_graph_->ConstantValueWide(rl_src2); - if (val == 0) { - RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegReg(kOpXor, rl_result.reg, rl_result.reg); - StoreValueWide(rl_dest, rl_result); - return; - } else if (val == 1) { - StoreValueWide(rl_dest, rl_src1); - return; - } else if (val == 2) { - GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); +void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + if (!cu_->target64) { + // Some x86 32b ops are fallback. + switch (opcode) { + case Instruction::NOT_LONG: + case Instruction::DIV_LONG: + case Instruction::DIV_LONG_2ADDR: + case Instruction::REM_LONG: + case Instruction::REM_LONG_2ADDR: + Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); return; - } else if (IsPowerOfTwo(val)) { - int shift_amount = LowestSetBit(val); - if (!BadOverlap(rl_src1, rl_dest)) { - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, - rl_src1, shift_amount); - StoreValueWide(rl_dest, rl_result); - return; - } - } - } - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - rl_src2 = LoadValueWide(rl_src2, kCoreReg); - RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && - rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { - NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() && - rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { - NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); - } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && - rl_result.reg.GetReg() != rl_src2.reg.GetReg()) { - NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); - } else { - OpRegCopy(rl_result.reg, rl_src1.reg); - NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); + + default: + // Everything else we can handle. 
+ break; } - StoreValueWide(rl_dest, rl_result); - return; } - if (rl_src1.is_const) { - std::swap(rl_src1, rl_src2); + switch (opcode) { + case Instruction::NOT_LONG: + GenNotLong(rl_dest, rl_src2); + return; + + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); + return; + + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); + return; + + case Instruction::MUL_LONG: + case Instruction::MUL_LONG_2ADDR: + GenMulLong(opcode, rl_dest, rl_src1, rl_src2); + return; + + case Instruction::DIV_LONG: + case Instruction::DIV_LONG_2ADDR: + GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); + return; + + case Instruction::REM_LONG: + case Instruction::REM_LONG_2ADDR: + GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); + return; + + case Instruction::AND_LONG_2ADDR: + case Instruction::AND_LONG: + GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); + return; + + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); + return; + + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); + return; + + case Instruction::NEG_LONG: + GenNegLong(rl_dest, rl_src2); + return; + + default: + LOG(FATAL) << "Invalid long arith op"; + return; } - // Are we multiplying by a constant? - if (rl_src2.is_const) { - // Do special compare/branch against simple const operand - int64_t val = mir_graph_->ConstantValueWide(rl_src2); - if (val == 0) { - RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); +} + +bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) { + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + + if (val == 0) { + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + if (cu_->target64) { + OpRegReg(kOpXor, rl_result.reg, rl_result.reg); + } else { OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow()); OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); + } + StoreValueWide(rl_dest, rl_result); + return true; + } else if (val == 1) { + StoreValueWide(rl_dest, rl_src1); + return true; + } else if (val == 2) { + GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); + return true; + } else if (IsPowerOfTwo(val)) { + int shift_amount = LowestSetBit(val); + if (!BadOverlap(rl_src1, rl_dest)) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1, + shift_amount); StoreValueWide(rl_dest, rl_result); - return; - } else if (val == 1) { - StoreValueWide(rl_dest, rl_src1); - return; - } else if (val == 2) { - GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); - return; - } else if (IsPowerOfTwo(val)) { - int shift_amount = LowestSetBit(val); - if (!BadOverlap(rl_src1, rl_dest)) { - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, - rl_src1, shift_amount); - StoreValueWide(rl_dest, rl_result); - return; - } + return true; } + } - // Okay, just bite the bullet and do it. + // Okay, on 32b just bite the bullet and do it, still better than the general case. 
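Note on the constant-multiplier cases that GenMulLongConst peels off above: they are the usual strength reductions (multiply by 0, 1, 2, or a power of two). A minimal standalone C++ sketch of that decision, illustrative only; the function name and the __builtin_ctzll call stand in for ART's IsPowerOfTwo/LowestSetBit helpers and are not part of the patch:

#include <cstdint>

// Returns true and writes the product when the multiplier is one of the cheap
// special cases; returns false when the caller must emit a real multiply.
static bool TryMulLongConst(int64_t src, int64_t val, int64_t* out) {
  if (val == 0) { *out = 0;         return true; }   // xor dest, dest
  if (val == 1) { *out = src;       return true; }   // plain copy
  if (val == 2) { *out = src + src; return true; }   // ADD_LONG src, src
  if (val > 0 && (val & (val - 1)) == 0) {           // positive power of two
    int shift_amount = __builtin_ctzll(static_cast<uint64_t>(val));  // LowestSetBit
    *out = src << shift_amount;                      // SHL_LONG by shift_amount
    return true;
  }
  return false;  // fall back to the general multiply
}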
+ if (!cu_->target64) { int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); FlushAllRegs(); @@ -1408,10 +1430,48 @@ void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; StoreValueWide(rl_dest, rl_result); + return true; + } + return false; +} + +void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + if (rl_src1.is_const) { + std::swap(rl_src1, rl_src2); + } + + if (rl_src2.is_const) { + if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) { + return; + } + } + + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + + if (cu_->target64) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && + rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { + NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() && + rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { + NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); + } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && + rl_result.reg.GetReg() != rl_src2.reg.GetReg()) { + NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); + } else { + OpRegCopy(rl_result.reg, rl_src1.reg); + NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); + } + StoreValueWide(rl_dest, rl_result); return; } - // Nope. Do it the hard way + // Not multiplying by a constant. Do it the hard way // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L. bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) == mir_graph_->SRegToVReg(rl_src2.s_reg_low); @@ -1681,31 +1741,6 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, StoreFinalValueWide(rl_dest, rl_src1); } -void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); -} - -void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); -} - -void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); -} - -void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); -} - -void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); -} - void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { if (cu_->target64) { rl_src = LoadValueWide(rl_src, kCoreReg); @@ -2214,7 +2249,7 @@ void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest } else if (shift_amount == 1 && (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { // Need to handle this here to avoid calling StoreValueWide twice. 
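For reference, the 32-bit "hard way" multiply that GenMulLong falls back to above produces a 64-bit product from 32-bit halves: the two cross terms contribute only to the high word, and a full 32x32->64 multiply of the low halves supplies the rest. A sketch of the arithmetic in plain C++ (the math only, not the register and temporary handling):

#include <cstdint>

static uint64_t MulLong32(uint64_t a, uint64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
  uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
  uint32_t hi = a_hi * b_lo + a_lo * b_hi;                 // cross terms, low 32 bits only
  uint64_t lo_full = static_cast<uint64_t>(a_lo) * b_lo;   // like EDX:EAX after x86 `mul`
  hi += static_cast<uint32_t>(lo_full >> 32);
  return (static_cast<uint64_t>(hi) << 32) | static_cast<uint32_t>(lo_full);
}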
- GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); + GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); return; } if (BadOverlap(rl_src, rl_dest)) { @@ -2246,7 +2281,7 @@ void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, if (rl_src2.is_const) { isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { - GenSubLong(opcode, rl_dest, rl_src1, rl_src2); + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); isConstSuccess = true; } break; diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index a72d94a..69f3e67 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -971,19 +971,21 @@ void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeT method_address_insns_.Insert(move); } -void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) { +void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, + SpecialTargetRegister symbolic_reg) { /* * For x86, just generate a 32 bit move immediate instruction, that will be filled * in at 'link time'. For now, put a unique value based on target to ensure that * code deduplication works. */ - const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx); + const DexFile::TypeId& id = dex_file.GetTypeId(type_idx); uintptr_t ptr = reinterpret_cast<uintptr_t>(&id); // Generate the move instruction with the unique pointer and save index and type. LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, kNotWide).GetReg(), - static_cast<int>(ptr), type_idx); + static_cast<int>(ptr), type_idx, + WrapPointer(const_cast<DexFile*>(&dex_file))); AppendLIR(move); class_type_address_insns_.Insert(move); } @@ -1068,12 +1070,16 @@ void X86Mir2Lir::InstallLiteralPools() { for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) { LIR* p = class_type_address_insns_.Get(i); DCHECK_EQ(p->opcode, kX86Mov32RI); + + const DexFile* class_dex_file = + reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3])); uint32_t target_method_idx = p->operands[2]; // The offset to patch is the last 4 bytes of the instruction. int patch_offset = p->offset + p->flags.size - 4; cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx, - cu_->method_idx, target_method_idx, patch_offset); + cu_->method_idx, target_method_idx, class_dex_file, + patch_offset); } // And now the PC-relative calls to methods. 
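The LoadClassType/InstallLiteralPools change above exists so that each class-pointer patch records which DexFile its type index belongs to, which matters once types can come from more than one dex file. A rough sketch of the bookkeeping being threaded through, with invented stand-in names rather than ART's PatchInformation types:

#include <cstddef>
#include <cstdint>
#include <vector>

struct DexFileStub {};  // stands in for const art::DexFile

// One pending class-pointer patch: enough to find the TypeId in the right dex
// file at link time and to know which 32-bit immediate in the code to rewrite.
struct TypePatchRecord {
  const DexFileStub* target_type_dex_file;
  uint32_t target_type_idx;
  size_t literal_offset;
};

static std::vector<TypePatchRecord> pending_class_patches;

static void AddClassPatch(const DexFileStub* dex_file, uint32_t type_idx, size_t offset) {
  pending_class_patches.push_back({dex_file, type_idx, offset});
}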
@@ -1098,11 +1104,6 @@ void X86Mir2Lir::InstallLiteralPools() { } bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { - if (cu_->target64) { - // TODO: Implement ArrayCOpy intrinsic for x86_64 - return false; - } - RegLocation rl_src = info->args[0]; RegLocation rl_srcPos = info->args[1]; RegLocation rl_dst = info->args[2]; @@ -1115,31 +1116,32 @@ bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { return false; } ClobberCallerSave(); - LockCallTemps(); // Using fixed registers - LoadValueDirectFixed(rl_src , rs_rAX); - LoadValueDirectFixed(rl_dst , rs_rCX); - LIR* src_dst_same = OpCmpBranch(kCondEq, rs_rAX , rs_rCX, nullptr); - LIR* src_null_branch = OpCmpImmBranch(kCondEq, rs_rAX , 0, nullptr); - LIR* dst_null_branch = OpCmpImmBranch(kCondEq, rs_rCX , 0, nullptr); - LoadValueDirectFixed(rl_length , rs_rDX); - LIR* len_negative = OpCmpImmBranch(kCondLt, rs_rDX , 0, nullptr); - LIR* len_too_big = OpCmpImmBranch(kCondGt, rs_rDX , 128, nullptr); - LoadValueDirectFixed(rl_src , rs_rAX); - LoadWordDisp(rs_rAX , mirror::Array::LengthOffset().Int32Value(), rs_rAX); + LockCallTemps(); // Using fixed registers. + RegStorage tmp_reg = cu_->target64 ? rs_r11 : rs_rBX; + LoadValueDirectFixed(rl_src, rs_rAX); + LoadValueDirectFixed(rl_dst, rs_rCX); + LIR* src_dst_same = OpCmpBranch(kCondEq, rs_rAX, rs_rCX, nullptr); + LIR* src_null_branch = OpCmpImmBranch(kCondEq, rs_rAX, 0, nullptr); + LIR* dst_null_branch = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr); + LoadValueDirectFixed(rl_length, rs_rDX); + // If the length of the copy is > 128 characters (256 bytes) or negative then go slow path. + LIR* len_too_big = OpCmpImmBranch(kCondHi, rs_rDX, 128, nullptr); + LoadValueDirectFixed(rl_src, rs_rAX); + LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX); LIR* src_bad_len = nullptr; LIR* srcPos_negative = nullptr; if (!rl_srcPos.is_const) { - LoadValueDirectFixed(rl_srcPos , rs_rBX); - srcPos_negative = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr); - OpRegReg(kOpAdd, rs_rBX, rs_rDX); - src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + LoadValueDirectFixed(rl_srcPos, tmp_reg); + srcPos_negative = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr); + OpRegReg(kOpAdd, tmp_reg, rs_rDX); + src_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr); } else { - int pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg); + int32_t pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg); if (pos_val == 0) { - src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr); + src_bad_len = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr); } else { - OpRegRegImm(kOpAdd, rs_rBX, rs_rDX, pos_val); - src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + OpRegRegImm(kOpAdd, tmp_reg, rs_rDX, pos_val); + src_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr); } } LIR* dstPos_negative = nullptr; @@ -1147,49 +1149,49 @@ bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { LoadValueDirectFixed(rl_dst, rs_rAX); LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX); if (!rl_dstPos.is_const) { - LoadValueDirectFixed(rl_dstPos , rs_rBX); - dstPos_negative = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr); - OpRegRegReg(kOpAdd, rs_rBX, rs_rBX, rs_rDX); - dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + LoadValueDirectFixed(rl_dstPos, tmp_reg); + dstPos_negative = OpCmpImmBranch(kCondLt, tmp_reg, 0, nullptr); + OpRegRegReg(kOpAdd, tmp_reg, tmp_reg, rs_rDX); + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr); } else 
{ - int pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg); + int32_t pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg); if (pos_val == 0) { - dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr); + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, rs_rDX, nullptr); } else { - OpRegRegImm(kOpAdd, rs_rBX, rs_rDX, pos_val); - dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + OpRegRegImm(kOpAdd, tmp_reg, rs_rDX, pos_val); + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX, tmp_reg, nullptr); } } - // everything is checked now - LoadValueDirectFixed(rl_src , rs_rAX); - LoadValueDirectFixed(rl_dst , rs_rBX); - LoadValueDirectFixed(rl_srcPos , rs_rCX); + // Everything is checked now. + LoadValueDirectFixed(rl_src, rs_rAX); + LoadValueDirectFixed(rl_dst, tmp_reg); + LoadValueDirectFixed(rl_srcPos, rs_rCX); NewLIR5(kX86Lea32RA, rs_rAX.GetReg(), rs_rAX.GetReg(), - rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value()); - // RAX now holds the address of the first src element to be copied + rs_rCX.GetReg(), 1, mirror::Array::DataOffset(2).Int32Value()); + // RAX now holds the address of the first src element to be copied. - LoadValueDirectFixed(rl_dstPos , rs_rCX); - NewLIR5(kX86Lea32RA, rs_rBX.GetReg(), rs_rBX.GetReg(), - rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value() ); - // RBX now holds the address of the first dst element to be copied + LoadValueDirectFixed(rl_dstPos, rs_rCX); + NewLIR5(kX86Lea32RA, tmp_reg.GetReg(), tmp_reg.GetReg(), + rs_rCX.GetReg(), 1, mirror::Array::DataOffset(2).Int32Value() ); + // RBX now holds the address of the first dst element to be copied. - // check if the number of elements to be copied is odd or even. If odd + // Check if the number of elements to be copied is odd or even. If odd // then copy the first element (so that the remaining number of elements // is even). - LoadValueDirectFixed(rl_length , rs_rCX); + LoadValueDirectFixed(rl_length, rs_rCX); OpRegImm(kOpAnd, rs_rCX, 1); LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr); OpRegImm(kOpSub, rs_rDX, 1); LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSignedHalf); - StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSignedHalf); + StoreBaseIndexedDisp(tmp_reg, rs_rDX, 1, 0, rs_rCX, kSignedHalf); - // since the remaining number of elements is even, we will copy by + // Since the remaining number of elements is even, we will copy by // two elements at a time. 
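The inlined char[] arraycopy above copies one element first when the length is odd, then moves two 16-bit elements (one 32-bit load/store) per iteration while walking the index down to zero. The same copy strategy written as ordinary C++, a sketch of the strategy rather than the emitted LIR:

#include <cstdint>
#include <cstring>

static void CopyChars(const uint16_t* src, uint16_t* dst, int32_t len) {
  if ((len & 1) != 0) {        // odd count: copy one element so the remainder is even
    len--;
    dst[len] = src[len];
  }
  while (len != 0) {           // even count: copy two chars (4 bytes) per iteration
    len -= 2;
    std::memcpy(&dst[len], &src[len], 2 * sizeof(uint16_t));
  }
}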
- LIR *beginLoop = NewLIR0(kPseudoTargetLabel); - LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_rDX , 0, nullptr); + LIR* beginLoop = NewLIR0(kPseudoTargetLabel); + LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_rDX, 0, nullptr); OpRegImm(kOpSub, rs_rDX, 2); LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSingle); - StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSingle); + StoreBaseIndexedDisp(tmp_reg, rs_rDX, 1, 0, rs_rCX, kSingle); OpUnconditionalBranch(beginLoop); LIR *check_failed = NewLIR0(kPseudoTargetLabel); LIR* launchpad_branch = OpUnconditionalBranch(nullptr); @@ -1197,7 +1199,6 @@ bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { jmp_to_ret->target = return_point; jmp_to_begin_loop->target = beginLoop; src_dst_same->target = check_failed; - len_negative->target = check_failed; len_too_big->target = check_failed; src_null_branch->target = check_failed; if (srcPos_negative != nullptr) @@ -1442,11 +1443,6 @@ static void AdvanceLoc(std::vector<uint8_t>&buf, uint32_t increment) { } } - -std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64) { - return X86Mir2Lir::ReturnCommonCallFrameInformation(is_x86_64); -} - static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) { uint8_t buffer[12]; uint8_t *ptr = EncodeUnsignedLeb128(buffer, value); @@ -1463,84 +1459,6 @@ static void EncodeSignedLeb128(std::vector<uint8_t>& buf, int32_t value) { } } -std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation(bool is_x86_64) { - std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>; - - // Length (will be filled in later in this routine). - PushWord(*cfi_info, 0); - - // CIE id: always 0. - PushWord(*cfi_info, 0); - - // Version: always 1. - cfi_info->push_back(0x01); - - // Augmentation: 'zR\0' - cfi_info->push_back(0x7a); - cfi_info->push_back(0x52); - cfi_info->push_back(0x0); - - // Code alignment: 1. - EncodeUnsignedLeb128(*cfi_info, 1); - - // Data alignment. - if (is_x86_64) { - EncodeSignedLeb128(*cfi_info, -8); - } else { - EncodeSignedLeb128(*cfi_info, -4); - } - - // Return address register. - if (is_x86_64) { - // R16(RIP) - cfi_info->push_back(0x10); - } else { - // R8(EIP) - cfi_info->push_back(0x08); - } - - // Augmentation length: 1. - cfi_info->push_back(1); - - // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - cfi_info->push_back(0x03); - - // Initial instructions. - if (is_x86_64) { - // DW_CFA_def_cfa R7(RSP) 8. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x07); - cfi_info->push_back(0x08); - - // DW_CFA_offset R16(RIP) 1 (* -8). - cfi_info->push_back(0x90); - cfi_info->push_back(0x01); - } else { - // DW_CFA_def_cfa R4(ESP) 4. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x04); - cfi_info->push_back(0x04); - - // DW_CFA_offset R8(EIP) 1 (* -4). - cfi_info->push_back(0x88); - cfi_info->push_back(0x01); - } - - // Padding to a multiple of 4 - while ((cfi_info->size() & 3) != 0) { - // DW_CFA_nop is encoded as 0. - cfi_info->push_back(0); - } - - // Set the length of the CIE inside the generated bytes. 
- uint32_t length = cfi_info->size() - 4; - (*cfi_info)[0] = length; - (*cfi_info)[1] = length >> 8; - (*cfi_info)[2] = length >> 16; - (*cfi_info)[3] = length >> 24; - return cfi_info; -} - static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) { if (is_x86_64) { switch (art_reg_id) { diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index a77d79e..a48613f 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -565,6 +565,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { bool is_fp = r_dest.IsFloat(); // TODO: clean this up once we fully recognize 64-bit storage containers. if (is_fp) { + DCHECK(r_dest.IsDouble()); if (value == 0) { return NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); } else if (base_of_code_ != nullptr) { @@ -594,16 +595,23 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { Clobber(rl_method.reg); store_method_addr_used_ = true; } else { - if (val_lo == 0) { - res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); + if (r_dest.IsPair()) { + if (val_lo == 0) { + res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); + } else { + res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo); + } + if (val_hi != 0) { + RegStorage r_dest_hi = AllocTempDouble(); + LoadConstantNoClobber(r_dest_hi, val_hi); + NewLIR2(kX86PunpckldqRR, low_reg_val, r_dest_hi.GetReg()); + FreeTemp(r_dest_hi); + } } else { - res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo); - } - if (val_hi != 0) { - RegStorage r_dest_hi = AllocTempDouble(); - LoadConstantNoClobber(r_dest_hi, val_hi); - NewLIR2(kX86PunpckldqRR, low_reg_val, r_dest_hi.GetReg()); - FreeTemp(r_dest_hi); + RegStorage r_temp = AllocTypedTempWide(false, kCoreReg); + res = LoadConstantWide(r_temp, value); + OpRegCopyWide(r_dest, r_temp); + FreeTemp(r_temp); } } } else { @@ -1008,8 +1016,8 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir) { } void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { - // If this is a double literal, we will want it in the literal pool. - if (use.is_const) { + // If this is a double literal, we will want it in the literal pool on 32b platforms. + if (use.is_const && !cu_->target64) { store_method_addr_ = true; } } @@ -1043,12 +1051,18 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) { } void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir) { + // For now this is only actual for x86-32. 
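The reworked LoadConstantWide path above materializes a 64-bit double constant in an XMM register on 32-bit targets by loading the two 32-bit halves separately and interleaving them with punpckldq. The same idea expressed with SSE2 intrinsics, as an illustrative sketch rather than the LIR the backend emits (and ignoring the zero-half shortcuts the real code takes):

#include <emmintrin.h>
#include <cstdint>

static __m128d LoadDoubleConstBits(int64_t bits) {
  __m128i lo = _mm_cvtsi32_si128(static_cast<int32_t>(bits));        // movd, low 32 bits
  __m128i hi = _mm_cvtsi32_si128(static_cast<int32_t>(bits >> 32));  // movd, high 32 bits
  __m128i combined = _mm_unpacklo_epi32(lo, hi);                     // punpckldq lo, hi
  return _mm_castsi128_pd(combined);                                 // reinterpret as double
}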
+ if (cu_->target64) { + return; + } + uint32_t index = mir->dalvikInsn.vB; if (!(mir->optimization_flags & MIR_INLINED)) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); + DexFileMethodInliner* method_inliner = + cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); InlineMethod method; - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) - ->IsIntrinsic(index, &method)) { + if (method_inliner->IsIntrinsic(index, &method)) { switch (method.opcode) { case kIntrinsicAbsDouble: case kIntrinsicMinMaxDouble: diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index f85bc65..f40120e 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -353,7 +353,6 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, compiler_enable_auto_elf_loading_(NULL), compiler_get_method_code_addr_(NULL), support_boot_image_fixup_(instruction_set != kMips), - cfi_info_(nullptr), dedupe_code_("dedupe code"), dedupe_mapping_table_("dedupe mapping table"), dedupe_vmap_table_("dedupe vmap table"), @@ -376,11 +375,6 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, CHECK(image_classes_.get() == nullptr); } - // Are we generating CFI information? - if (compiler_options->GetGenerateGDBInformation()) { - cfi_info_.reset(compiler_->GetCallFrameInformationInitialization(*this)); - } - // Read the profile file if one is provided. if (!profile_file.empty()) { profile_present_ = profile_file_.LoadFile(profile_file); @@ -597,7 +591,7 @@ void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFi for (size_t i = 0; i != dex_files.size(); ++i) { const DexFile* dex_file = dex_files[i]; CHECK(dex_file != nullptr); - ResolveDexFile(class_loader, *dex_file, thread_pool, timings); + ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } } @@ -933,13 +927,13 @@ bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_i } *out_is_finalizable = resolved_class->IsFinalizable(); const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot(); + const bool support_boot_image_fixup = GetSupportBootImageFixup(); if (compiling_boot) { // boot -> boot class pointers. // True if the class is in the image at boot compiling time. const bool is_image_class = IsImage() && IsImageClass( dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_)); // True if pc relative load works. - const bool support_boot_image_fixup = GetSupportBootImageFixup(); if (is_image_class && support_boot_image_fixup) { *is_type_initialized = resolved_class->IsInitialized(); *use_direct_type_ptr = false; @@ -952,7 +946,7 @@ bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_i // True if the class is in the image at app compiling time. const bool class_in_image = Runtime::Current()->GetHeap()->FindSpaceFromObject(resolved_class, false)->IsImageSpace(); - if (class_in_image) { + if (class_in_image && support_boot_image_fixup) { // boot -> app class pointers. *is_type_initialized = resolved_class->IsInitialized(); // TODO This is somewhat hacky. We should refactor all of this invoke codepath. 
@@ -969,6 +963,43 @@ bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_i } } +bool CompilerDriver::CanEmbedReferenceTypeInCode(ClassReference* ref, + bool* use_direct_ptr, + uintptr_t* direct_type_ptr) { + CHECK(ref != nullptr); + CHECK(use_direct_ptr != nullptr); + CHECK(direct_type_ptr != nullptr); + + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference(); + bool is_initialized; + bool unused_finalizable; + // Make sure we have a finished Reference class object before attempting to use it. + if (!CanEmbedTypeInCode(*reference_class->GetDexCache()->GetDexFile(), + reference_class->GetDexTypeIndex(), &is_initialized, + use_direct_ptr, direct_type_ptr, &unused_finalizable) || + !is_initialized) { + return false; + } + ref->first = &reference_class->GetDexFile(); + ref->second = reference_class->GetDexClassDefIndex(); + return true; +} + +uint32_t CompilerDriver::GetReferenceSlowFlagOffset() const { + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); + DCHECK(klass->IsInitialized()); + return klass->GetSlowPathFlagOffset().Uint32Value(); +} + +uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const { + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); + DCHECK(klass->IsInitialized()); + return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); +} + void CompilerDriver::ProcessedInstanceField(bool resolved) { if (!resolved) { stats_->UnresolvedInstanceField(); @@ -1340,12 +1371,14 @@ void CompilerDriver::AddClassPatch(const DexFile* dex_file, uint16_t referrer_class_def_idx, uint32_t referrer_method_idx, uint32_t target_type_idx, + const DexFile* target_type_dex_file, size_t literal_offset) { MutexLock mu(Thread::Current(), compiled_methods_lock_); classes_to_patch_.push_back(new TypePatchInformation(dex_file, referrer_class_def_idx, referrer_method_idx, target_type_idx, + target_type_dex_file, literal_offset)); } @@ -1357,12 +1390,14 @@ class ParallelCompilationManager { jobject class_loader, CompilerDriver* compiler, const DexFile* dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool) : index_(0), class_linker_(class_linker), class_loader_(class_loader), compiler_(compiler), dex_file_(dex_file), + dex_files_(dex_files), thread_pool_(thread_pool) {} ClassLinker* GetClassLinker() const { @@ -1384,6 +1419,10 @@ class ParallelCompilationManager { return dex_file_; } + const std::vector<const DexFile*>& GetDexFiles() const { + return dex_files_; + } + void ForAll(size_t begin, size_t end, Callback callback, size_t work_units) { Thread* self = Thread::Current(); self->AssertNoPendingException(); @@ -1441,11 +1480,24 @@ class ParallelCompilationManager { const jobject class_loader_; CompilerDriver* const compiler_; const DexFile* const dex_file_; + const std::vector<const DexFile*>& dex_files_; ThreadPool* const thread_pool_; DISALLOW_COPY_AND_ASSIGN(ParallelCompilationManager); }; +static bool SkipClassCheckClassPath(const char* descriptor, const DexFile& dex_file, + const std::vector<const DexFile*>& classpath) { + DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, classpath); + CHECK(pair.second != NULL); + if (pair.first != &dex_file) { + LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation() + << " previously found in " << pair.first->GetLocation(); + return true; + } + return false; 
+} + // Return true if the class should be skipped during compilation. // // The first case where we skip is for redundant class definitions in @@ -1454,20 +1506,23 @@ class ParallelCompilationManager { // The second case where we skip is when an app bundles classes found // in the boot classpath. Since at runtime we will select the class from // the boot classpath, we ignore the one from the app. +// +// The third case is if the app itself has the class defined in multiple dex files. Then we skip +// it if it is not the first occurrence. static bool SkipClass(ClassLinker* class_linker, jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, const DexFile::ClassDef& class_def) { const char* descriptor = dex_file.GetClassDescriptor(class_def); + if (class_loader == NULL) { - DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, class_linker->GetBootClassPath()); - CHECK(pair.second != NULL); - if (pair.first != &dex_file) { - LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation() - << " previously found in " << pair.first->GetLocation(); - return true; - } - return false; + return SkipClassCheckClassPath(descriptor, dex_file, class_linker->GetBootClassPath()); + } + + if (class_linker->IsInBootClassPath(descriptor)) { + return true; } - return class_linker->IsInBootClassPath(descriptor); + + return SkipClassCheckClassPath(descriptor, dex_file, dex_files); } // A fast version of SkipClass above if the class pointer is available @@ -1525,7 +1580,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag // definitions, since many of them many never be referenced by // generated code. const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) { + if (!SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) { ScopedObjectAccess soa(self); StackHandleScope<2> hs(soa.Self()); Handle<mirror::ClassLoader> class_loader( @@ -1632,13 +1687,15 @@ static void ResolveType(const ParallelCompilationManager* manager, size_t type_i } void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); // TODO: we could resolve strings here, although the string table is largely filled with class // and method names. - ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool); + ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, + thread_pool); if (IsImage()) { // For images we resolve all types, such as array, whereas for applications just those with // classdefs are resolved by ResolveClassFieldsAndMethods. 
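The reworked SkipClass above skips a class whenever an earlier dex file, either on the boot classpath or earlier in the app's own dex file list, already defines the same descriptor, so only the first definition gets compiled. Sketched below with simplified stand-in types (not ART's DexFile API):

#include <string>
#include <vector>

struct FakeDexFile {
  std::string location;
  std::vector<std::string> defined_descriptors;
  bool Defines(const std::string& descriptor) const {
    for (const std::string& d : defined_descriptors) {
      if (d == descriptor) return true;
    }
    return false;
  }
};

// First definition in `path` wins: skip compiling `descriptor` from `dex_file`
// if some file earlier in the search order already defines it.
static bool SkipIfDefinedEarlier(const std::string& descriptor,
                                 const FakeDexFile& dex_file,
                                 const std::vector<const FakeDexFile*>& path) {
  for (const FakeDexFile* candidate : path) {
    if (candidate->Defines(descriptor)) {
      return candidate != &dex_file;
    }
  }
  return false;
}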
@@ -1655,7 +1712,7 @@ void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFil for (size_t i = 0; i != dex_files.size(); ++i) { const DexFile* dex_file = dex_files[i]; CHECK(dex_file != NULL); - VerifyDexFile(class_loader, *dex_file, thread_pool, timings); + VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } } @@ -1707,10 +1764,12 @@ static void VerifyClass(const ParallelCompilationManager* manager, size_t class_ } void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { TimingLogger::ScopedTiming t("Verify Dex File", timings); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool); + ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, + thread_pool); context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_); } @@ -1800,10 +1859,12 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl } void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { TimingLogger::ScopedTiming t("InitializeNoClinit", timings); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, thread_pool); + ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files, + thread_pool); size_t thread_count; if (IsImage()) { // TODO: remove this when transactional mode supports multithreading. 
@@ -1824,7 +1885,7 @@ void CompilerDriver::InitializeClasses(jobject class_loader, for (size_t i = 0; i != dex_files.size(); ++i) { const DexFile* dex_file = dex_files[i]; CHECK(dex_file != NULL); - InitializeClasses(class_loader, *dex_file, thread_pool, timings); + InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings); } } @@ -1833,7 +1894,7 @@ void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFi for (size_t i = 0; i != dex_files.size(); ++i) { const DexFile* dex_file = dex_files[i]; CHECK(dex_file != NULL); - CompileDexFile(class_loader, *dex_file, thread_pool, timings); + CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings); } } @@ -1843,7 +1904,7 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz const DexFile& dex_file = *manager->GetDexFile(); const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); ClassLinker* class_linker = manager->GetClassLinker(); - if (SkipClass(class_linker, jclass_loader, dex_file, class_def)) { + if (SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) { return; } ClassReference ref(&dex_file, class_def_index); @@ -1912,10 +1973,11 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz } void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) { TimingLogger::ScopedTiming t("Compile Dex File", timings); ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, - &dex_file, thread_pool); + &dex_file, dex_files, thread_pool); context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 6dae398..2a5cdb9 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -213,6 +213,12 @@ class CompilerDriver { bool* is_type_initialized, bool* use_direct_type_ptr, uintptr_t* direct_type_ptr, bool* out_is_finalizable); + // Query methods for the java.lang.ref.Reference class. 
+ bool CanEmbedReferenceTypeInCode(ClassReference* ref, + bool* use_direct_type_ptr, uintptr_t* direct_type_ptr); + uint32_t GetReferenceSlowFlagOffset() const; + uint32_t GetReferenceDisableFlagOffset() const; + // Get the DexCache for the mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -356,6 +362,7 @@ class CompilerDriver { uint16_t referrer_class_def_idx, uint32_t referrer_method_idx, uint32_t target_method_idx, + const DexFile* target_dex_file, size_t literal_offset) LOCKS_EXCLUDED(compiled_methods_lock_); @@ -402,10 +409,6 @@ class CompilerDriver { return dump_passes_; } - bool DidIncludeDebugSymbols() const { - return compiler_options_->GetIncludeDebugSymbols(); - } - CumulativeLogger* GetTimingsLogger() const { return timings_logger_; } @@ -549,6 +552,10 @@ class CompilerDriver { class TypePatchInformation : public PatchInformation { public: + const DexFile& GetTargetTypeDexFile() const { + return *target_type_dex_file_; + } + uint32_t GetTargetTypeIdx() const { return target_type_idx_; } @@ -565,13 +572,15 @@ class CompilerDriver { uint16_t referrer_class_def_idx, uint32_t referrer_method_idx, uint32_t target_type_idx, + const DexFile* target_type_dex_file, size_t literal_offset) : PatchInformation(dex_file, referrer_class_def_idx, referrer_method_idx, literal_offset), - target_type_idx_(target_type_idx) { + target_type_idx_(target_type_idx), target_type_dex_file_(target_type_dex_file) { } const uint32_t target_type_idx_; + const DexFile* target_type_dex_file_; friend class CompilerDriver; DISALLOW_COPY_AND_ASSIGN(TypePatchInformation); @@ -599,14 +608,6 @@ class CompilerDriver { std::vector<uint8_t>* DeduplicateGCMap(const std::vector<uint8_t>& code); std::vector<uint8_t>* DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info); - /* - * @brief return the pointer to the Call Frame Information. - * @return pointer to call frame information for this compilation. 
- */ - std::vector<uint8_t>* GetCallFrameInformation() const { - return cfi_info_.get(); - } - ProfileFile profile_file_; bool profile_present_; @@ -658,12 +659,14 @@ class CompilerDriver { ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); void ResolveDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings); void VerifyDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); @@ -671,6 +674,7 @@ class CompilerDriver { ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); void InitializeClasses(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_); @@ -681,6 +685,7 @@ class CompilerDriver { void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings); void CompileDexFile(jobject class_loader, const DexFile& dex_file, + const std::vector<const DexFile*>& dex_files, ThreadPool* thread_pool, TimingLogger* timings) LOCKS_EXCLUDED(Locks::mutator_lock_); void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, @@ -766,9 +771,6 @@ class CompilerDriver { bool support_boot_image_fixup_; - // Call Frame Information, which might be generated to help stack tracebacks. - std::unique_ptr<std::vector<uint8_t>> cfi_info_; - // DeDuplication data structures, these own the corresponding byte arrays. 
class DedupeHashFunc { public: diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc index 6112fbb..9ae755d 100644 --- a/compiler/elf_patcher.cc +++ b/compiler/elf_patcher.cc @@ -99,11 +99,13 @@ mirror::ArtMethod* ElfPatcher::GetTargetMethod(const CompilerDriver::CallPatchIn mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); StackHandleScope<2> hs(Thread::Current()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile()))); - mirror::Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(), + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache( + patch->GetTargetTypeDexFile()))); + mirror::Class* klass = class_linker->ResolveType(patch->GetTargetTypeDexFile(), + patch->GetTargetTypeIdx(), dex_cache, NullHandle<mirror::ClassLoader>()); CHECK(klass != NULL) - << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx(); + << patch->GetTargetTypeDexFile().GetLocation() << " " << patch->GetTargetTypeIdx(); CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass) << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " " << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " " @@ -120,6 +122,7 @@ void ElfPatcher::AddPatch(uintptr_t p) { uint32_t* ElfPatcher::GetPatchLocation(uintptr_t patch_ptr) { CHECK_GE(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->Begin())); + CHECK_LE(patch_ptr, reinterpret_cast<uintptr_t>(oat_file_->End())); uintptr_t off = patch_ptr - reinterpret_cast<uintptr_t>(oat_file_->Begin()); uintptr_t ret = reinterpret_cast<uintptr_t>(oat_header_) + off; @@ -144,20 +147,20 @@ void ElfPatcher::SetPatchLocation(const CompilerDriver::PatchInformation* patch, cpatch->GetTargetDexFile()->GetMethodId(cpatch->GetTargetMethodIdx()); uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF; uint32_t actual = *patch_location; - CHECK(actual == expected || actual == value) << std::hex - << "actual=" << actual - << "expected=" << expected - << "value=" << value; + CHECK(actual == expected || actual == value) << "Patching call failed: " << std::hex + << " actual=" << actual + << " expected=" << expected + << " value=" << value; } if (patch->IsType()) { const CompilerDriver::TypePatchInformation* tpatch = patch->AsType(); - const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx()); + const DexFile::TypeId& id = tpatch->GetTargetTypeDexFile().GetTypeId(tpatch->GetTargetTypeIdx()); uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF; uint32_t actual = *patch_location; - CHECK(actual == expected || actual == value) << std::hex - << "actual=" << actual - << "expected=" << expected - << "value=" << value; + CHECK(actual == expected || actual == value) << "Patching type failed: " << std::hex + << " actual=" << actual + << " expected=" << expected + << " value=" << value; } } *patch_location = value; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 1fde12e..71f02d3 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -24,6 +24,7 @@ #include "elf_utils.h" #include "file_output_stream.h" #include "globals.h" +#include "leb128.h" #include "oat.h" #include "oat_writer.h" #include "utils.h" @@ -38,6 +39,25 @@ static uint8_t MakeStInfo(uint8_t binding, uint8_t type) { return ((binding) << 4) + 
((type) & 0xf); } +static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) { + (*buf)[offset+0] = data; + (*buf)[offset+1] = data >> 8; + (*buf)[offset+2] = data >> 16; + (*buf)[offset+3] = data >> 24; +} + +static void PushWord(std::vector<uint8_t>* buf, int data) { + buf->push_back(data & 0xff); + buf->push_back((data >> 8) & 0xff); + buf->push_back((data >> 16) & 0xff); + buf->push_back((data >> 24) & 0xff); +} + +static void PushHalf(std::vector<uint8_t>* buf, int data) { + buf->push_back(data & 0xff); + buf->push_back((data >> 8) & 0xff); +} + bool ElfWriterQuick::ElfBuilder::Write() { // The basic layout of the elf file. Order may be different in final output. // +-------------------------+ @@ -822,37 +842,131 @@ void ElfWriterQuick::ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug) } } +static void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) { + size_t encoded_size = UnsignedLeb128Size(data); + size_t cur_index = dst->size(); + dst->resize(dst->size() + encoded_size); + uint8_t* write_pos = &((*dst)[cur_index]); + uint8_t* write_pos_after = EncodeUnsignedLeb128(write_pos, data); + DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size); +} + +static void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) { + size_t encoded_size = SignedLeb128Size(data); + size_t cur_index = dst->size(); + dst->resize(dst->size() + encoded_size); + uint8_t* write_pos = &((*dst)[cur_index]); + uint8_t* write_pos_after = EncodeSignedLeb128(write_pos, data); + DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size); +} + +std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { + std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>; + + // Length (will be filled in later in this routine). + PushWord(cfi_info, 0); + + // CIE id: always 0. + PushWord(cfi_info, 0); + + // Version: always 1. + cfi_info->push_back(0x01); + + // Augmentation: 'zR\0' + cfi_info->push_back(0x7a); + cfi_info->push_back(0x52); + cfi_info->push_back(0x0); + + // Code alignment: 1. + EncodeUnsignedLeb128(1, cfi_info); + + // Data alignment. + if (is_x86_64) { + EncodeSignedLeb128(-8, cfi_info); + } else { + EncodeSignedLeb128(-4, cfi_info); + } + + // Return address register. + if (is_x86_64) { + // R16(RIP) + cfi_info->push_back(0x10); + } else { + // R8(EIP) + cfi_info->push_back(0x08); + } + + // Augmentation length: 1. + cfi_info->push_back(1); + + // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). + cfi_info->push_back(0x03); + + // Initial instructions. + if (is_x86_64) { + // DW_CFA_def_cfa R7(RSP) 8. + cfi_info->push_back(0x0c); + cfi_info->push_back(0x07); + cfi_info->push_back(0x08); + + // DW_CFA_offset R16(RIP) 1 (* -8). + cfi_info->push_back(0x90); + cfi_info->push_back(0x01); + } else { + // DW_CFA_def_cfa R4(ESP) 4. + cfi_info->push_back(0x0c); + cfi_info->push_back(0x04); + cfi_info->push_back(0x04); + + // DW_CFA_offset R8(EIP) 1 (* -4). + cfi_info->push_back(0x88); + cfi_info->push_back(0x01); + } + + // Padding to a multiple of 4 + while ((cfi_info->size() & 3) != 0) { + // DW_CFA_nop is encoded as 0. + cfi_info->push_back(0); + } + + // Set the length of the CIE inside the generated bytes. 
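The EncodeUnsignedLeb128/EncodeSignedLeb128 wrappers above delegate to the leb128.h helpers; for readers unfamiliar with the format, standard LEB128 packs seven payload bits per byte and uses the top bit as a continuation flag. A self-contained sketch of equivalent encoders (helper names invented, two's-complement arithmetic shift assumed for the signed case):

#include <cstdint>
#include <vector>

static void PushUleb128(std::vector<uint8_t>* dst, uint32_t value) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0) byte |= 0x80;  // more bytes follow
    dst->push_back(byte);
  } while (value != 0);
}

static void PushSleb128(std::vector<uint8_t>* dst, int32_t value) {
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7f;
    value >>= 7;  // arithmetic shift keeps the sign
    // Stop once the remaining bits are pure sign extension of the 7-bit payload.
    if ((value == 0 && (byte & 0x40) == 0) || (value == -1 && (byte & 0x40) != 0)) {
      more = false;
    } else {
      byte |= 0x80;
    }
    dst->push_back(byte);
  }
}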
+ uint32_t length = cfi_info->size() - 4; + (*cfi_info)[0] = length; + (*cfi_info)[1] = length >> 8; + (*cfi_info)[2] = length >> 16; + (*cfi_info)[3] = length >> 24; + return cfi_info; +} + +std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) { + switch (isa) { + case kX86: + return ConstructCIEFrameX86(false); + case kX86_64: + return ConstructCIEFrameX86(true); + + default: + // Not implemented. + return nullptr; + } +} + bool ElfWriterQuick::Write(OatWriter* oat_writer, const std::vector<const DexFile*>& dex_files_unused, const std::string& android_root_unused, bool is_host_unused) { - const bool debug = false; - const bool add_symbols = oat_writer->DidAddSymbols(); + constexpr bool debug = false; const OatHeader& oat_header = oat_writer->GetOatHeader(); Elf32_Word oat_data_size = oat_header.GetExecutableOffset(); uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size; ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0, - oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug); + oat_data_size, oat_data_size, oat_exec_size, + compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(), + debug); - if (add_symbols) { - AddDebugSymbols(builder, oat_writer, debug); - } - - bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr; - if (generateDebugInformation) { - ElfRawSectionBuilder debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); - eh_frame.SetBuffer(*compiler_driver_->GetCallFrameInformation()); - - FillInCFIInformation(oat_writer, debug_info.GetBuffer(), - debug_abbrev.GetBuffer(), debug_str.GetBuffer()); - builder.RegisterRawSection(debug_info); - builder.RegisterRawSection(debug_abbrev); - builder.RegisterRawSection(eh_frame); - builder.RegisterRawSection(debug_str); + if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + WriteDebugSymbols(builder, oat_writer); } if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) { @@ -865,32 +979,62 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, return builder.Write(); } -void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) { +void ElfWriterQuick::WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer) { + std::unique_ptr<std::vector<uint8_t>> cfi_info( + ConstructCIEFrame(compiler_driver_->GetInstructionSet())); + + // Iterate over the compiled methods. const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); ElfSymtabBuilder* symtab = &builder.symtab_builder_; for (auto it = method_info.begin(); it != method_info.end(); ++it) { symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); - } -} -static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) { - (*buf)[offset+0] = data; - (*buf)[offset+1] = data >> 8; - (*buf)[offset+2] = data >> 16; - (*buf)[offset+3] = data >> 24; -} + // Include CFI for compiled method, if possible. 
+ if (cfi_info.get() != nullptr) { + DCHECK(it->compiled_method_ != nullptr); + + // Copy in the FDE, if present + const std::vector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); + if (fde != nullptr) { + // Copy the information into cfi_info and then fix the address in the new copy. + int cur_offset = cfi_info->size(); + cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); + + // Set the 'CIE_pointer' field to cur_offset+4. + uint32_t CIE_pointer = cur_offset + 4; + uint32_t offset_to_update = cur_offset + sizeof(uint32_t); + (*cfi_info)[offset_to_update+0] = CIE_pointer; + (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; + (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; + (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; + + // Set the 'initial_location' field to address the start of the method. + offset_to_update = cur_offset + 2*sizeof(uint32_t); + const uint32_t quick_code_start = it->low_pc_; + (*cfi_info)[offset_to_update+0] = quick_code_start; + (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; + (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; + (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; + } + } + } -static void PushWord(std::vector<uint8_t>*buf, int data) { - buf->push_back(data & 0xff); - buf->push_back((data >> 8) & 0xff); - buf->push_back((data >> 16) & 0xff); - buf->push_back((data >> 24) & 0xff); -} + if (cfi_info.get() != nullptr) { + // Now lay down the Elf sections. + ElfRawSectionBuilder debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + ElfRawSectionBuilder debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + ElfRawSectionBuilder eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + eh_frame.SetBuffer(std::move(*cfi_info.get())); -static void PushHalf(std::vector<uint8_t>*buf, int data) { - buf->push_back(data & 0xff); - buf->push_back((data >> 8) & 0xff); + FillInCFIInformation(oat_writer, debug_info.GetBuffer(), debug_abbrev.GetBuffer(), + debug_str.GetBuffer()); + builder.RegisterRawSection(debug_info); + builder.RegisterRawSection(debug_abbrev); + builder.RegisterRawSection(eh_frame); + builder.RegisterRawSection(debug_str); + } } void ElfWriterQuick::FillInCFIInformation(OatWriter* oat_writer, diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h index a0d36df..8cfe550 100644 --- a/compiler/elf_writer_quick.h +++ b/compiler/elf_writer_quick.h @@ -48,9 +48,7 @@ class ElfWriterQuick FINAL : public ElfWriter { ~ElfWriterQuick() {} class ElfBuilder; - void AddDebugSymbols(ElfBuilder& builder, - OatWriter* oat_writer, - bool debug); + void WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer); void ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug); class ElfSectionBuilder { @@ -126,7 +124,7 @@ class ElfWriterQuick FINAL : public ElfWriter { : ElfSectionBuilder(sec_name, type, flags, link, info, align, entsize) {} ~ElfRawSectionBuilder() {} std::vector<uint8_t>* GetBuffer() { return &buf_; } - void SetBuffer(std::vector<uint8_t> buf) { buf_ = buf; } + void SetBuffer(std::vector<uint8_t>&& buf) { buf_ = buf; } protected: std::vector<uint8_t> buf_; diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 3005e56..6b23345 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -141,6 +141,8 @@ TEST_F(ImageTest, WriteRead) { std::string image("-Ximage:"); image.append(image_location.GetFilename()); 
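The per-method FDE handling moved into WriteDebugSymbols above amounts to appending each FDE to the .eh_frame buffer and then rewriting two little-endian words in the copy: the CIE pointer at offset +4 and the method's start address at offset +8. A compact sketch of those fix-ups (helper names are illustrative; the patch writes the bytes directly):

#include <cstddef>
#include <cstdint>
#include <vector>

static void StoreWordLE(std::vector<uint8_t>* buf, size_t offset, uint32_t data) {
  (*buf)[offset + 0] = static_cast<uint8_t>(data);
  (*buf)[offset + 1] = static_cast<uint8_t>(data >> 8);
  (*buf)[offset + 2] = static_cast<uint8_t>(data >> 16);
  (*buf)[offset + 3] = static_cast<uint8_t>(data >> 24);
}

static void AppendFde(std::vector<uint8_t>* cfi_info,
                      const std::vector<uint8_t>& fde,
                      uint32_t method_start_pc) {
  size_t cur_offset = cfi_info->size();
  cfi_info->insert(cfi_info->end(), fde.begin(), fde.end());
  StoreWordLE(cfi_info, cur_offset + 4, static_cast<uint32_t>(cur_offset + 4));  // CIE_pointer
  StoreWordLE(cfi_info, cur_offset + 8, method_start_pc);                        // initial_location
}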
options.push_back(std::make_pair(image.c_str(), reinterpret_cast<void*>(NULL))); + // By default the compiler this creates will not include patch information. + options.push_back(std::make_pair("-Xnorelocate", nullptr)); if (!Runtime::Create(options, false)) { LOG(FATAL) << "Failed to create runtime"; diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 9da59ab..1ba5d32 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -357,7 +357,6 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { uint32_t thumb_offset = compiled_method->CodeDelta(); quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; - bool force_debug_capture = false; bool deduped = false; // Deduplicate code arrays. @@ -400,47 +399,22 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { offset_ += code_size; } - uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset(); - std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation(); - if (cfi_info != nullptr) { - // Copy in the FDE, if present - const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. - int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - // Set the 'CIE_pointer' field to cur_offset+4. - uint32_t CIE_pointer = cur_offset + 4; - uint32_t offset_to_update = cur_offset + sizeof(uint32_t); - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - - // Set the 'initial_location' field to address the start of the method. - offset_to_update = cur_offset + 2*sizeof(uint32_t); - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - force_debug_capture = true; - } - } + if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + // Record debug information for this function if we are doing that. - - if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) { - // Record debug information for this function if we are doing that or - // we have CFI and so need it. std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true); if (deduped) { - // TODO We should place the DEDUPED tag on the first instance of a - // deduplicated symbol so that it will show up in a debuggerd crash - // report. + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. 
name += " [ DEDUPED ]"; } - writer_->method_info_.push_back(DebugInfo(name, quick_code_start, - quick_code_start + code_size)); + + const uint32_t quick_code_start = quick_code_offset - + writer_->oat_header_->GetExecutableOffset(); + writer_->method_info_.push_back(DebugInfo(name, + quick_code_start, + quick_code_start + code_size, + compiled_method)); } } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 945048e..ef5fd6b 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -30,6 +30,7 @@ namespace art { class BitVector; +class CompiledMethod; class OutputStream; // OatHeader variable length with count of D OatDexFiles @@ -97,22 +98,21 @@ class OatWriter { ~OatWriter(); struct DebugInfo { - DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc) - : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc) { + DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc, + CompiledMethod* compiled_method) + : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc), + compiled_method_(compiled_method) { } - std::string method_name_; + std::string method_name_; // Note: this name is a pretty-printed name. uint32_t low_pc_; uint32_t high_pc_; + CompiledMethod* compiled_method_; }; const std::vector<DebugInfo>& GetCFIMethodInfo() const { return method_info_; } - bool DidAddSymbols() const { - return compiler_driver_->DidIncludeDebugSymbols(); - } - private: // The DataAccess classes are helper classes that provide access to members related to // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index f4bcb1d..7bfbb6f 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -24,6 +24,7 @@ #include "base/mutex.h" #include "mem_map.h" #include "utils.h" +#include "utils/debug_stack.h" namespace art { @@ -34,6 +35,9 @@ class ArenaStack; class ScopedArenaAllocator; class MemStats; +template <typename T> +class ArenaAllocatorAdapter; + static constexpr bool kArenaAllocatorCountAllocations = false; // Type of allocation for memory tuning. @@ -147,11 +151,14 @@ class ArenaPool { DISALLOW_COPY_AND_ASSIGN(ArenaPool); }; -class ArenaAllocator : private ArenaAllocatorStats { +class ArenaAllocator : private DebugStackRefCounter, private ArenaAllocatorStats { public: explicit ArenaAllocator(ArenaPool* pool); ~ArenaAllocator(); + // Get adapter for use in STL containers. See arena_containers.h . + ArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); + // Returns zeroed memory. void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { if (UNLIKELY(running_on_valgrind_)) { @@ -190,6 +197,9 @@ class ArenaAllocator : private ArenaAllocatorStats { Arena* arena_head_; bool running_on_valgrind_; + template <typename U> + friend class ArenaAllocatorAdapter; + DISALLOW_COPY_AND_ASSIGN(ArenaAllocator); }; // ArenaAllocator diff --git a/compiler/utils/arena_containers.h b/compiler/utils/arena_containers.h new file mode 100644 index 0000000..c48b0c8 --- /dev/null +++ b/compiler/utils/arena_containers.h @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ +#define ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ + +#include <deque> +#include <queue> +#include <set> +#include <vector> + +#include "utils/arena_allocator.h" +#include "safe_map.h" + +namespace art { + +// Adapter for use of ArenaAllocator in STL containers. +// Use ArenaAllocator::Adapter() to create an adapter to pass to container constructors. +// For example, +// struct Foo { +// explicit Foo(ArenaAllocator* allocator) +// : foo_vector(allocator->Adapter(kArenaAllocMisc)), +// foo_map(std::less<int>(), allocator->Adapter()) { +// } +// ArenaVector<int> foo_vector; +// ArenaSafeMap<int, int> foo_map; +// }; +template <typename T> +class ArenaAllocatorAdapter; + +template <typename T> +using ArenaDeque = std::deque<T, ArenaAllocatorAdapter<T>>; + +template <typename T> +using ArenaQueue = std::queue<T, ArenaDeque<T>>; + +template <typename T> +using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>; + +template <typename T, typename Comparator = std::less<T>> +using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>; + +template <typename K, typename V, typename Comparator = std::less<K>> +using ArenaSafeMap = + SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>; + +// Implementation details below. + +template <bool kCount> +class ArenaAllocatorAdapterKindImpl; + +template <> +class ArenaAllocatorAdapterKindImpl<false> { + public: + // Not tracking allocations, ignore the supplied kind and arbitrarily provide kArenaAllocSTL. 
+ explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) { } + ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; + ArenaAllocKind Kind() { return kArenaAllocSTL; } +}; + +template <bool kCount> +class ArenaAllocatorAdapterKindImpl { + public: + explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { } + ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; + ArenaAllocKind Kind() { return kind_; } + + private: + ArenaAllocKind kind_; +}; + +typedef ArenaAllocatorAdapterKindImpl<kArenaAllocatorCountAllocations> ArenaAllocatorAdapterKind; + +template <> +class ArenaAllocatorAdapter<void> + : private DebugStackReference, private ArenaAllocatorAdapterKind { + public: + typedef void value_type; + typedef void* pointer; + typedef const void* const_pointer; + + template <typename U> + struct rebind { + typedef ArenaAllocatorAdapter<U> other; + }; + + explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, + ArenaAllocKind kind = kArenaAllocSTL) + : DebugStackReference(arena_allocator), + ArenaAllocatorAdapterKind(kind), + arena_allocator_(arena_allocator) { + } + template <typename U> + ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + ArenaAllocatorAdapterKind(other), + arena_allocator_(other.arena_allocator_) { + } + ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; + ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; + ~ArenaAllocatorAdapter() = default; + + private: + ArenaAllocator* arena_allocator_; + + template <typename U> + friend class ArenaAllocatorAdapter; +}; + +template <typename T> +class ArenaAllocatorAdapter : private DebugStackReference, private ArenaAllocatorAdapterKind { + public: + typedef T value_type; + typedef T* pointer; + typedef T& reference; + typedef const T* const_pointer; + typedef const T& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + template <typename U> + struct rebind { + typedef ArenaAllocatorAdapter<U> other; + }; + + explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, ArenaAllocKind kind) + : DebugStackReference(arena_allocator), + ArenaAllocatorAdapterKind(kind), + arena_allocator_(arena_allocator) { + } + template <typename U> + ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + ArenaAllocatorAdapterKind(other), + arena_allocator_(other.arena_allocator_) { + } + ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; + ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; + ~ArenaAllocatorAdapter() = default; + + size_type max_size() const { + return static_cast<size_type>(-1) / sizeof(T); + } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, ArenaAllocatorAdapter<void>::pointer hint = nullptr) { + DCHECK_LE(n, max_size()); + return reinterpret_cast<T*>(arena_allocator_->Alloc(n * sizeof(T), + ArenaAllocatorAdapterKind::Kind())); + } + void deallocate(pointer p, size_type n) { + } + + void construct(pointer p, const_reference val) { + new (static_cast<void*>(p)) value_type(val); + } + void destroy(pointer p) { + p->~value_type(); + } + + private: + ArenaAllocator* arena_allocator_; + + template <typename U> + friend class ArenaAllocatorAdapter; + + template <typename U> + friend bool operator==(const 
ArenaAllocatorAdapter<U>& lhs, + const ArenaAllocatorAdapter<U>& rhs); +}; + +template <typename T> +inline bool operator==(const ArenaAllocatorAdapter<T>& lhs, + const ArenaAllocatorAdapter<T>& rhs) { + return lhs.arena_allocator_ == rhs.arena_allocator_; +} + +template <typename T> +inline bool operator!=(const ArenaAllocatorAdapter<T>& lhs, + const ArenaAllocatorAdapter<T>& rhs) { + return !(lhs == rhs); +} + +inline ArenaAllocatorAdapter<void> ArenaAllocator::Adapter(ArenaAllocKind kind) { + return ArenaAllocatorAdapter<void>(this, kind); +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h index 9f33f2d..62ea330 100644 --- a/compiler/utils/scoped_arena_allocator.h +++ b/compiler/utils/scoped_arena_allocator.h @@ -120,8 +120,8 @@ class ScopedArenaAllocator return arena_stack_->Alloc(bytes, kind); } - // ScopedArenaAllocatorAdapter is incomplete here, we need to define this later. - ScopedArenaAllocatorAdapter<void> Adapter(); + // Get adapter for use in STL containers. See scoped_arena_containers.h . + ScopedArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); // Allow a delete-expression to destroy but not deallocate allocators created by Create(). static void operator delete(void* ptr) { UNUSED(ptr); } @@ -138,125 +138,6 @@ class ScopedArenaAllocator DISALLOW_COPY_AND_ASSIGN(ScopedArenaAllocator); }; -template <> -class ScopedArenaAllocatorAdapter<void> - : private DebugStackReference, private DebugStackIndirectTopRef { - public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; -}; - -// Adapter for use of ScopedArenaAllocator in STL containers. 
-template <typename T> -class ScopedArenaAllocatorAdapter : private DebugStackReference, private DebugStackIndirectTopRef { - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef const T* const_pointer; - typedef const T& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) { - DCHECK_LE(n, max_size()); - DebugStackIndirectTopRef::CheckTop(); - return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T), kArenaAllocSTL)); - } - void deallocate(pointer p, size_type n) { - DebugStackIndirectTopRef::CheckTop(); - } - - void construct(pointer p, const_reference val) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. - new (static_cast<void*>(p)) value_type(val); - } - void destroy(pointer p) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. - p->~value_type(); - } - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; - - template <typename U> - friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs, - const ScopedArenaAllocatorAdapter<U>& rhs); -}; - -template <typename T> -inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return lhs.arena_stack_ == rhs.arena_stack_; -} - -template <typename T> -inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return !(lhs == rhs); -} - -inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter() { - return ScopedArenaAllocatorAdapter<void>(this); -} - } // namespace art #endif // ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h index 6728565..0de7403 100644 --- a/compiler/utils/scoped_arena_containers.h +++ b/compiler/utils/scoped_arena_containers.h @@ -22,11 +22,23 @@ #include <set> #include <vector> +#include "utils/arena_containers.h" // For ArenaAllocatorAdapterKind. #include "utils/scoped_arena_allocator.h" #include "safe_map.h" namespace art { +// Adapter for use of ScopedArenaAllocator in STL containers. +// Use ScopedArenaAllocator::Adapter() to create an adapter to pass to container constructors. 
+// For example, +// void foo(ScopedArenaAllocator* allocator) { +// ScopedArenaVector<int> foo_vector(allocator->Adapter(kArenaAllocMisc)); +// ScopedArenaSafeMap<int, int> foo_map(std::less<int>(), allocator->Adapter()); +// // Use foo_vector and foo_map... +// } +template <typename T> +class ScopedArenaAllocatorAdapter; + template <typename T> using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>; @@ -43,6 +55,136 @@ template <typename K, typename V, typename Comparator = std::less<K>> using ScopedArenaSafeMap = SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>; +// Implementation details below. + +template <> +class ScopedArenaAllocatorAdapter<void> + : private DebugStackReference, private DebugStackIndirectTopRef, + private ArenaAllocatorAdapterKind { + public: + typedef void value_type; + typedef void* pointer; + typedef const void* const_pointer; + + template <typename U> + struct rebind { + typedef ScopedArenaAllocatorAdapter<U> other; + }; + + explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, + ArenaAllocKind kind = kArenaAllocSTL) + : DebugStackReference(arena_allocator), + DebugStackIndirectTopRef(arena_allocator), + ArenaAllocatorAdapterKind(kind), + arena_stack_(arena_allocator->arena_stack_) { + } + template <typename U> + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + DebugStackIndirectTopRef(other), + ArenaAllocatorAdapterKind(other), + arena_stack_(other.arena_stack_) { + } + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; + ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; + ~ScopedArenaAllocatorAdapter() = default; + + private: + ArenaStack* arena_stack_; + + template <typename U> + friend class ScopedArenaAllocatorAdapter; +}; + +template <typename T> +class ScopedArenaAllocatorAdapter + : private DebugStackReference, private DebugStackIndirectTopRef, + private ArenaAllocatorAdapterKind { + public: + typedef T value_type; + typedef T* pointer; + typedef T& reference; + typedef const T* const_pointer; + typedef const T& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + template <typename U> + struct rebind { + typedef ScopedArenaAllocatorAdapter<U> other; + }; + + explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, + ArenaAllocKind kind = kArenaAllocSTL) + : DebugStackReference(arena_allocator), + DebugStackIndirectTopRef(arena_allocator), + ArenaAllocatorAdapterKind(kind), + arena_stack_(arena_allocator->arena_stack_) { + } + template <typename U> + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) + : DebugStackReference(other), + DebugStackIndirectTopRef(other), + ArenaAllocatorAdapterKind(other), + arena_stack_(other.arena_stack_) { + } + ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; + ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; + ~ScopedArenaAllocatorAdapter() = default; + + size_type max_size() const { + return static_cast<size_type>(-1) / sizeof(T); + } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) { + DCHECK_LE(n, max_size()); + DebugStackIndirectTopRef::CheckTop(); + return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T), + 
ArenaAllocatorAdapterKind::Kind())); + } + void deallocate(pointer p, size_type n) { + DebugStackIndirectTopRef::CheckTop(); + } + + void construct(pointer p, const_reference val) { + // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. + new (static_cast<void*>(p)) value_type(val); + } + void destroy(pointer p) { + // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. + p->~value_type(); + } + + private: + ArenaStack* arena_stack_; + + template <typename U> + friend class ScopedArenaAllocatorAdapter; + + template <typename U> + friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs, + const ScopedArenaAllocatorAdapter<U>& rhs); +}; + +template <typename T> +inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs, + const ScopedArenaAllocatorAdapter<T>& rhs) { + return lhs.arena_stack_ == rhs.arena_stack_; +} + +template <typename T> +inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs, + const ScopedArenaAllocatorAdapter<T>& rhs) { + return !(lhs == rhs); +} + +inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter(ArenaAllocKind kind) { + return ScopedArenaAllocatorAdapter<void>(this, kind); +} + } // namespace art #endif // ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 0d14376..7684271 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -283,8 +283,8 @@ void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) { void X86_64Assembler::movw(const Address& dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalRex32(src, dst); EmitOperandSizeOverride(); + EmitOptionalRex32(src, dst); EmitUint8(0x89); EmitOperand(src.LowBits(), dst); } diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 1d6655c..2f814df 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -124,8 +124,8 @@ class Operand { if (index.NeedsRex()) { rex_ |= 0x42; // REX.00X0 } - encoding_[1] = (scale << 6) | (static_cast<uint8_t>(index.AsRegister()) << 3) | - static_cast<uint8_t>(base.AsRegister()); + encoding_[1] = (scale << 6) | (static_cast<uint8_t>(index.LowBits()) << 3) | + static_cast<uint8_t>(base.LowBits()); length_ = 2; } diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index dc1758f..4ed7b20 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -128,13 +128,29 @@ TEST_F(AssemblerX86_64Test, XorqImm) { TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11)); GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); const char* expected = "movl %R11d, %R8d\n" - "movl %R11d, %EAX\n"; + "movl %R11d, 
%EAX\n" + "movl 0xc(%RDI,%RBX,4), %EAX\n" + "movl 0xc(%RDI,%R9,4), %EAX\n" + "movl 0xc(%RDI,%R9,4), %R8d\n"; DriverStr(expected, "movl"); } +TEST_F(AssemblerX86_64Test, Movw) { + GetAssembler()->movw(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0), + x86_64::CpuRegister(x86_64::R9)); + const char* expected = "movw %R9w, 0(%RAX)\n"; + DriverStr(expected, "movw"); +} + std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) { // From Condition diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index a78d3f7..0437f30 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -430,8 +430,7 @@ class Dex2Oat { t2.NewTiming("Patching ELF"); std::string error_msg; if (!PatchOatCode(driver.get(), oat_file, oat_location, &error_msg)) { - LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath(); - LOG(ERROR) << "Error was: " << error_msg; + LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath() << ": " << error_msg; return nullptr; } } diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 101a55d..0ca8962 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -268,7 +268,7 @@ DISASSEMBLER_ENTRY(cmp, target_specific = true; break; case 0x63: - if (rex == 0x48) { + if ((rex & REX_W) != 0) { opcode << "movsxd"; has_modrm = true; load = true; @@ -959,7 +959,7 @@ DISASSEMBLER_ENTRY(cmp, byte_operand = true; break; case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: - if (rex == 0x48) { + if ((rex & REX_W) != 0) { opcode << "movabsq"; immediate_bytes = 8; reg_in_opcode = true; diff --git a/runtime/Android.mk b/runtime/Android.mk index 8fc5e34..302e835 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -252,6 +252,7 @@ LIBART_SRC_FILES_x86_64 := \ arch/x86_64/context_x86_64.cc \ arch/x86_64/entrypoints_init_x86_64.cc \ arch/x86_64/jni_entrypoints_x86_64.S \ + arch/x86_64/memcmp16_x86_64.S \ arch/x86_64/portable_entrypoints_x86_64.S \ arch/x86_64/quick_entrypoints_x86_64.S \ arch/x86_64/thread_x86_64.cc \ diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 4939610..86cb16a 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -365,8 +365,9 @@ END art_quick_invoke_stub ARM_ENTRY art_quick_do_long_jump vldm r1, {s0-s31} @ load all fprs from argument fprs_ ldr r2, [r0, #60] @ r2 = r15 (PC from gprs_ 60=4*15) + ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 - ldm r0, {r3-r14} @ load remaining gprs from argument gprs_ + ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ mov r0, #0 @ clear result registers r0 and r1 mov r1, #0 bx r2 @ do long jump diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h index 65d2f92..14dc1e3 100644 --- a/runtime/arch/memcmp16.h +++ b/runtime/arch/memcmp16.h @@ -30,7 +30,7 @@ // // In both cases, MemCmp16 is declared. 
-#if defined(__aarch64__) || defined(__arm__) || defined(__mips) || defined(__i386__) +#if defined(__aarch64__) || defined(__arm__) || defined(__mips) || defined(__i386__) || defined(__x86_64__) extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count); #define MemCmp16 __memcmp16 diff --git a/runtime/arch/x86/memcmp16_x86.S b/runtime/arch/x86/memcmp16_x86.S index 17662fa..a315a37 100644 --- a/runtime/arch/x86/memcmp16_x86.S +++ b/runtime/arch/x86/memcmp16_x86.S @@ -21,1018 +21,1018 @@ /* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */ #ifndef L -# define L(label) .L##label +# define L(label) .L##label #endif -#define CFI_PUSH(REG) \ - CFI_ADJUST_CFA_OFFSET(4); \ - CFI_REL_OFFSET(REG, 0) +#define CFI_PUSH(REG) \ + CFI_ADJUST_CFA_OFFSET(4); \ + CFI_REL_OFFSET(REG, 0) -#define CFI_POP(REG) \ - CFI_ADJUST_CFA_OFFSET(-4); \ - CFI_RESTORE(REG) +#define CFI_POP(REG) \ + CFI_ADJUST_CFA_OFFSET(-4); \ + CFI_RESTORE(REG) -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) -#define PARMS 4 -#define BLK1 PARMS -#define BLK2 BLK1+4 -#define LEN BLK2+4 -#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret -#define RETURN RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE +#define PARMS 4 +#define BLK1 PARMS +#define BLK2 BLK1+4 +#define LEN BLK2+4 +#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret +#define RETURN RETURN_END; CFI_RESTORE_STATE; CFI_REMEMBER_STATE DEFINE_FUNCTION MEMCMP - movl LEN(%esp), %ecx + movl LEN(%esp), %ecx - shl $1, %ecx - jz L(zero) + shl $1, %ecx + jz L(zero) - movl BLK1(%esp), %eax - cmp $48, %ecx - movl BLK2(%esp), %edx - jae L(48bytesormore) + movl BLK1(%esp), %eax + cmp $48, %ecx + movl BLK2(%esp), %edx + jae L(48bytesormore) - PUSH (%ebx) - add %ecx, %edx - add %ecx, %eax - jmp L(less48bytes) + PUSH (%ebx) + add %ecx, %edx + add %ecx, %eax + jmp L(less48bytes) - CFI_POP (%ebx) + CFI_POP (%ebx) - .p2align 4 + .p2align 4 L(zero): - xor %eax, %eax - ret + xor %eax, %eax + ret - .p2align 4 + .p2align 4 L(48bytesormore): - PUSH (%ebx) - PUSH (%esi) - PUSH (%edi) - CFI_REMEMBER_STATE - movdqu (%eax), %xmm3 - movdqu (%edx), %xmm0 - movl %eax, %edi - movl %edx, %esi - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - lea 16(%edi), %edi - - sub $0xffff, %edx - lea 16(%esi), %esi - jnz L(less16bytes) - mov %edi, %edx - and $0xf, %edx - xor %edx, %edi - sub %edx, %esi - add %edx, %ecx - mov %esi, %edx - and $0xf, %edx - jz L(shr_0) - xor %edx, %esi - - cmp $0, %edx - je L(shr_0) - cmp $2, %edx - je L(shr_2) - cmp $4, %edx - je L(shr_4) - cmp $6, %edx - je L(shr_6) - cmp $8, %edx - je L(shr_8) - cmp $10, %edx - je L(shr_10) - cmp $12, %edx - je L(shr_12) - jmp L(shr_14) - - .p2align 4 + PUSH (%ebx) + PUSH (%esi) + PUSH (%edi) + CFI_REMEMBER_STATE + movdqu (%eax), %xmm3 + movdqu (%edx), %xmm0 + movl %eax, %edi + movl %edx, %esi + pcmpeqb %xmm0, %xmm3 + pmovmskb %xmm3, %edx + lea 16(%edi), %edi + + sub $0xffff, %edx + lea 16(%esi), %esi + jnz L(less16bytes) + mov %edi, %edx + and $0xf, %edx + xor %edx, %edi + sub %edx, %esi + add %edx, %ecx + mov %esi, %edx + and $0xf, %edx + jz L(shr_0) + xor %edx, %esi + + cmp $0, %edx + je L(shr_0) + cmp $2, %edx + je L(shr_2) + cmp $4, %edx + je L(shr_4) + cmp $6, %edx + je L(shr_6) + cmp $8, %edx + je L(shr_8) + cmp $10, %edx + je L(shr_10) + cmp $12, %edx + je L(shr_12) + jmp L(shr_14) + + .p2align 4 L(shr_0): - cmp $80, %ecx - jae L(shr_0_gobble) - lea 
-48(%ecx), %ecx - xor %eax, %eax - movaps (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - movaps 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 - pand %xmm1, %xmm2 - pmovmskb %xmm2, %edx - add $32, %edi - add $32, %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + jae L(shr_0_gobble) + lea -48(%ecx), %ecx + xor %eax, %eax + movaps (%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + movaps 16(%esi), %xmm2 + pcmpeqb 16(%edi), %xmm2 + pand %xmm1, %xmm2 + pmovmskb %xmm2, %edx + add $32, %edi + add $32, %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea (%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_0_gobble): - lea -48(%ecx), %ecx - movdqa (%esi), %xmm0 - xor %eax, %eax - pcmpeqb (%edi), %xmm0 - sub $32, %ecx - movdqa 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 + lea -48(%ecx), %ecx + movdqa (%esi), %xmm0 + xor %eax, %eax + pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm2 + pcmpeqb 16(%edi), %xmm2 L(shr_0_gobble_loop): - pand %xmm0, %xmm2 - sub $32, %ecx - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - movdqa 32(%esi), %xmm0 - movdqa 48(%esi), %xmm2 - sbb $0xffff, %edx - pcmpeqb 32(%edi), %xmm0 - pcmpeqb 48(%edi), %xmm2 - lea 32(%edi), %edi - lea 32(%esi), %esi - jz L(shr_0_gobble_loop) - - pand %xmm0, %xmm2 - cmp $0, %ecx - jge L(shr_0_gobble_loop_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm2 + sub $32, %ecx + pmovmskb %xmm2, %edx + movdqa %xmm0, %xmm1 + movdqa 32(%esi), %xmm0 + movdqa 48(%esi), %xmm2 + sbb $0xffff, %edx + pcmpeqb 32(%edi), %xmm0 + pcmpeqb 48(%edi), %xmm2 + lea 32(%edi), %edi + lea 32(%esi), %esi + jz L(shr_0_gobble_loop) + + pand %xmm0, %xmm2 + cmp $0, %ecx + jge L(shr_0_gobble_loop_next) + inc %edx + add $32, %ecx L(shr_0_gobble_loop_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm2, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea (%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_2): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_2_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $2,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $2,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_2_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $2,(%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $2,%xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 2(%ecx, %esi,1), %edx + POP 
(%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_2_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $2,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $2,(%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $2,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $2,16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_2_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $2,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $2,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_2_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_2_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $2,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $2,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_2_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_2_gobble_next) + inc %edx + add $32, %ecx L(shr_2_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 2(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_4): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_4_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $4,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $4,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_4_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $4,(%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $4,%xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 4(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_4_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $4,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $4,(%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $4,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $4,16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_4_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - 
palignr $4,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $4,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_4_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_4_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $4,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $4,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_4_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_4_gobble_next) + inc %edx + add $32, %ecx L(shr_4_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 4(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_6): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_6_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $6,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $6,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_6_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $6,(%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $6,%xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 6(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_6_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $6,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $6,(%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $6,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $6,16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_6_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $6,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $6,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_6_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_6_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $6,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $6,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), 
%xmm3 + + lea 32(%edi), %edi + jz L(shr_6_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_6_gobble_next) + inc %edx + add $32, %ecx L(shr_6_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 6(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_8): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_8_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $8,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $8,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_8_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $8,(%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $8,%xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 8(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_8_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $8,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $8,(%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $8,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $8,16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_8_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $8,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $8,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_8_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_8_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $8,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $8,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_8_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_8_gobble_next) + inc %edx + add $32, %ecx L(shr_8_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 
32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 8(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_10): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_10_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $10, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $10,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_10_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $10, (%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $10,%xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 10(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_10_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $10, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $10, (%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $10, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $10, 16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_10_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $10,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $10,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_10_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_10_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $10,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $10,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_10_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_10_gobble_next) + inc %edx + add $32, %ecx L(shr_10_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 10(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_12): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_12_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $12, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $12, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 
32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_12_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $12, (%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $12, %xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 12(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_12_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $12, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $12, (%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $12, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $12, 16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_12_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $12,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $12,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_12_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_12_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $12,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $12,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_12_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_12_gobble_next) + inc %edx + add $32, %ecx L(shr_12_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 12(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_14): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_14_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $14, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $14, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + cmp $80, %ecx + lea -48(%ecx), %ecx + mov %edx, %eax + jae L(shr_14_gobble) + + movdqa 16(%esi), %xmm1 + movdqa %xmm1, %xmm2 + palignr $14, (%esi), %xmm1 + pcmpeqb (%edi), %xmm1 + + movdqa 32(%esi), %xmm3 + palignr $14, %xmm2, %xmm3 + pcmpeqb 16(%edi), %xmm3 + + pand %xmm1, %xmm3 + pmovmskb %xmm3, %edx + lea 32(%edi), %edi + lea 32(%esi), %esi + 
sub $0xffff, %edx + jnz L(exit) + lea (%ecx, %edi,1), %eax + lea 14(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(shr_14_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $14, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 + sub $32, %ecx + movdqa 16(%esi), %xmm0 + palignr $14, (%esi), %xmm0 + pcmpeqb (%edi), %xmm0 - movdqa 32(%esi), %xmm3 - palignr $14, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 + movdqa 32(%esi), %xmm3 + palignr $14, 16(%esi), %xmm3 + pcmpeqb 16(%edi), %xmm3 L(shr_14_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $14,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $14,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_14_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_14_gobble_next) - inc %edx - add $32, %ecx + pand %xmm0, %xmm3 + sub $32, %ecx + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + + movdqa 64(%esi), %xmm3 + palignr $14,48(%esi), %xmm3 + sbb $0xffff, %edx + movdqa 48(%esi), %xmm0 + palignr $14,32(%esi), %xmm0 + pcmpeqb 32(%edi), %xmm0 + lea 32(%esi), %esi + pcmpeqb 48(%edi), %xmm3 + + lea 32(%edi), %edi + jz L(shr_14_gobble_loop) + pand %xmm0, %xmm3 + + cmp $0, %ecx + jge L(shr_14_gobble_next) + inc %edx + add $32, %ecx L(shr_14_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - CFI_RESTORE_STATE - CFI_REMEMBER_STATE - .p2align 4 + test %edx, %edx + jnz L(exit) + + pmovmskb %xmm3, %edx + movdqa %xmm0, %xmm1 + lea 32(%edi), %edi + lea 32(%esi), %esi + sub $0xffff, %edx + jnz L(exit) + + lea (%ecx, %edi,1), %eax + lea 14(%ecx, %esi,1), %edx + POP (%edi) + POP (%esi) + jmp L(less48bytes) + + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + .p2align 4 L(exit): - pmovmskb %xmm1, %ebx - sub $0xffff, %ebx - jz L(first16bytes) - lea -16(%esi), %esi - lea -16(%edi), %edi - mov %ebx, %edx + pmovmskb %xmm1, %ebx + sub $0xffff, %ebx + jz L(first16bytes) + lea -16(%esi), %esi + lea -16(%edi), %edi + mov %ebx, %edx L(first16bytes): - add %eax, %esi + add %eax, %esi L(less16bytes): - test %dl, %dl - jz L(next_four_words) - test $15, %dl - jz L(second_two_words) - test $3, %dl - jz L(second_word) - movzwl -16(%edi), %eax - movzwl -16(%esi), %ebx - subl %ebx, %eax - RETURN - - .p2align 4 + test %dl, %dl + jz L(next_four_words) + test $15, %dl + jz L(second_two_words) + test $3, %dl + jz L(second_word) + movzwl -16(%edi), %eax + movzwl -16(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 L(second_word): - movzwl -14(%edi), %eax - movzwl -14(%esi), %ebx - subl %ebx, %eax - RETURN + movzwl -14(%edi), %eax + movzwl -14(%esi), %ebx + subl %ebx, %eax + RETURN - .p2align 4 + .p2align 4 L(second_two_words): - test $63, %dl - jz L(fourth_word) - movzwl -12(%edi), %eax - movzwl -12(%esi), %ebx - subl %ebx, %eax - RETURN - - .p2align 4 + test $63, %dl + jz L(fourth_word) + movzwl -12(%edi), %eax + movzwl -12(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 L(fourth_word): - movzwl -10(%edi), %eax - movzwl -10(%esi), %ebx - subl %ebx, %eax - RETURN + movzwl -10(%edi), %eax + movzwl -10(%esi), %ebx + subl %ebx, %eax + RETURN - .p2align 4 + .p2align 4 L(next_four_words): - test $15, %dh - jz 
L(fourth_two_words) - test $3, %dh - jz L(sixth_word) - movzwl -8(%edi), %eax - movzwl -8(%esi), %ebx - subl %ebx, %eax - RETURN - - .p2align 4 + test $15, %dh + jz L(fourth_two_words) + test $3, %dh + jz L(sixth_word) + movzwl -8(%edi), %eax + movzwl -8(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 L(sixth_word): - movzwl -6(%edi), %eax - movzwl -6(%esi), %ebx - subl %ebx, %eax - RETURN + movzwl -6(%edi), %eax + movzwl -6(%esi), %ebx + subl %ebx, %eax + RETURN - .p2align 4 + .p2align 4 L(fourth_two_words): - test $63, %dh - jz L(eighth_word) - movzwl -4(%edi), %eax - movzwl -4(%esi), %ebx - subl %ebx, %eax - RETURN - - .p2align 4 + test $63, %dh + jz L(eighth_word) + movzwl -4(%edi), %eax + movzwl -4(%esi), %ebx + subl %ebx, %eax + RETURN + + .p2align 4 L(eighth_word): - movzwl -2(%edi), %eax - movzwl -2(%esi), %ebx - subl %ebx, %eax - RETURN + movzwl -2(%edi), %eax + movzwl -2(%esi), %ebx + subl %ebx, %eax + RETURN - CFI_PUSH (%ebx) + CFI_PUSH (%ebx) - .p2align 4 + .p2align 4 L(more8bytes): - cmp $16, %ecx - jae L(more16bytes) - cmp $8, %ecx - je L(8bytes) - cmp $10, %ecx - je L(10bytes) - cmp $12, %ecx - je L(12bytes) - jmp L(14bytes) - - .p2align 4 + cmp $16, %ecx + jae L(more16bytes) + cmp $8, %ecx + je L(8bytes) + cmp $10, %ecx + je L(10bytes) + cmp $12, %ecx + je L(12bytes) + jmp L(14bytes) + + .p2align 4 L(more16bytes): - cmp $24, %ecx - jae L(more24bytes) - cmp $16, %ecx - je L(16bytes) - cmp $18, %ecx - je L(18bytes) - cmp $20, %ecx - je L(20bytes) - jmp L(22bytes) - - .p2align 4 + cmp $24, %ecx + jae L(more24bytes) + cmp $16, %ecx + je L(16bytes) + cmp $18, %ecx + je L(18bytes) + cmp $20, %ecx + je L(20bytes) + jmp L(22bytes) + + .p2align 4 L(more24bytes): - cmp $32, %ecx - jae L(more32bytes) - cmp $24, %ecx - je L(24bytes) - cmp $26, %ecx - je L(26bytes) - cmp $28, %ecx - je L(28bytes) - jmp L(30bytes) - - .p2align 4 + cmp $32, %ecx + jae L(more32bytes) + cmp $24, %ecx + je L(24bytes) + cmp $26, %ecx + je L(26bytes) + cmp $28, %ecx + je L(28bytes) + jmp L(30bytes) + + .p2align 4 L(more32bytes): - cmp $40, %ecx - jae L(more40bytes) - cmp $32, %ecx - je L(32bytes) - cmp $34, %ecx - je L(34bytes) - cmp $36, %ecx - je L(36bytes) - jmp L(38bytes) - - .p2align 4 + cmp $40, %ecx + jae L(more40bytes) + cmp $32, %ecx + je L(32bytes) + cmp $34, %ecx + je L(34bytes) + cmp $36, %ecx + je L(36bytes) + jmp L(38bytes) + + .p2align 4 L(less48bytes): - cmp $8, %ecx - jae L(more8bytes) - cmp $2, %ecx - je L(2bytes) - cmp $4, %ecx - je L(4bytes) - jmp L(6bytes) - - .p2align 4 + cmp $8, %ecx + jae L(more8bytes) + cmp $2, %ecx + je L(2bytes) + cmp $4, %ecx + je L(4bytes) + jmp L(6bytes) + + .p2align 4 L(more40bytes): - cmp $40, %ecx - je L(40bytes) - cmp $42, %ecx - je L(42bytes) - cmp $44, %ecx - je L(44bytes) - jmp L(46bytes) - - .p2align 4 + cmp $40, %ecx + je L(40bytes) + cmp $42, %ecx + je L(42bytes) + cmp $44, %ecx + je L(44bytes) + jmp L(46bytes) + + .p2align 4 L(46bytes): - movzwl -46(%eax), %ecx - movzwl -46(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -46(%eax), %ecx + movzwl -46(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(44bytes): - movzwl -44(%eax), %ecx - movzwl -44(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -44(%eax), %ecx + movzwl -44(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(42bytes): - movzwl -42(%eax), %ecx - movzwl -42(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -42(%eax), %ecx + movzwl -42(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(40bytes): - movzwl -40(%eax), %ecx - movzwl 
-40(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -40(%eax), %ecx + movzwl -40(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(38bytes): - movzwl -38(%eax), %ecx - movzwl -38(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -38(%eax), %ecx + movzwl -38(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(36bytes): - movzwl -36(%eax), %ecx - movzwl -36(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -36(%eax), %ecx + movzwl -36(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(34bytes): - movzwl -34(%eax), %ecx - movzwl -34(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -34(%eax), %ecx + movzwl -34(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(32bytes): - movzwl -32(%eax), %ecx - movzwl -32(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -32(%eax), %ecx + movzwl -32(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(30bytes): - movzwl -30(%eax), %ecx - movzwl -30(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -30(%eax), %ecx + movzwl -30(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(28bytes): - movzwl -28(%eax), %ecx - movzwl -28(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -28(%eax), %ecx + movzwl -28(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(26bytes): - movzwl -26(%eax), %ecx - movzwl -26(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -26(%eax), %ecx + movzwl -26(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(24bytes): - movzwl -24(%eax), %ecx - movzwl -24(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -24(%eax), %ecx + movzwl -24(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(22bytes): - movzwl -22(%eax), %ecx - movzwl -22(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -22(%eax), %ecx + movzwl -22(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(20bytes): - movzwl -20(%eax), %ecx - movzwl -20(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -20(%eax), %ecx + movzwl -20(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(18bytes): - movzwl -18(%eax), %ecx - movzwl -18(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -18(%eax), %ecx + movzwl -18(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(16bytes): - movzwl -16(%eax), %ecx - movzwl -16(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -16(%eax), %ecx + movzwl -16(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(14bytes): - movzwl -14(%eax), %ecx - movzwl -14(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -14(%eax), %ecx + movzwl -14(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(12bytes): - movzwl -12(%eax), %ecx - movzwl -12(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -12(%eax), %ecx + movzwl -12(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(10bytes): - movzwl -10(%eax), %ecx - movzwl -10(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -10(%eax), %ecx + movzwl -10(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(8bytes): - movzwl -8(%eax), %ecx - movzwl -8(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -8(%eax), %ecx + movzwl -8(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(6bytes): - movzwl -6(%eax), %ecx - movzwl -6(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -6(%eax), %ecx + movzwl -6(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(4bytes): - movzwl -4(%eax), %ecx - movzwl -4(%edx), %ebx - subl %ebx, %ecx - jne L(memcmp16_exit) + movzwl -4(%eax), 
%ecx + movzwl -4(%edx), %ebx + subl %ebx, %ecx + jne L(memcmp16_exit) L(2bytes): - movzwl -2(%eax), %eax - movzwl -2(%edx), %ebx - subl %ebx, %eax - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 + movzwl -2(%eax), %eax + movzwl -2(%edx), %ebx + subl %ebx, %eax + POP (%ebx) + ret + CFI_PUSH (%ebx) + + .p2align 4 L(memcmp16_exit): - POP (%ebx) - mov %ecx, %eax - ret + POP (%ebx) + mov %ecx, %eax + ret END_FUNCTION MEMCMP diff --git a/runtime/arch/x86_64/memcmp16_x86_64.S b/runtime/arch/x86_64/memcmp16_x86_64.S new file mode 100755 index 0000000..46e4ba3 --- /dev/null +++ b/runtime/arch/x86_64/memcmp16_x86_64.S @@ -0,0 +1,1210 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "asm_support_x86_64.S" + +#define MEMCMP __memcmp16 + +/* + * Half of Silvermont L1 Data Cache size + *(see original file cache.h in bionic/libc/arch-x86_64/). + * This value is used for specific optimization on big lengths. + */ +#define DATA_CACHE_SIZE_HALF (12*1024) + +#ifndef L +# define L(label) .L##label +#endif + +#ifndef ALIGN +# define ALIGN(n) .p2align n +#endif + +#define JMPTBL(I, B) (I - B) + +#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + lea TABLE(%rip), %r11; \ + movslq (%r11, INDEX, SCALE), %rcx; \ + add %r11, %rcx; \ + jmp *%rcx; \ + ud2 + +DEFINE_FUNCTION MEMCMP + pxor %xmm0, %xmm0 + shl $1, %rdx + cmp $79, %rdx + ja L(79bytesormore) + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + ALIGN (4) +L(79bytesormore): + movdqu (%rsi), %xmm1 + movdqu (%rdi), %xmm2 + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + mov %rsi, %rcx + and $-16, %rsi + add $16, %rsi + sub %rsi, %rcx + + sub %rcx, %rdi + add %rcx, %rdx + test $0xf, %rdi + jz L(2aligned) + + cmp $128, %rdx + ja L(128bytesormore) +L(less128bytes): + sub $64, %rdx + + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqu 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(48bytesin256) + + movdqu 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + cmp $32, %rdx + jb L(less32bytesin64) + + movdqu 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqu 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin64): + add $64, %rdi + add $64, %rsi + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + +L(128bytesormore): + cmp $512, %rdx + ja L(512bytesormore) + cmp $256, %rdx + ja L(less512bytes) +L(less256bytes): + sub $128, %rdx + + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqu 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest 
%xmm2, %xmm0 + jnc L(48bytesin256) + + movdqu 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + + movdqu 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqu 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + + movdqu 96(%rdi), %xmm2 + pxor 96(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(112bytesin256) + + movdqu 112(%rdi), %xmm2 + pxor 112(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(128bytesin256) + + add $128, %rsi + add $128, %rdi + + cmp $64, %rdx + jae L(less128bytes) + + cmp $32, %rdx + jb L(less32bytesin128) + + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin128): + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + +L(less512bytes): + sub $256, %rdx + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqu 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(48bytesin256) + + movdqu 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + + movdqu 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqu 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + + movdqu 96(%rdi), %xmm2 + pxor 96(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(112bytesin256) + + movdqu 112(%rdi), %xmm2 + pxor 112(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(128bytesin256) + + movdqu 128(%rdi), %xmm2 + pxor 128(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(144bytesin256) + + movdqu 144(%rdi), %xmm2 + pxor 144(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(160bytesin256) + + movdqu 160(%rdi), %xmm2 + pxor 160(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(176bytesin256) + + movdqu 176(%rdi), %xmm2 + pxor 176(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(192bytesin256) + + movdqu 192(%rdi), %xmm2 + pxor 192(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(208bytesin256) + + movdqu 208(%rdi), %xmm2 + pxor 208(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(224bytesin256) + + movdqu 224(%rdi), %xmm2 + pxor 224(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(240bytesin256) + + movdqu 240(%rdi), %xmm2 + pxor 240(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(256bytesin256) + + add $256, %rsi + add $256, %rdi + + cmp $128, %rdx + jae L(less256bytes) + + cmp $64, %rdx + jae L(less128bytes) + + cmp $32, %rdx + jb L(less32bytesin256) + + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin256): + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + ALIGN (4) +L(512bytesormore): +#ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %r8 +#else + mov __x86_64_data_cache_size_half(%rip), %r8 +#endif + mov %r8, %r9 + shr $1, %r8 + add %r9, %r8 + cmp %r8, %rdx + ja L(L2_L3_cache_unaglined) + sub $64, %rdx + ALIGN (4) +L(64bytesormore_loop): + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + movdqa %xmm2, %xmm1 + + movdqu 16(%rdi), %xmm3 + pxor 16(%rsi), %xmm3 + por %xmm3, %xmm1 + + movdqu 32(%rdi), %xmm4 + pxor 32(%rsi), %xmm4 + por %xmm4, %xmm1 + + movdqu 48(%rdi), %xmm5 + pxor 48(%rsi), 
%xmm5 + por %xmm5, %xmm1 + + ptest %xmm1, %xmm0 + jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx + jae L(64bytesormore_loop) + + add $64, %rdx + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + +L(L2_L3_cache_unaglined): + sub $64, %rdx + ALIGN (4) +L(L2_L3_unaligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + movdqa %xmm2, %xmm1 + + movdqu 16(%rdi), %xmm3 + pxor 16(%rsi), %xmm3 + por %xmm3, %xmm1 + + movdqu 32(%rdi), %xmm4 + pxor 32(%rsi), %xmm4 + por %xmm4, %xmm1 + + movdqu 48(%rdi), %xmm5 + pxor 48(%rsi), %xmm5 + por %xmm5, %xmm1 + + ptest %xmm1, %xmm0 + jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx + jae L(L2_L3_unaligned_128bytes_loop) + + add $64, %rdx + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + +/* + * This case is for machines which are sensitive for unaligned instructions. + */ + ALIGN (4) +L(2aligned): + cmp $128, %rdx + ja L(128bytesormorein2aligned) +L(less128bytesin2aligned): + sub $64, %rdx + + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqa 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqa 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(48bytesin256) + + movdqa 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + cmp $32, %rdx + jb L(less32bytesin64in2alinged) + + movdqa 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqa 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin64in2alinged): + add $64, %rdi + add $64, %rsi + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + ALIGN (4) +L(128bytesormorein2aligned): + cmp $512, %rdx + ja L(512bytesormorein2aligned) + cmp $256, %rdx + ja L(256bytesormorein2aligned) +L(less256bytesin2alinged): + sub $128, %rdx + + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqa 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqa 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(48bytesin256) + + movdqa 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + + movdqa 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqa 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + + movdqa 96(%rdi), %xmm2 + pxor 96(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(112bytesin256) + + movdqa 112(%rdi), %xmm2 + pxor 112(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(128bytesin256) + + add $128, %rsi + add $128, %rdi + + cmp $64, %rdx + jae L(less128bytesin2aligned) + + cmp $32, %rdx + jb L(less32bytesin128in2aligned) + + movdqu (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqu 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin128in2aligned): + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + ALIGN (4) +L(256bytesormorein2aligned): + + sub $256, %rdx + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqa 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + 
ptest %xmm2, %xmm0 + jnc L(32bytesin256) + + movdqa 32(%rdi), %xmm2 + pxor 32(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(48bytesin256) + + movdqa 48(%rdi), %xmm2 + pxor 48(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(64bytesin256) + + movdqa 64(%rdi), %xmm2 + pxor 64(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(80bytesin256) + + movdqa 80(%rdi), %xmm2 + pxor 80(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(96bytesin256) + + movdqa 96(%rdi), %xmm2 + pxor 96(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(112bytesin256) + + movdqa 112(%rdi), %xmm2 + pxor 112(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(128bytesin256) + + movdqa 128(%rdi), %xmm2 + pxor 128(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(144bytesin256) + + movdqa 144(%rdi), %xmm2 + pxor 144(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(160bytesin256) + + movdqa 160(%rdi), %xmm2 + pxor 160(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(176bytesin256) + + movdqa 176(%rdi), %xmm2 + pxor 176(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(192bytesin256) + + movdqa 192(%rdi), %xmm2 + pxor 192(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(208bytesin256) + + movdqa 208(%rdi), %xmm2 + pxor 208(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(224bytesin256) + + movdqa 224(%rdi), %xmm2 + pxor 224(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(240bytesin256) + + movdqa 240(%rdi), %xmm2 + pxor 240(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(256bytesin256) + + add $256, %rsi + add $256, %rdi + + cmp $128, %rdx + jae L(less256bytesin2alinged) + + cmp $64, %rdx + jae L(less128bytesin2aligned) + + cmp $32, %rdx + jb L(less32bytesin256in2alinged) + + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(16bytesin256) + + movdqa 16(%rdi), %xmm2 + pxor 16(%rsi), %xmm2 + ptest %xmm2, %xmm0 + jnc L(32bytesin256) + sub $32, %rdx + add $32, %rdi + add $32, %rsi +L(less32bytesin256in2alinged): + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + ALIGN (4) +L(512bytesormorein2aligned): +#ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %r8 +#else + mov __x86_64_data_cache_size_half(%rip), %r8 +#endif + mov %r8, %r9 + shr $1, %r8 + add %r9, %r8 + cmp %r8, %rdx + ja L(L2_L3_cache_aglined) + + sub $64, %rdx + ALIGN (4) +L(64bytesormore_loopin2aligned): + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + movdqa %xmm2, %xmm1 + + movdqa 16(%rdi), %xmm3 + pxor 16(%rsi), %xmm3 + por %xmm3, %xmm1 + + movdqa 32(%rdi), %xmm4 + pxor 32(%rsi), %xmm4 + por %xmm4, %xmm1 + + movdqa 48(%rdi), %xmm5 + pxor 48(%rsi), %xmm5 + por %xmm5, %xmm1 + + ptest %xmm1, %xmm0 + jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx + jae L(64bytesormore_loopin2aligned) + + add $64, %rdx + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) +L(L2_L3_cache_aglined): + sub $64, %rdx + ALIGN (4) +L(L2_L3_aligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) + movdqa (%rdi), %xmm2 + pxor (%rsi), %xmm2 + movdqa %xmm2, %xmm1 + + movdqa 16(%rdi), %xmm3 + pxor 16(%rsi), %xmm3 + por %xmm3, %xmm1 + + movdqa 32(%rdi), %xmm4 + pxor 32(%rsi), %xmm4 + por %xmm4, %xmm1 + + movdqa 48(%rdi), %xmm5 + pxor 48(%rsi), %xmm5 + por %xmm5, %xmm1 + + ptest %xmm1, %xmm0 + jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx + jae L(L2_L3_aligned_128bytes_loop) + + add $64, %rdx + add %rdx, %rsi + add %rdx, %rdi + BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 2) + + + ALIGN (4) +L(64bytesormore_loop_end): + add $16, %rdi + add $16, %rsi + ptest %xmm2, %xmm0 + jnc L(16bytes) + + add $16, %rdi + add $16, %rsi + 
ptest %xmm3, %xmm0 + jnc L(16bytes) + + add $16, %rdi + add $16, %rsi + ptest %xmm4, %xmm0 + jnc L(16bytes) + + add $16, %rdi + add $16, %rsi + jmp L(16bytes) + +L(256bytesin256): + add $256, %rdi + add $256, %rsi + jmp L(16bytes) +L(240bytesin256): + add $240, %rdi + add $240, %rsi + jmp L(16bytes) +L(224bytesin256): + add $224, %rdi + add $224, %rsi + jmp L(16bytes) +L(208bytesin256): + add $208, %rdi + add $208, %rsi + jmp L(16bytes) +L(192bytesin256): + add $192, %rdi + add $192, %rsi + jmp L(16bytes) +L(176bytesin256): + add $176, %rdi + add $176, %rsi + jmp L(16bytes) +L(160bytesin256): + add $160, %rdi + add $160, %rsi + jmp L(16bytes) +L(144bytesin256): + add $144, %rdi + add $144, %rsi + jmp L(16bytes) +L(128bytesin256): + add $128, %rdi + add $128, %rsi + jmp L(16bytes) +L(112bytesin256): + add $112, %rdi + add $112, %rsi + jmp L(16bytes) +L(96bytesin256): + add $96, %rdi + add $96, %rsi + jmp L(16bytes) +L(80bytesin256): + add $80, %rdi + add $80, %rsi + jmp L(16bytes) +L(64bytesin256): + add $64, %rdi + add $64, %rsi + jmp L(16bytes) +L(48bytesin256): + add $16, %rdi + add $16, %rsi +L(32bytesin256): + add $16, %rdi + add $16, %rsi +L(16bytesin256): + add $16, %rdi + add $16, %rsi +L(16bytes): + mov -16(%rdi), %rax + mov -16(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) +L(8bytes): + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(12bytes): + mov -12(%rdi), %rax + mov -12(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) +L(4bytes): + mov -4(%rsi), %ecx + mov -4(%rdi), %eax + cmp %eax, %ecx + jne L(diffin4bytes) +L(0bytes): + xor %eax, %eax + ret + + ALIGN (4) +L(66bytes): + movdqu -66(%rdi), %xmm1 + movdqu -66(%rsi), %xmm2 + mov $-66, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(50bytes): + movdqu -50(%rdi), %xmm1 + movdqu -50(%rsi), %xmm2 + mov $-50, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(34bytes): + movdqu -34(%rdi), %xmm1 + movdqu -34(%rsi), %xmm2 + mov $-34, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(18bytes): + mov -18(%rdi), %rax + mov -18(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) +L(10bytes): + mov -10(%rdi), %rax + mov -10(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + movzwl -2(%rdi), %eax + movzwl -2(%rsi), %ecx + cmp %cl, %al + jne L(end) + and $0xffff, %eax + and $0xffff, %ecx + sub %ecx, %eax + ret + + ALIGN (4) +L(14bytes): + mov -14(%rdi), %rax + mov -14(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(6bytes): + mov -6(%rdi), %eax + mov -6(%rsi), %ecx + cmp %eax, %ecx + jne L(diffin4bytes) +L(2bytes): + movzwl -2(%rsi), %ecx + movzwl -2(%rdi), %eax + cmp %cl, %al + jne L(end) + and $0xffff, %eax + and $0xffff, %ecx + sub %ecx, %eax + ret + + ALIGN (4) +L(68bytes): + movdqu -68(%rdi), %xmm2 + movdqu -68(%rsi), %xmm1 + mov $-68, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(52bytes): + movdqu -52(%rdi), %xmm2 + movdqu -52(%rsi), %xmm1 + mov $-52, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(36bytes): + movdqu -36(%rdi), %xmm2 + movdqu -36(%rsi), %xmm1 + mov $-36, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(20bytes): + movdqu -20(%rdi), %xmm2 + movdqu -20(%rsi), %xmm1 + mov $-20, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -4(%rdi), %eax + mov -4(%rsi), %ecx + cmp %eax, %ecx + 
jne L(diffin4bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(70bytes): + movdqu -70(%rsi), %xmm1 + movdqu -70(%rdi), %xmm2 + mov $-70, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(54bytes): + movdqu -54(%rsi), %xmm1 + movdqu -54(%rdi), %xmm2 + mov $-54, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(38bytes): + movdqu -38(%rsi), %xmm1 + movdqu -38(%rdi), %xmm2 + mov $-38, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(22bytes): + movdqu -22(%rsi), %xmm1 + movdqu -22(%rdi), %xmm2 + mov $-22, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(72bytes): + movdqu -72(%rsi), %xmm1 + movdqu -72(%rdi), %xmm2 + mov $-72, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(56bytes): + movdqu -56(%rdi), %xmm2 + movdqu -56(%rsi), %xmm1 + mov $-56, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(40bytes): + movdqu -40(%rdi), %xmm2 + movdqu -40(%rsi), %xmm1 + mov $-40, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(24bytes): + movdqu -24(%rdi), %xmm2 + movdqu -24(%rsi), %xmm1 + mov $-24, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(74bytes): + movdqu -74(%rsi), %xmm1 + movdqu -74(%rdi), %xmm2 + mov $-74, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(58bytes): + movdqu -58(%rdi), %xmm2 + movdqu -58(%rsi), %xmm1 + mov $-58, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(42bytes): + movdqu -42(%rdi), %xmm2 + movdqu -42(%rsi), %xmm1 + mov $-42, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(26bytes): + movdqu -26(%rdi), %xmm2 + movdqu -26(%rsi), %xmm1 + mov $-26, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -10(%rdi), %rax + mov -10(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + movzwl -2(%rdi), %eax + movzwl -2(%rsi), %ecx + jmp L(end) + + ALIGN (4) +L(76bytes): + movdqu -76(%rsi), %xmm1 + movdqu -76(%rdi), %xmm2 + mov $-76, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(60bytes): + movdqu -60(%rdi), %xmm2 + movdqu -60(%rsi), %xmm1 + mov $-60, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(44bytes): + movdqu -44(%rdi), %xmm2 + movdqu -44(%rsi), %xmm1 + mov $-44, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(28bytes): + movdqu -28(%rdi), %xmm2 + movdqu -28(%rsi), %xmm1 + mov $-28, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -12(%rdi), %rax + mov -12(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + mov -4(%rdi), %eax + mov -4(%rsi), %ecx + cmp %eax, %ecx + jne L(diffin4bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(78bytes): + movdqu -78(%rsi), %xmm1 + movdqu -78(%rdi), %xmm2 + mov $-78, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(62bytes): + movdqu -62(%rdi), %xmm2 + movdqu -62(%rsi), %xmm1 + mov $-62, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(46bytes): + movdqu -46(%rdi), %xmm2 + movdqu -46(%rsi), %xmm1 + mov $-46, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(30bytes): + movdqu -30(%rdi), %xmm2 + movdqu -30(%rsi), %xmm1 + mov $-30, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + mov -14(%rdi), %rax + mov -14(%rsi), 
%rcx + cmp %rax, %rcx + jne L(diffin8bytes) + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + + ALIGN (4) +L(64bytes): + movdqu -64(%rdi), %xmm2 + movdqu -64(%rsi), %xmm1 + mov $-64, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(48bytes): + movdqu -48(%rdi), %xmm2 + movdqu -48(%rsi), %xmm1 + mov $-48, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) +L(32bytes): + movdqu -32(%rdi), %xmm2 + movdqu -32(%rsi), %xmm1 + mov $-32, %dl + pxor %xmm1, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + + mov -16(%rdi), %rax + mov -16(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + + mov -8(%rdi), %rax + mov -8(%rsi), %rcx + cmp %rax, %rcx + jne L(diffin8bytes) + xor %eax, %eax + ret + +/* + * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. + */ + ALIGN (3) +L(less16bytes): + movsbq %dl, %rdx + mov (%rsi, %rdx), %rcx + mov (%rdi, %rdx), %rax + cmp %rax, %rcx + jne L(diffin8bytes) + mov 8(%rsi, %rdx), %rcx + mov 8(%rdi, %rdx), %rax +L(diffin8bytes): + cmp %eax, %ecx + jne L(diffin4bytes) + shr $32, %rcx + shr $32, %rax +L(diffin4bytes): + cmp %cx, %ax + jne L(end) + shr $16, %ecx + shr $16, %eax + jmp L(end) + + ALIGN (4) +L(end): + and $0xffff, %eax + and $0xffff, %ecx + sub %ecx, %eax + ret + +END_FUNCTION MEMCMP + + ALIGN (3) +L(table_64bytes): + .int JMPTBL (L(0bytes), L(table_64bytes)) + .int JMPTBL (L(2bytes), L(table_64bytes)) + .int JMPTBL (L(4bytes), L(table_64bytes)) + .int JMPTBL (L(6bytes), L(table_64bytes)) + .int JMPTBL (L(8bytes), L(table_64bytes)) + .int JMPTBL (L(10bytes), L(table_64bytes)) + .int JMPTBL (L(12bytes), L(table_64bytes)) + .int JMPTBL (L(14bytes), L(table_64bytes)) + .int JMPTBL (L(16bytes), L(table_64bytes)) + .int JMPTBL (L(18bytes), L(table_64bytes)) + .int JMPTBL (L(20bytes), L(table_64bytes)) + .int JMPTBL (L(22bytes), L(table_64bytes)) + .int JMPTBL (L(24bytes), L(table_64bytes)) + .int JMPTBL (L(26bytes), L(table_64bytes)) + .int JMPTBL (L(28bytes), L(table_64bytes)) + .int JMPTBL (L(30bytes), L(table_64bytes)) + .int JMPTBL (L(32bytes), L(table_64bytes)) + .int JMPTBL (L(34bytes), L(table_64bytes)) + .int JMPTBL (L(36bytes), L(table_64bytes)) + .int JMPTBL (L(38bytes), L(table_64bytes)) + .int JMPTBL (L(40bytes), L(table_64bytes)) + .int JMPTBL (L(42bytes), L(table_64bytes)) + .int JMPTBL (L(44bytes), L(table_64bytes)) + .int JMPTBL (L(46bytes), L(table_64bytes)) + .int JMPTBL (L(48bytes), L(table_64bytes)) + .int JMPTBL (L(50bytes), L(table_64bytes)) + .int JMPTBL (L(52bytes), L(table_64bytes)) + .int JMPTBL (L(54bytes), L(table_64bytes)) + .int JMPTBL (L(56bytes), L(table_64bytes)) + .int JMPTBL (L(58bytes), L(table_64bytes)) + .int JMPTBL (L(60bytes), L(table_64bytes)) + .int JMPTBL (L(62bytes), L(table_64bytes)) + .int JMPTBL (L(64bytes), L(table_64bytes)) + .int JMPTBL (L(66bytes), L(table_64bytes)) + .int JMPTBL (L(68bytes), L(table_64bytes)) + .int JMPTBL (L(70bytes), L(table_64bytes)) + .int JMPTBL (L(72bytes), L(table_64bytes)) + .int JMPTBL (L(74bytes), L(table_64bytes)) + .int JMPTBL (L(76bytes), L(table_64bytes)) + .int JMPTBL (L(78bytes), L(table_64bytes)) diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 12b7680..f0b1b95 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -615,6 +615,14 @@ bool ClassLinker::GenerateOatFile(const char* dex_filename, argv.push_back("--compiler-filter=verify-none"); } + if (Runtime::Current()->MustRelocateIfPossible()) { + argv.push_back("--runtime-arg"); + 
argv.push_back("-Xrelocate"); + } else { + argv.push_back("--runtime-arg"); + argv.push_back("-Xnorelocate"); + } + if (!kIsTargetBuild) { argv.push_back("--host"); } @@ -680,14 +688,6 @@ const OatFile* ClassLinker::FindOpenedOatFile(const char* oat_location, const ch return NULL; } -static std::string GetMultiDexClassesDexName(size_t number, const char* dex_location) { - if (number == 0) { - return dex_location; - } else { - return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, number + 1); - } -} - static bool LoadMultiDexFilesFromOatFile(const OatFile* oat_file, const char* dex_location, bool generated, std::vector<std::string>* error_msgs, @@ -700,7 +700,7 @@ static bool LoadMultiDexFilesFromOatFile(const OatFile* oat_file, const char* de bool success = true; for (size_t i = 0; success; ++i) { - std::string next_name_str = GetMultiDexClassesDexName(i, dex_location); + std::string next_name_str = DexFile::GetMultiDexClassesDexName(i, dex_location); const char* next_name = next_name_str.c_str(); uint32_t dex_location_checksum; @@ -994,11 +994,25 @@ const OatFile* ClassLinker::CreateOatFileForDexLocation(const char* dex_location return oat_file.release(); } -bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file, - const char* dex_location, - uint32_t dex_location_checksum, - const InstructionSet instruction_set, - std::string* error_msg) { +bool ClassLinker::VerifyOatImageChecksum(const OatFile* oat_file, + const InstructionSet instruction_set) { + Runtime* runtime = Runtime::Current(); + const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace(); + uint32_t image_oat_checksum = 0; + if (instruction_set == kRuntimeISA) { + const ImageHeader& image_header = image_space->GetImageHeader(); + image_oat_checksum = image_header.GetOatChecksum(); + } else { + std::unique_ptr<ImageHeader> image_header(gc::space::ImageSpace::ReadImageHeaderOrDie( + image_space->GetImageLocation().c_str(), instruction_set)); + image_oat_checksum = image_header->GetOatChecksum(); + } + return oat_file->GetOatHeader().GetImageFileLocationOatChecksum() == image_oat_checksum; +} + +bool ClassLinker::VerifyOatChecksums(const OatFile* oat_file, + const InstructionSet instruction_set, + std::string* error_msg) { Runtime* runtime = Runtime::Current(); const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace(); @@ -1021,9 +1035,28 @@ bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file, image_patch_delta = image_header->GetPatchDelta(); } const OatHeader& oat_header = oat_file->GetOatHeader(); - bool image_check = ((oat_header.GetImageFileLocationOatChecksum() == image_oat_checksum) - && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin) - && (oat_header.GetImagePatchDelta() == image_patch_delta)); + bool ret = ((oat_header.GetImageFileLocationOatChecksum() == image_oat_checksum) + && (oat_header.GetImagePatchDelta() == image_patch_delta) + && (oat_header.GetImageFileLocationOatDataBegin() == image_oat_data_begin)); + if (!ret) { + *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d, %d) with (0x%x, %" PRIdPTR ", %d)", + oat_file->GetLocation().c_str(), + oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), + oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), + oat_file->GetOatHeader().GetImagePatchDelta(), + image_oat_checksum, image_oat_data_begin, image_patch_delta); + } + return ret; +} + +bool ClassLinker::VerifyOatAndDexFileChecksums(const OatFile* oat_file, + const char* 
dex_location, + uint32_t dex_location_checksum, + const InstructionSet instruction_set, + std::string* error_msg) { + if (!VerifyOatChecksums(oat_file, instruction_set, error_msg)) { + return false; + } const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location, &dex_location_checksum); @@ -1039,27 +1072,15 @@ bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file, } return false; } - bool dex_check = dex_location_checksum == oat_dex_file->GetDexFileLocationChecksum(); - - if (image_check && dex_check) { - return true; - } - if (!image_check) { - ScopedObjectAccess soa(Thread::Current()); - *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d) with (0x%x, %" PRIdPTR ")", - oat_file->GetLocation().c_str(), - oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), - oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), - image_oat_checksum, image_oat_data_begin); - } - if (!dex_check) { + if (dex_location_checksum != oat_dex_file->GetDexFileLocationChecksum()) { *error_msg = StringPrintf("oat file '%s' mismatch (0x%x) with '%s' (0x%x)", oat_file->GetLocation().c_str(), oat_dex_file->GetDexFileLocationChecksum(), dex_location, dex_location_checksum); + return false; } - return false; + return true; } bool ClassLinker::VerifyOatWithDexFile(const OatFile* oat_file, @@ -1082,8 +1103,8 @@ bool ClassLinker::VerifyOatWithDexFile(const OatFile* oat_file, } dex_file.reset(oat_dex_file->OpenDexFile(error_msg)); } else { - bool verified = VerifyOatFileChecksums(oat_file, dex_location, dex_location_checksum, - kRuntimeISA, error_msg); + bool verified = VerifyOatAndDexFileChecksums(oat_file, dex_location, dex_location_checksum, + kRuntimeISA, error_msg); if (!verified) { return false; } @@ -3495,14 +3516,19 @@ mirror::ArtMethod* ClassLinker::CreateProxyConstructor(Thread* self, proxy_class->GetDirectMethods(); CHECK_EQ(proxy_direct_methods->GetLength(), 16); mirror::ArtMethod* proxy_constructor = proxy_direct_methods->Get(2); - // Clone the existing constructor of Proxy (our constructor would just invoke it so steal its - // code_ too) - mirror::ArtMethod* constructor = - down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self)); - if (constructor == NULL) { + mirror::ArtMethod* constructor = down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self)); + if (constructor == nullptr) { CHECK(self->IsExceptionPending()); // OOME. - return NULL; + return nullptr; } + // Make the proxy constructor's code always point to the uninstrumented code. This avoids + // getting a method enter event for the proxy constructor as the proxy constructor doesn't + // have an activation. + bool have_portable_code; + constructor->SetEntryPointFromQuickCompiledCode(GetQuickOatCodeFor(proxy_constructor)); + constructor->SetEntryPointFromPortableCompiledCode(GetPortableOatCodeFor(proxy_constructor, + &have_portable_code)); + // Make this constructor public and fix the class to be our Proxy version constructor->SetAccessFlags((constructor->GetAccessFlags() & ~kAccProtected) | kAccPublic); constructor->SetDeclaringClass(klass.Get()); diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 1bb1635..8c09042 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -274,12 +274,18 @@ class ClassLinker { std::vector<const DexFile*>* dex_files) LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_); + // Returns true if the given oat file has the same image checksum as the image it is paired with. 
+ static bool VerifyOatImageChecksum(const OatFile* oat_file, const InstructionSet instruction_set); + // Returns true if the oat file checksums match with the image and the offsets are such that it + // could be loaded with it. + static bool VerifyOatChecksums(const OatFile* oat_file, const InstructionSet instruction_set, + std::string* error_msg); // Returns true if oat file contains the dex file with the given location and checksum. - static bool VerifyOatFileChecksums(const OatFile* oat_file, - const char* dex_location, - uint32_t dex_location_checksum, - InstructionSet instruction_set, - std::string* error_msg); + static bool VerifyOatAndDexFileChecksums(const OatFile* oat_file, + const char* dex_location, + uint32_t dex_location_checksum, + InstructionSet instruction_set, + std::string* error_msg); // TODO: replace this with multiple methods that allocate the correct managed type. template <class T> diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index 8e363c4..9972362 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -137,7 +137,17 @@ void CommonRuntimeTest::SetEnvironmentVariables(std::string& android_data) { } // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache - android_data = (IsHost() ? "/tmp/art-data-XXXXXX" : "/data/dalvik-cache/art-data-XXXXXX"); + if (IsHost()) { + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir != nullptr && tmpdir[0] != 0) { + android_data = tmpdir; + } else { + android_data = "/tmp"; + } + } else { + android_data = "/data/dalvik-cache"; + } + android_data += "/art-data-XXXXXX"; if (mkdtemp(&android_data[0]) == nullptr) { PLOG(FATAL) << "mkdtemp(\"" << &android_data[0] << "\") failed"; } @@ -212,7 +222,7 @@ void CommonRuntimeTest::ClearDirectory(const char* dirpath) { if ((strcmp(e->d_name, ".") == 0) || (strcmp(e->d_name, "..") == 0)) { continue; } - std::string filename(dalvik_cache_); + std::string filename(dirpath); filename.push_back('/'); filename.append(e->d_name); int stat_result = lstat(filename.c_str(), &s); @@ -265,6 +275,19 @@ std::string CommonRuntimeTest::GetDexFileName(const std::string& jar_prefix) { return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str()); } +std::string CommonRuntimeTest::GetLibCoreOatFileName() { + return GetOatFileName("core"); +} + +std::string CommonRuntimeTest::GetOatFileName(const std::string& oat_prefix) { + if (IsHost()) { + const char* host_dir = getenv("ANDROID_HOST_OUT"); + CHECK(host_dir != nullptr); + return StringPrintf("%s/framework/%s.art", host_dir, oat_prefix.c_str()); + } + return StringPrintf("%s/framework/%s.art", GetAndroidRoot(), oat_prefix.c_str()); +} + std::string CommonRuntimeTest::GetTestAndroidRoot() { if (IsHost()) { const char* host_dir = getenv("ANDROID_HOST_OUT"); diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index eb96352..363d8da 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -85,10 +85,18 @@ class CommonRuntimeTest : public testing::Test { virtual void TearDown(); + // Gets the path of the libcore dex file. std::string GetLibCoreDexFileName(); + // Gets the path of the specified dex file for host or target. std::string GetDexFileName(const std::string& jar_prefix); + // Gets the path of the libcore oat file. + std::string GetLibCoreOatFileName(); + + // Gets the path of the specified oat file for host or target. 
+ std::string GetOatFileName(const std::string& oat_prefix); + std::string GetTestAndroidRoot(); std::vector<const DexFile*> OpenTestDexFiles(const char* name) diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index e5bc7c8..e1a7771 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -951,6 +951,38 @@ std::pair<const char*, const char*> DexFile::SplitMultiDexLocation( return std::make_pair(tmp, colon_ptr + 1); } +std::string DexFile::GetMultiDexClassesDexName(size_t number, const char* dex_location) { + if (number == 0) { + return dex_location; + } else { + return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, number + 1); + } +} + +std::string DexFile::GetDexCanonicalLocation(const char* dex_location) { + CHECK_NE(dex_location, static_cast<const char*>(nullptr)); + char* path = nullptr; + if (!IsMultiDexLocation(dex_location)) { + path = realpath(dex_location, nullptr); + } else { + std::pair<const char*, const char*> pair = DexFile::SplitMultiDexLocation(dex_location); + const char* dex_real_location(realpath(pair.first, nullptr)); + delete pair.first; + if (dex_real_location != nullptr) { + int length = strlen(dex_real_location) + strlen(pair.second) + strlen(kMultiDexSeparatorString) + 1; + char* multidex_canonical_location = reinterpret_cast<char*>(malloc(sizeof(char) * length)); + snprintf(multidex_canonical_location, length, "%s" kMultiDexSeparatorString "%s", dex_real_location, pair.second); + free(const_cast<char*>(dex_real_location)); + path = multidex_canonical_location; + } + } + + // If realpath fails then we just copy the argument. + std::string result(path == nullptr ? dex_location : path); + free(path); + return result; +} + std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) { os << StringPrintf("[DexFile: %s dex-checksum=%08x location-checksum=%08x %p-%p]", dex_file.GetLocation().c_str(), @@ -958,6 +990,7 @@ std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) { dex_file.Begin(), dex_file.Begin() + dex_file.Size()); return os; } + std::string Signature::ToString() const { if (dex_file_ == nullptr) { CHECK(proto_id_ == nullptr); diff --git a/runtime/dex_file.h b/runtime/dex_file.h index d64a030..2794af6 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -841,6 +841,23 @@ class DexFile { return size_; } + static std::string GetMultiDexClassesDexName(size_t number, const char* dex_location); + + // Returns the canonical form of the given dex location. + // + // There are different flavors of "dex locations" as follows: + // the file name of a dex file: + // The actual file path that the dex file has on disk. + // dex_location: + // This acts as a key for the class linker to know which dex file to load. + // It may correspond to either an old odex file or a particular dex file + // inside an oat file. In the first case it will also match the file name + // of the dex file. In the second case (oat) it will include the file name + // and possibly some multidex annotation to uniquely identify it. + // canonical_dex_location: + // the dex_location where it's file name part has been made canonical. 
+ static std::string GetDexCanonicalLocation(const char* dex_location); + private: // Opens a .dex file static const DexFile* OpenFile(int fd, const char* location, bool verify, std::string* error_msg); diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc index 284aa89..fa13290 100644 --- a/runtime/dex_file_test.cc +++ b/runtime/dex_file_test.cc @@ -345,4 +345,31 @@ TEST_F(DexFileTest, FindFieldId) { } } +TEST_F(DexFileTest, GetMultiDexClassesDexName) { + std::string dex_location_str = "/system/app/framework.jar"; + const char* dex_location = dex_location_str.c_str(); + ASSERT_EQ("/system/app/framework.jar", DexFile::GetMultiDexClassesDexName(0, dex_location)); + ASSERT_EQ("/system/app/framework.jar:classes2.dex", DexFile::GetMultiDexClassesDexName(1, dex_location)); + ASSERT_EQ("/system/app/framework.jar:classes101.dex", DexFile::GetMultiDexClassesDexName(100, dex_location)); +} + +TEST_F(DexFileTest, GetDexCanonicalLocation) { + ScratchFile file; + std::string dex_location = file.GetFilename(); + + ASSERT_EQ(file.GetFilename(), DexFile::GetDexCanonicalLocation(dex_location.c_str())); + std::string multidex_location = DexFile::GetMultiDexClassesDexName(1, dex_location.c_str()); + ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location.c_str())); + + std::string dex_location_sym = dex_location + "symlink"; + ASSERT_EQ(0, symlink(dex_location.c_str(), dex_location_sym.c_str())); + + ASSERT_EQ(dex_location, DexFile::GetDexCanonicalLocation(dex_location_sym.c_str())); + + std::string multidex_location_sym = DexFile::GetMultiDexClassesDexName(1, dex_location_sym.c_str()); + ASSERT_EQ(multidex_location, DexFile::GetDexCanonicalLocation(multidex_location_sym.c_str())); + + ASSERT_EQ(0, unlink(dex_location_sym.c_str())); +} + } // namespace art diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index 594c65f..6179b5e 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -837,6 +837,7 @@ bool ElfFile::Load(bool executable, std::string* error_msg) { } } + bool reserved = false; for (Elf32_Word i = 0; i < GetProgramHeaderNum(); i++) { Elf32_Phdr& program_header = GetProgramHeader(i); @@ -853,10 +854,8 @@ bool ElfFile::Load(bool executable, std::string* error_msg) { // Found something to load. - // If p_vaddr is zero, it must be the first loadable segment, - // since they must be in order. Since it is zero, there isn't a - // specific address requested, so first request a contiguous chunk - // of required size for all segments, but with no + // Before load the actual segments, reserve a contiguous chunk + // of required size and address for all segments, but with no // permissions. We'll then carve that up with the proper // permissions as we load the actual segments. If p_vaddr is // non-zero, the segments require the specific address specified, @@ -870,18 +869,24 @@ bool ElfFile::Load(bool executable, std::string* error_msg) { return false; } size_t file_length = static_cast<size_t>(temp_file_length); - if (program_header.p_vaddr == 0) { + if (!reserved) { + byte* reserve_base = ((program_header.p_vaddr != 0) ? 
+ reinterpret_cast<byte*>(program_header.p_vaddr) : nullptr); std::string reservation_name("ElfFile reservation for "); reservation_name += file_->GetPath(); std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(), - nullptr, GetLoadedSize(), PROT_NONE, false, - error_msg)); + reserve_base, + GetLoadedSize(), PROT_NONE, false, + error_msg)); if (reserve.get() == nullptr) { *error_msg = StringPrintf("Failed to allocate %s: %s", reservation_name.c_str(), error_msg->c_str()); return false; } - base_address_ = reserve->Begin(); + reserved = true; + if (reserve_base == nullptr) { + base_address_ = reserve->Begin(); + } segments_.push_back(reserve.release()); } // empty segment, nothing to map @@ -1335,7 +1340,8 @@ void ElfFile::GdbJITSupport() { const Elf32_Shdr* symtab_sec = all.FindSectionByName(".symtab"); Elf32_Shdr* text_sec = all.FindSectionByName(".text"); if (debug_info == nullptr || debug_abbrev == nullptr || eh_frame == nullptr || - debug_str == nullptr || text_sec == nullptr || strtab_sec == nullptr || symtab_sec == nullptr) { + debug_str == nullptr || text_sec == nullptr || strtab_sec == nullptr || + symtab_sec == nullptr) { return; } // We need to add in a strtab and symtab to the image. diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 49bb65f..fa198d7 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -592,8 +592,7 @@ extern "C" uint64_t artQuickProxyInvokeHandler(mirror::ArtMethod* proxy_method, const char* old_cause = self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments"); // Register the top of the managed stack, making stack crawlable. - DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) - << PrettyMethod(proxy_method); + DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method); self->SetTopOfStack(sp, 0); DCHECK_EQ(proxy_method->GetFrameSizeInBytes(), Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes()) diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h index 46b9363..217360f 100644 --- a/runtime/gc/accounting/card_table-inl.h +++ b/runtime/gc/accounting/card_table-inl.h @@ -37,12 +37,13 @@ static inline bool byte_cas(byte old_value, byte new_value, byte* address) { // Align the address down. address -= shift_in_bytes; const size_t shift_in_bits = shift_in_bytes * kBitsPerByte; - AtomicInteger* word_atomic = reinterpret_cast<AtomicInteger*>(address); + Atomic<uintptr_t>* word_atomic = reinterpret_cast<Atomic<uintptr_t>*>(address); // Word with the byte we are trying to cas cleared. 
- const int32_t cur_word = word_atomic->LoadRelaxed() & ~(0xFF << shift_in_bits); - const int32_t old_word = cur_word | (static_cast<int32_t>(old_value) << shift_in_bits); - const int32_t new_word = cur_word | (static_cast<int32_t>(new_value) << shift_in_bits); + const uintptr_t cur_word = word_atomic->LoadRelaxed() & + ~(static_cast<uintptr_t>(0xFF) << shift_in_bits); + const uintptr_t old_word = cur_word | (static_cast<uintptr_t>(old_value) << shift_in_bits); + const uintptr_t new_word = cur_word | (static_cast<uintptr_t>(new_value) << shift_in_bits); return word_atomic->CompareExchangeWeakRelaxed(old_word, new_word); #endif } diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc index ceb42e5..0498550 100644 --- a/runtime/gc/accounting/card_table.cc +++ b/runtime/gc/accounting/card_table.cc @@ -28,6 +28,11 @@ namespace art { namespace gc { namespace accounting { +constexpr size_t CardTable::kCardShift; +constexpr size_t CardTable::kCardSize; +constexpr uint8_t CardTable::kCardClean; +constexpr uint8_t CardTable::kCardDirty; + /* * Maintain a card table from the write barrier. All writes of * non-NULL values to heap addresses should go through an entry in @@ -55,9 +60,9 @@ CardTable* CardTable::Create(const byte* heap_begin, size_t heap_capacity) { size_t capacity = heap_capacity / kCardSize; /* Allocate an extra 256 bytes to allow fixed low-byte of base */ std::string error_msg; - std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL, - capacity + 256, PROT_READ | PROT_WRITE, - false, &error_msg)); + std::unique_ptr<MemMap> mem_map( + MemMap::MapAnonymous("card table", nullptr, capacity + 256, PROT_READ | PROT_WRITE, + false, &error_msg)); CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg; // All zeros is the correct initial value; all clean. Anonymous mmaps are initialized to zero, we // don't clear the card table to avoid unnecessary pages being allocated @@ -67,17 +72,17 @@ CardTable* CardTable::Create(const byte* heap_begin, size_t heap_capacity) { CHECK(cardtable_begin != NULL); // We allocated up to a bytes worth of extra space to allow biased_begin's byte value to equal - // GC_CARD_DIRTY, compute a offset value to make this the case + // kCardDirty, compute a offset value to make this the case size_t offset = 0; byte* biased_begin = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(cardtable_begin) - (reinterpret_cast<uintptr_t>(heap_begin) >> kCardShift)); - if (((uintptr_t)biased_begin & 0xff) != kCardDirty) { - int delta = kCardDirty - (reinterpret_cast<uintptr_t>(biased_begin) & 0xff); + uintptr_t biased_byte = reinterpret_cast<uintptr_t>(biased_begin) & 0xff; + if (biased_byte != kCardDirty) { + int delta = kCardDirty - biased_byte; offset = delta + (delta < 0 ? 0x100 : 0); biased_begin += offset; } CHECK_EQ(reinterpret_cast<uintptr_t>(biased_begin) & 0xff, kCardDirty); - return new CardTable(mem_map.release(), biased_begin, offset); } diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index 7934974..fbeea85 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -46,10 +46,10 @@ template<size_t kAlignment> class SpaceBitmap; // WriteBarrier, and from there to here. 
class CardTable { public: - static const size_t kCardShift = 7; - static const size_t kCardSize = (1 << kCardShift); - static const uint8_t kCardClean = 0x0; - static const uint8_t kCardDirty = 0x70; + static constexpr size_t kCardShift = 7; + static constexpr size_t kCardSize = 1 << kCardShift; + static constexpr uint8_t kCardClean = 0x0; + static constexpr uint8_t kCardDirty = 0x70; static CardTable* Create(const byte* heap_begin, size_t heap_capacity); diff --git a/runtime/gc/accounting/card_table_test.cc b/runtime/gc/accounting/card_table_test.cc new file mode 100644 index 0000000..a88b2c9 --- /dev/null +++ b/runtime/gc/accounting/card_table_test.cc @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "card_table-inl.h" + +#include <string> + +#include "atomic.h" +#include "common_runtime_test.h" +#include "handle_scope-inl.h" +#include "mirror/class-inl.h" +#include "mirror/string-inl.h" // Strings are easiest to allocate +#include "scoped_thread_state_change.h" +#include "thread_pool.h" +#include "utils.h" + +namespace art { + +namespace mirror { + class Object; +} // namespace mirror + +class CardTableTest : public CommonRuntimeTest { + public: + std::unique_ptr<gc::accounting::CardTable> card_table_; + static constexpr size_t kCardSize = gc::accounting::CardTable::kCardSize; + + void CommonSetup() { + if (card_table_.get() == nullptr) { + card_table_.reset(gc::accounting::CardTable::Create(heap_begin_, heap_size_)); + EXPECT_TRUE(card_table_.get() != nullptr); + } else { + ClearCardTable(); + } + } + // Default values for the test, not random to avoid undeterministic behaviour. 
+ CardTableTest() : heap_begin_(reinterpret_cast<byte*>(0x2000000)), heap_size_(2 * MB) { + } + void ClearCardTable() { + card_table_->ClearCardTable(); + } + byte* HeapBegin() const { + return heap_begin_; + } + byte* HeapLimit() const { + return HeapBegin() + heap_size_; + } + byte PRandCard(const byte* addr) const { + size_t offset = RoundDown(addr - heap_begin_, kCardSize); + return 1 + offset % 254; + } + void FillRandom() { + for (const byte* addr = HeapBegin(); addr != HeapLimit(); addr += kCardSize) { + EXPECT_TRUE(card_table_->AddrIsInCardTable(addr)); + byte* card = card_table_->CardFromAddr(addr); + *card = PRandCard(addr); + } + } + + private: + byte* const heap_begin_; + const size_t heap_size_; +}; + +TEST_F(CardTableTest, TestMarkCard) { + CommonSetup(); + for (const byte* addr = HeapBegin(); addr < HeapLimit(); addr += kObjectAlignment) { + auto obj = reinterpret_cast<const mirror::Object*>(addr); + EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardClean); + EXPECT_TRUE(!card_table_->IsDirty(obj)); + card_table_->MarkCard(addr); + EXPECT_TRUE(card_table_->IsDirty(obj)); + EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardDirty); + byte* card_addr = card_table_->CardFromAddr(addr); + EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardDirty); + *card_addr = gc::accounting::CardTable::kCardClean; + EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardClean); + } +} + +class UpdateVisitor { + public: + byte operator()(byte c) const { + return c * 93 + 123; + } + void operator()(byte* /*card*/, byte /*expected_value*/, byte /*new_value*/) const { + } +}; + +TEST_F(CardTableTest, TestModifyCardsAtomic) { + CommonSetup(); + FillRandom(); + const size_t delta = std::min(static_cast<size_t>(HeapLimit() - HeapBegin()), 8U * kCardSize); + UpdateVisitor visitor; + size_t start_offset = 0; + for (byte* cstart = HeapBegin(); cstart < HeapBegin() + delta; cstart += kCardSize) { + start_offset = (start_offset + kObjectAlignment) % kCardSize; + size_t end_offset = 0; + for (byte* cend = HeapLimit() - delta; cend < HeapLimit(); cend += kCardSize) { + // Don't always start at a card boundary. + byte* start = cstart + start_offset; + byte* end = cend - end_offset; + end_offset = (end_offset + kObjectAlignment) % kCardSize; + // Modify cards. + card_table_->ModifyCardsAtomic(start, end, visitor, visitor); + // Check adjacent cards not modified. + for (byte* cur = start - kCardSize; cur >= HeapBegin(); cur -= kCardSize) { + EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur)); + } + for (byte* cur = end + kCardSize; cur < HeapLimit(); cur += kCardSize) { + EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur)); + } + // Verify Range. + for (byte* cur = start; cur < AlignUp(end, kCardSize); cur += kCardSize) { + byte* card = card_table_->CardFromAddr(cur); + byte value = PRandCard(cur); + if (visitor(value) != *card) { + LOG(ERROR) << reinterpret_cast<void*>(start) << " " << reinterpret_cast<void*>(cur) << " " << reinterpret_cast<void*>(end); + } + EXPECT_EQ(visitor(value), *card); + // Restore for next iteration. + *card = value; + } + } + } +} + +// TODO: Add test for CardTable::Scan. 
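The card-table changes above all lean on one piece of arithmetic: with kCardShift = 7 (so kCardSize = 128) and a table base biased so that its low byte equals kCardDirty (0x70), the card byte for a heap address is simply biased_begin + (addr >> kCardShift). A minimal standalone sketch of that arithmetic follows; the table and heap addresses are made up for illustration, and only the constants and the biasing step mirror card_table.h/card_table.cc:

// Standalone illustration of card-table addressing; hypothetical addresses,
// constants (kCardShift = 7, kCardDirty = 0x70) taken from card_table.h.
#include <cassert>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t kCardShift = 7;
constexpr size_t kCardSize = 1 << kCardShift;  // 128-byte cards
constexpr uint8_t kCardDirty = 0x70;

int main() {
  uintptr_t table_begin = 0x74000000;  // pretend the card table was mapped here
  uintptr_t heap_begin = 0x2000000;    // same fake heap base the test fixture uses

  // Bias the base so that the card lookup is a shift plus an add.
  uintptr_t biased_begin = table_begin - (heap_begin >> kCardShift);
  // Nudge it so its low byte equals kCardDirty, as CardTable::Create() does.
  uintptr_t low = biased_begin & 0xff;
  if (low != kCardDirty) {
    int delta = static_cast<int>(kCardDirty) - static_cast<int>(low);
    biased_begin += delta + (delta < 0 ? 0x100 : 0);
  }
  assert((biased_begin & 0xff) == kCardDirty);

  // One card byte covers kCardSize bytes of heap.
  uintptr_t obj = heap_begin + 5 * kCardSize + 17;
  uintptr_t card = biased_begin + (obj >> kCardShift);
  printf("card for 0x%" PRIxPTR " is at 0x%" PRIxPTR "\n", obj, card);
  return 0;
}

Marking a card is then a single byte store of kCardDirty to that address, which is what MarkCard() and the TestMarkCard test above exercise.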
+ +} // namespace art diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index fc6d2ef..1d10af2 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -166,7 +166,8 @@ static bool ReadSpecificImageHeader(const char* filename, ImageHeader* image_hea return true; } -bool ImageSpace::RelocateImage(const char* image_location, const char* dest_filename, +// Relocate the image at image_location to dest_filename and relocate it by a random amount. +static bool RelocateImage(const char* image_location, const char* dest_filename, InstructionSet isa, std::string* error_msg) { std::string patchoat(Runtime::Current()->GetPatchoatExecutable()); diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index debca52..6be3b8f 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -124,9 +124,6 @@ class ImageSpace : public MemMapSpace { bool validate_oat_file, std::string* error_msg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static bool RelocateImage(const char* image_location, const char* dest_filename, - InstructionSet isa, std::string* error_msg); - OatFile* OpenOatFile(const char* image, std::string* error_msg) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/globals.h b/runtime/globals.h index 1d9f22c..107e064 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -118,6 +118,8 @@ static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceDu static constexpr TraceClockSource kDefaultTraceClockSource = kTraceClockSourceWall; #endif +static constexpr bool kDefaultMustRelocate = true; + } // namespace art #endif // ART_RUNTIME_GLOBALS_H_ diff --git a/runtime/lock_word.h b/runtime/lock_word.h index ab86eaa..e585412 100644 --- a/runtime/lock_word.h +++ b/runtime/lock_word.h @@ -65,7 +65,7 @@ class LockWord { kThinLockOwnerMask = (1 << kThinLockOwnerSize) - 1, // Count in higher bits. kThinLockCountShift = kThinLockOwnerSize + kThinLockOwnerShift, - kThinLockCountMask = (1 << kThinLockCountShift) - 1, + kThinLockCountMask = (1 << kThinLockCountSize) - 1, kThinLockMaxCount = kThinLockCountMask, // State in the highest bits. diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 1074253..6c7ee5b 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -130,8 +130,67 @@ static uintptr_t GenerateNextMemPos() { uintptr_t MemMap::next_mem_pos_ = GenerateNextMemPos(); #endif +// Return true if the address range is contained in a single /proc/self/map entry. +static bool CheckOverlapping(uintptr_t begin, + uintptr_t end, + std::string* error_msg) { + std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true)); + if (!map->Build()) { + *error_msg = StringPrintf("Failed to build process map"); + return false; + } + for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) { + if ((begin >= it->start && begin < it->end) // start of new within old + && (end > it->start && end <= it->end)) { // end of new within old + return true; + } + } + std::string maps; + ReadFileToString("/proc/self/maps", &maps); + *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap " + "any existing map:\n%s\n", + begin, end, maps.c_str()); + return false; +} + +// Return true if the address range does not conflict with any /proc/self/maps entry. 
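CheckOverlapping above and CheckNonOverlapping just below are mirror-image tests of a candidate [begin, end) range against the entries in /proc/self/maps. Stripped of the BacktraceMap plumbing, the two predicates reduce to the interval checks sketched here (illustrative names, plain integer ranges standing in for map entries):

#include <cstdint>

// The new range lies entirely inside one existing map entry.
static bool ContainedIn(uintptr_t begin, uintptr_t end, uintptr_t m_begin, uintptr_t m_end) {
  return (begin >= m_begin && begin < m_end) &&  // start of new within old
         (end > m_begin && end <= m_end);        // end of new within old
}

// The new range conflicts with an existing map entry: either endpoint falls
// inside it, or the new range swallows the entry whole.
static bool ConflictsWith(uintptr_t begin, uintptr_t end, uintptr_t m_begin, uintptr_t m_end) {
  return (begin >= m_begin && begin < m_end) ||
         (end > m_begin && end < m_end) ||
         (begin <= m_begin && end > m_end);
}

CheckOverlapping succeeds when ContainedIn holds for some map entry; CheckNonOverlapping succeeds when ConflictsWith holds for none of them.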
+static bool CheckNonOverlapping(uintptr_t begin, + uintptr_t end, + std::string* error_msg) { + std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true)); + if (!map->Build()) { + *error_msg = StringPrintf("Failed to build process map"); + return false; + } + for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) { + if ((begin >= it->start && begin < it->end) // start of new within old + || (end > it->start && end < it->end) // end of new within old + || (begin <= it->start && end > it->end)) { // start/end of new includes all of old + std::ostringstream map_info; + map_info << std::make_pair(it, map->end()); + *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " overlaps with " + "existing map 0x%08" PRIxPTR "-0x%08" PRIxPTR " (%s)\n%s", + begin, end, + static_cast<uintptr_t>(it->start), static_cast<uintptr_t>(it->end), + it->name.c_str(), + map_info.str().c_str()); + return false; + } + } + return true; +} + +// CheckMapRequest to validate a non-MAP_FAILED mmap result based on +// the expected value, calling munmap if validation fails, giving the +// reason in error_msg. +// +// If the expected_ptr is nullptr, nothing is checked beyond the fact +// that the actual_ptr is not MAP_FAILED. However, if expected_ptr is +// non-null, we check that pointer is the actual_ptr == expected_ptr, +// and if not, report in error_msg what the conflict mapping was if +// found, or a generic error in other cases. static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_count, - std::ostringstream* error_msg) { + std::string* error_msg) { // Handled first by caller for more specific error messages. CHECK(actual_ptr != MAP_FAILED); @@ -139,6 +198,10 @@ static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_co return true; } + uintptr_t actual = reinterpret_cast<uintptr_t>(actual_ptr); + uintptr_t expected = reinterpret_cast<uintptr_t>(expected_ptr); + uintptr_t limit = expected + byte_count; + if (expected_ptr == actual_ptr) { return true; } @@ -149,40 +212,19 @@ static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_co PLOG(WARNING) << StringPrintf("munmap(%p, %zd) failed", actual_ptr, byte_count); } - uintptr_t actual = reinterpret_cast<uintptr_t>(actual_ptr); - uintptr_t expected = reinterpret_cast<uintptr_t>(expected_ptr); - uintptr_t limit = expected + byte_count; - - std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid())); - if (!map->Build()) { - *error_msg << StringPrintf("Failed to build process map to determine why mmap returned " - "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR, actual, expected); - + if (!CheckNonOverlapping(expected, limit, error_msg)) { return false; } - for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) { - if ((expected >= it->start && expected < it->end) // start of new within old - || (limit > it->start && limit < it->end) // end of new within old - || (expected <= it->start && limit > it->end)) { // start/end of new includes all of old - *error_msg - << StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " overlaps with " - "existing map 0x%08" PRIxPTR "-0x%08" PRIxPTR " (%s)\n", - expected, limit, - static_cast<uintptr_t>(it->start), static_cast<uintptr_t>(it->end), - it->name.c_str()) - << std::make_pair(it, map->end()); - return false; - } - } - *error_msg << StringPrintf("Failed to mmap at expected address, mapped at " - "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR, actual, expected); + + 
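Stripped of BacktraceMap and error reporting, the CheckOverlapping and CheckNonOverlapping helpers above are half-open interval tests against the entries of /proc/self/maps. A self-contained sketch of just that interval logic (the MapEntry struct and the sample ranges are invented for illustration):

#include <cassert>
#include <cstdint>
#include <vector>

struct MapEntry {   // stand-in for one /proc/self/maps entry
  uintptr_t start;
  uintptr_t end;    // half-open: [start, end)
};

// True if [begin, end) intersects any existing mapping; this is the condition
// CheckNonOverlapping rejects.
bool OverlapsAny(uintptr_t begin, uintptr_t end, const std::vector<MapEntry>& maps) {
  for (const MapEntry& m : maps) {
    bool starts_inside = begin >= m.start && begin < m.end;  // start of new within old
    bool ends_inside = end > m.start && end < m.end;         // end of new within old
    bool covers = begin <= m.start && end > m.end;           // new includes all of old
    if (starts_inside || ends_inside || covers) {
      return true;
    }
  }
  return false;
}

// True if [begin, end) sits entirely inside a single mapping; this is what
// CheckOverlapping requires for the reuse case.
bool ContainedInOne(uintptr_t begin, uintptr_t end, const std::vector<MapEntry>& maps) {
  for (const MapEntry& m : maps) {
    if (begin >= m.start && end <= m.end) {
      return true;
    }
  }
  return false;
}

int main() {
  std::vector<MapEntry> maps = {{0x1000, 0x3000}, {0x5000, 0x6000}};
  assert(OverlapsAny(0x2000, 0x4000, maps));     // straddles the first mapping
  assert(!OverlapsAny(0x3000, 0x5000, maps));    // fits in the hole between them
  assert(ContainedInOne(0x1000, 0x2000, maps));  // wholly inside the first mapping
  return 0;
}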
*error_msg = StringPrintf("Failed to mmap at expected address, mapped at " + "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR, actual, expected); return false; } -MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count, int prot, +MemMap* MemMap::MapAnonymous(const char* name, byte* expected_ptr, size_t byte_count, int prot, bool low_4gb, std::string* error_msg) { if (byte_count == 0) { - return new MemMap(name, nullptr, 0, nullptr, 0, prot); + return new MemMap(name, nullptr, 0, nullptr, 0, prot, false); } size_t page_aligned_byte_count = RoundUp(byte_count, kPageSize); @@ -222,11 +264,11 @@ MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count // 4GB. if (low_4gb && ( // Start out of bounds. - (reinterpret_cast<uintptr_t>(expected) >> 32) != 0 || + (reinterpret_cast<uintptr_t>(expected_ptr) >> 32) != 0 || // End out of bounds. For simplicity, this will fail for the last page of memory. - (reinterpret_cast<uintptr_t>(expected + page_aligned_byte_count) >> 32) != 0)) { + (reinterpret_cast<uintptr_t>(expected_ptr + page_aligned_byte_count) >> 32) != 0)) { *error_msg = StringPrintf("The requested address space (%p, %p) cannot fit in low_4gb", - expected, expected + page_aligned_byte_count); + expected_ptr, expected_ptr + page_aligned_byte_count); return nullptr; } #endif @@ -238,7 +280,7 @@ MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count #if USE_ART_LOW_4G_ALLOCATOR // MAP_32BIT only available on x86_64. void* actual = MAP_FAILED; - if (low_4gb && expected == nullptr) { + if (low_4gb && expected_ptr == nullptr) { bool first_run = true; for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) { @@ -294,18 +336,18 @@ MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count saved_errno = ENOMEM; } } else { - actual = mmap(expected, page_aligned_byte_count, prot, flags, fd.get(), 0); + actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0); saved_errno = errno; } #else #if defined(__LP64__) - if (low_4gb && expected == nullptr) { + if (low_4gb && expected_ptr == nullptr) { flags |= MAP_32BIT; } #endif - void* actual = mmap(expected, page_aligned_byte_count, prot, flags, fd.get(), 0); + void* actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0); saved_errno = errno; #endif @@ -314,44 +356,51 @@ MemMap* MemMap::MapAnonymous(const char* name, byte* expected, size_t byte_count ReadFileToString("/proc/self/maps", &maps); *error_msg = StringPrintf("Failed anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0): %s\n%s", - expected, page_aligned_byte_count, prot, flags, fd.get(), + expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), strerror(saved_errno), maps.c_str()); return nullptr; } std::ostringstream check_map_request_error_msg; - if (!CheckMapRequest(expected, actual, page_aligned_byte_count, &check_map_request_error_msg)) { - *error_msg = check_map_request_error_msg.str(); + if (!CheckMapRequest(expected_ptr, actual, page_aligned_byte_count, error_msg)) { return nullptr; } return new MemMap(name, reinterpret_cast<byte*>(actual), byte_count, actual, - page_aligned_byte_count, prot); + page_aligned_byte_count, prot, false); } -MemMap* MemMap::MapFileAtAddress(byte* expected, size_t byte_count, int prot, int flags, int fd, +MemMap* MemMap::MapFileAtAddress(byte* expected_ptr, size_t byte_count, int prot, int flags, int fd, off_t start, bool reuse, const char* filename, std::string* error_msg) { CHECK_NE(0, prot); CHECK_NE(0, flags & 
(MAP_SHARED | MAP_PRIVATE)); + uintptr_t expected = reinterpret_cast<uintptr_t>(expected_ptr); + uintptr_t limit = expected + byte_count; if (reuse) { // reuse means it is okay that it overlaps an existing page mapping. // Only use this if you actually made the page reservation yourself. - CHECK(expected != nullptr); + CHECK(expected_ptr != nullptr); + if (!CheckOverlapping(expected, limit, error_msg)) { + return nullptr; + } flags |= MAP_FIXED; } else { CHECK_EQ(0, flags & MAP_FIXED); + if (expected_ptr != nullptr && !CheckNonOverlapping(expected, limit, error_msg)) { + return nullptr; + } } if (byte_count == 0) { - return new MemMap(filename, nullptr, 0, nullptr, 0, prot); + return new MemMap(filename, nullptr, 0, nullptr, 0, prot, false); } // Adjust 'offset' to be page-aligned as required by mmap. int page_offset = start % kPageSize; off_t page_aligned_offset = start - page_offset; // Adjust 'byte_count' to be page-aligned as we will map this anyway. size_t page_aligned_byte_count = RoundUp(byte_count + page_offset, kPageSize); - // The 'expected' is modified (if specified, ie non-null) to be page aligned to the file but not - // necessarily to virtual memory. mmap will page align 'expected' for us. - byte* page_aligned_expected = (expected == nullptr) ? nullptr : (expected - page_offset); + // The 'expected_ptr' is modified (if specified, ie non-null) to be page aligned to the file but + // not necessarily to virtual memory. mmap will page align 'expected' for us. + byte* page_aligned_expected = (expected_ptr == nullptr) ? nullptr : (expected_ptr - page_offset); byte* actual = reinterpret_cast<byte*>(mmap(page_aligned_expected, page_aligned_byte_count, @@ -373,21 +422,22 @@ MemMap* MemMap::MapFileAtAddress(byte* expected, size_t byte_count, int prot, in return nullptr; } std::ostringstream check_map_request_error_msg; - if (!CheckMapRequest(expected, actual, page_aligned_byte_count, &check_map_request_error_msg)) { - *error_msg = check_map_request_error_msg.str(); + if (!CheckMapRequest(expected_ptr, actual, page_aligned_byte_count, error_msg)) { return nullptr; } return new MemMap(filename, actual + page_offset, byte_count, actual, page_aligned_byte_count, - prot); + prot, reuse); } MemMap::~MemMap() { if (base_begin_ == nullptr && base_size_ == 0) { return; } - int result = munmap(base_begin_, base_size_); - if (result == -1) { - PLOG(FATAL) << "munmap failed"; + if (!reuse_) { + int result = munmap(base_begin_, base_size_); + if (result == -1) { + PLOG(FATAL) << "munmap failed"; + } } // Remove it from maps_. 
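The reuse flag threaded through MapFileAtAddress and the destructor above changes ownership: a map created with reuse set is only a view of pages someone else reserved, so tearing it down must not munmap them. A hedged sketch of that split using plain POSIX mmap/munmap (an illustrative class, not ART's MemMap):

#include <sys/mman.h>
#include <cassert>
#include <cstddef>

// Owns (or merely views) a mapping; only the owner unmaps in its destructor.
class MappingView {
 public:
  MappingView(void* base, size_t size, bool reuse)
      : base_(base), size_(size), reuse_(reuse) {}
  ~MappingView() {
    if (!reuse_ && base_ != nullptr && base_ != MAP_FAILED) {
      munmap(base_, size_);
    }
  }
  MappingView(const MappingView&) = delete;
  MappingView& operator=(const MappingView&) = delete;
  void* base() const { return base_; }

 private:
  void* const base_;
  const size_t size_;
  const bool reuse_;
};

int main() {
  const size_t size = 4096;
  void* pages = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  assert(pages != MAP_FAILED);
  MappingView owner(pages, size, /*reuse=*/false);
  {
    // A non-owning view of the same pages: its destructor skips munmap,
    // mirroring the !reuse_ guard added to MemMap::~MemMap above.
    MappingView view(pages, size, /*reuse=*/true);
    assert(view.base() == owner.base());
  }  // view destroyed here, the pages stay mapped
  return 0;  // owner's destructor unmaps them exactly once
}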
@@ -405,9 +455,9 @@ MemMap::~MemMap() { } MemMap::MemMap(const std::string& name, byte* begin, size_t size, void* base_begin, - size_t base_size, int prot) + size_t base_size, int prot, bool reuse) : name_(name), begin_(begin), size_(size), base_begin_(base_begin), base_size_(base_size), - prot_(prot) { + prot_(prot), reuse_(reuse) { if (size_ == 0) { CHECK(begin_ == nullptr); CHECK(base_begin_ == nullptr); @@ -437,7 +487,7 @@ MemMap* MemMap::RemapAtEnd(byte* new_end, const char* tail_name, int tail_prot, byte* new_base_end = new_end; DCHECK_LE(new_base_end, old_base_end); if (new_base_end == old_base_end) { - return new MemMap(tail_name, nullptr, 0, nullptr, 0, tail_prot); + return new MemMap(tail_name, nullptr, 0, nullptr, 0, tail_prot, false); } size_ = new_end - reinterpret_cast<byte*>(begin_); base_size_ = new_base_end - reinterpret_cast<byte*>(base_begin_); @@ -489,7 +539,7 @@ MemMap* MemMap::RemapAtEnd(byte* new_end, const char* tail_name, int tail_prot, maps.c_str()); return nullptr; } - return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot); + return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false); } void MemMap::MadviseDontNeedAndZero() { diff --git a/runtime/mem_map.h b/runtime/mem_map.h index defa6a5..872c63b 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -73,7 +73,9 @@ class MemMap { // Map part of a file, taking care of non-page aligned offsets. The // "start" offset is absolute, not relative. This version allows - // requesting a specific address for the base of the mapping. + // requesting a specific address for the base of the + // mapping. "reuse" allows us to create a view into an existing + // mapping where we do not take ownership of the memory. // // On success, returns returns a MemMap instance. On failure, returns a NULL; static MemMap* MapFileAtAddress(byte* addr, size_t byte_count, int prot, int flags, int fd, @@ -134,7 +136,7 @@ class MemMap { private: MemMap(const std::string& name, byte* begin, size_t size, void* base_begin, size_t base_size, - int prot) LOCKS_EXCLUDED(Locks::mem_maps_lock_); + int prot, bool reuse) LOCKS_EXCLUDED(Locks::mem_maps_lock_); static void DumpMaps(std::ostream& os, const std::multimap<void*, MemMap*>& mem_maps) LOCKS_EXCLUDED(Locks::mem_maps_lock_); @@ -145,7 +147,7 @@ class MemMap { static MemMap* GetLargestMemMapAt(void* address) EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_); - std::string name_; + const std::string name_; byte* const begin_; // Start of data. size_t size_; // Length of data. @@ -153,6 +155,11 @@ class MemMap { size_t base_size_; // Length of mapping. May be changed by RemapAtEnd (ie Zygote). int prot_; // Protection of the map. + // When reuse_ is true, this is just a view of an existing mapping + // and we do not take ownership and are not responsible for + // unmapping. + const bool reuse_; + #if USE_ART_LOW_4G_ALLOCATOR static uintptr_t next_mem_pos_; // Next memory location to check for low_4g extent. 
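The mem_map.h comment above ("taking care of non-page aligned offsets") covers a small calculation: mmap requires a page-aligned file offset, so the mapping starts at the enclosing page boundary and the caller's pointer is advanced by the residue. A worked sketch of that arithmetic (a 4 KiB page size is assumed and no mmap call is made):

#include <cassert>
#include <cstddef>

constexpr size_t kPageSize = 4096;  // assumed page size

size_t RoundUp(size_t x, size_t n) { return (x + n - 1) / n * n; }

struct FileMapPlan {
  size_t page_aligned_offset;      // offset actually passed to mmap
  size_t page_offset;              // residue inside the first mapped page
  size_t page_aligned_byte_count;  // length actually passed to mmap
};

// Plan a mapping of byte_count bytes starting at absolute file offset start.
FileMapPlan PlanFileMapping(size_t start, size_t byte_count) {
  FileMapPlan plan;
  plan.page_offset = start % kPageSize;
  plan.page_aligned_offset = start - plan.page_offset;
  plan.page_aligned_byte_count = RoundUp(byte_count + plan.page_offset, kPageSize);
  return plan;
}

int main() {
  // Map 100 bytes that begin at file offset 5000.
  FileMapPlan plan = PlanFileMapping(5000, 100);
  assert(plan.page_aligned_offset == 4096);      // nearest page boundary below 5000
  assert(plan.page_offset == 904);               // 5000 - 4096
  assert(plan.page_aligned_byte_count == 4096);  // 904 + 100 rounded up to one page
  // The caller maps page_aligned_byte_count bytes at page_aligned_offset and
  // hands back (mapped_base + page_offset) as the data pointer.
  return 0;
}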
#endif diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc index 4882728..8eacb1c 100644 --- a/runtime/mirror/art_method.cc +++ b/runtime/mirror/art_method.cc @@ -157,12 +157,12 @@ ArtMethod* ArtMethod::FindOverriddenMethod() { } } } -#ifndef NDEBUG - StackHandleScope<2> hs(Thread::Current()); - MethodHelper result_mh(hs.NewHandle(result)); - MethodHelper this_mh(hs.NewHandle(this)); - DCHECK(result == NULL || this_mh.HasSameNameAndSignature(&result_mh)); -#endif + if (kIsDebugBuild) { + StackHandleScope<2> hs(Thread::Current()); + MethodHelper result_mh(hs.NewHandle(result)); + MethodHelper this_mh(hs.NewHandle(this)); + DCHECK(result == nullptr || this_mh.HasSameNameAndSignature(&result_mh)); + } return result; } diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc index 440a6be..4964aa0 100644 --- a/runtime/monitor_pool.cc +++ b/runtime/monitor_pool.cc @@ -52,7 +52,7 @@ void MonitorPool::AllocateChunk() { monitor_chunks_.StoreRelaxed(new_backing); capacity_ = new_capacity; old_chunk_arrays_.push_back(old_backing); - LOG(INFO) << "Resizing to capacity " << capacity_; + VLOG(monitor) << "Resizing to capacity " << capacity_; } } @@ -64,7 +64,7 @@ void MonitorPool::AllocateChunk() { CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment); // Add the chunk. - *(monitor_chunks_.LoadRelaxed()+num_chunks_) = reinterpret_cast<uintptr_t>(chunk); + *(monitor_chunks_.LoadRelaxed() + num_chunks_) = reinterpret_cast<uintptr_t>(chunk); num_chunks_++; // Set up the free list @@ -96,7 +96,7 @@ Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::O // Enough space, or need to resize? if (first_free_ == nullptr) { - LOG(INFO) << "Allocating a new chunk."; + VLOG(monitor) << "Allocating a new chunk."; AllocateChunk(); } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index ac1a310..0af2c22 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -275,8 +275,96 @@ static void CopyProfileFile(const char* oldfile, const char* newfile) { } } -static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, +// Java: dalvik.system.DexFile.UP_TO_DATE +static const jbyte kUpToDate = 0; +// Java: dalvik.system.DexFile.DEXOPT_NEEDED +static const jbyte kPatchoatNeeded = 1; +// Java: dalvik.system.DexFile.PATCHOAT_NEEDED +static const jbyte kDexoptNeeded = 2; + +template <const bool kVerboseLogging, const bool kReasonLogging> +static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* filename, + InstructionSet target_instruction_set) { + std::string error_msg; + std::unique_ptr<const OatFile> oat_file(OatFile::Open(oat_filename, oat_filename, nullptr, + false, &error_msg)); + if (oat_file.get() == nullptr) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << oat_filename + << "' for file location '" << filename << "': " << error_msg; + } + error_msg.clear(); + return kDexoptNeeded; + } + bool should_relocate_if_possible = Runtime::Current()->ShouldRelocate(); + uint32_t location_checksum = 0; + const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, nullptr, + kReasonLogging); + if (oat_dex_file != nullptr) { + // If its not possible to read the classes.dex assume up-to-date as we won't be able to + // compile it anyway. 
+ if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded found precompiled stripped file: " + << filename << " for " << oat_filename << ": " << error_msg; + } + if (ClassLinker::VerifyOatChecksums(oat_file.get(), target_instruction_set, &error_msg)) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " is up-to-date for " << filename; + } + return kUpToDate; + } else if (should_relocate_if_possible && + ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " needs to be relocated for " << filename; + } + return kPatchoatNeeded; + } else { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " is out of date for " << filename; + } + return kDexoptNeeded; + } + // If we get here the file is out of date and we should use the system one to relocate. + } else { + if (ClassLinker::VerifyOatAndDexFileChecksums(oat_file.get(), filename, location_checksum, + target_instruction_set, &error_msg)) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " is up-to-date for " << filename; + } + return kUpToDate; + } else if (location_checksum == oat_dex_file->GetDexFileLocationChecksum() + && should_relocate_if_possible + && ClassLinker::VerifyOatImageChecksum(oat_file.get(), target_instruction_set)) { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " needs to be relocated for " << filename; + } + return kPatchoatNeeded; + } else { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " is out of date for " << filename; + } + return kDexoptNeeded; + } + } + } else { + if (kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeeded file " << oat_filename + << " does not contain " << filename; + } + return kDexoptNeeded; + } +} + +static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, const char* pkgname, const char* instruction_set, const jboolean defer) { + // TODO disable this logging. const bool kVerboseLogging = false; // Spammy logging. const bool kReasonLogging = true; // Logging of reason for returning JNI_TRUE. @@ -285,7 +373,7 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException")); const char* message = (filename == nullptr) ? "<empty file name>" : filename; env->ThrowNew(fnfe.get(), message); - return JNI_FALSE; + return kUpToDate; } // Always treat elements of the bootclasspath as up-to-date. The @@ -301,78 +389,45 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (kVerboseLogging) { LOG(INFO) << "DexFile_isDexOptNeeded ignoring boot class path file: " << filename; } - return JNI_FALSE; + return kUpToDate; } } - const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set); - - // Check if we have an odex file next to the dex file. 
- std::string odex_filename(DexFilenameToOdexFilename(filename, kRuntimeISA)); - std::string error_msg; - std::unique_ptr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false, - &error_msg)); - if (oat_file.get() == nullptr) { - if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename - << "': " << error_msg; - } - error_msg.clear(); - } else { - const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL, - kReasonLogging); - if (oat_dex_file != nullptr) { - uint32_t location_checksum; - // If its not possible to read the classes.dex assume up-to-date as we won't be able to - // compile it anyway. - if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) { - if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: " - << filename << ": " << error_msg; - } - return JNI_FALSE; - } - if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum, - target_instruction_set, - &error_msg)) { - if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename - << " has an up-to-date checksum compared to " << filename; - } - return JNI_FALSE; - } else { - if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename - << " with an out-of-date checksum compared to " << filename - << ": " << error_msg; - } - error_msg.clear(); - } - } - } + bool force_system_only = false; + bool require_system_version = false; // Check the profile file. We need to rerun dex2oat if the profile has changed significantly // since the last time, or it's new. // If the 'defer' argument is true then this will be retried later. In this case we // need to make sure that the profile file copy is not made so that we will get the // same result second time. + std::string profile_file; + std::string prev_profile_file; + bool should_copy_profile = false; if (Runtime::Current()->GetProfilerOptions().IsEnabled() && (pkgname != nullptr)) { - const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */) + profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */) + std::string("/") + pkgname; - const std::string prev_profile_file = profile_file + std::string("@old"); + prev_profile_file = profile_file + std::string("@old"); struct stat profstat, prevstat; int e1 = stat(profile_file.c_str(), &profstat); + int e1_errno = errno; int e2 = stat(prev_profile_file.c_str(), &prevstat); + int e2_errno = errno; if (e1 < 0) { - // No profile file, need to run dex2oat - if (kReasonLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded profile file " << profile_file << " doesn't exist"; + if (e1_errno != EACCES) { + // No profile file, need to run dex2oat, unless we find a file in system + if (kReasonLogging) { + LOG(INFO) << "DexFile_isDexOptNeededInternal profile file " << profile_file << " doesn't exist. " + << "Will check odex to see if we can find a working version."; + } + // Force it to only accept system files/files with versions in system. + require_system_version = true; + } else { + LOG(INFO) << "DexFile_isDexOptNeededInternal received EACCES trying to stat profile file " + << profile_file; } - return JNI_TRUE; - } - - if (e2 == 0) { + } else if (e2 == 0) { // There is a previous profile file. Check if the profile has changed significantly.
// A change in profile is considered significant if X% (change_thr property) of the top K% // (compile_thr property) samples has changed. @@ -384,7 +439,7 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, bool old_ok = old_profile.LoadFile(prev_profile_file); if (!new_ok || !old_ok) { if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: " + LOG(INFO) << "DexFile_isDexOptNeededInternal Ignoring invalid profiles: " << (new_ok ? "" : profile_file) << " " << (old_ok ? "" : prev_profile_file); } } else { @@ -393,7 +448,7 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, old_profile.GetTopKSamples(old_top_k, top_k_threshold); if (new_top_k.empty()) { if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file; + LOG(INFO) << "DexFile_isDexOptNeededInternal empty profile: " << profile_file; } // If the new topK is empty we shouldn't optimize so we leave the change_percent at 0.0. } else { @@ -405,7 +460,7 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (kVerboseLogging) { std::set<std::string>::iterator end = diff.end(); for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) { - LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it; + LOG(INFO) << "DexFile_isDexOptNeededInternal new in topK: " << *it; } } } @@ -413,67 +468,84 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (change_percent > change_threshold) { if (kReasonLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file << + LOG(INFO) << "DexFile_isDexOptNeededInternal size of new profile file " << profile_file << " is significantly different from old profile file " << prev_profile_file << " (top " << top_k_threshold << "% samples changed in proportion of " << change_percent << "%)"; } - if (!defer) { - CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str()); - } - return JNI_TRUE; + should_copy_profile = !defer; + // Force us to only accept system files. + force_system_only = true; } - } else { + } else if (e2_errno == ENOENT) { // Previous profile does not exist. Make a copy of the current one. if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded previous profile doesn't exist: " << prev_profile_file; - } - if (!defer) { - CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str()); + LOG(INFO) << "DexFile_isDexOptNeededInternal previous profile doesn't exist: " << prev_profile_file; } + should_copy_profile = !defer; + } else { + PLOG(INFO) << "Unable to stat previous profile file " << prev_profile_file; } } - // Check if we have an oat file in the cache - const std::string cache_dir(GetDalvikCacheOrDie(instruction_set)); - const std::string cache_location( - GetDalvikCacheFilenameOrDie(filename, cache_dir.c_str())); - oat_file.reset(OatFile::Open(cache_location, filename, NULL, false, &error_msg)); - if (oat_file.get() == nullptr) { - if (kReasonLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location - << " does not exist for " << filename << ": " << error_msg; + const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set); + + // Get the filename for odex file next to the dex file. 
+ std::string odex_filename(DexFilenameToOdexFilename(filename, target_instruction_set)); + // Get the filename for the dalvik-cache file + std::string cache_dir; + bool have_android_data = false; + bool dalvik_cache_exists = false; + GetDalvikCache(instruction_set, false, &cache_dir, &have_android_data, &dalvik_cache_exists); + std::string cache_filename; // was cache_location + bool have_cache_filename = false; + if (dalvik_cache_exists) { + std::string error_msg; + have_cache_filename = GetDalvikCacheFilename(filename, cache_dir.c_str(), &cache_filename, + &error_msg); + if (!have_cache_filename && kVerboseLogging) { + LOG(INFO) << "DexFile_isDexOptNeededInternal failed to find cache file for dex file " << filename + << ": " << error_msg; } - return JNI_TRUE; } - uint32_t location_checksum; - if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) { - if (kReasonLogging) { - LOG(ERROR) << "DexFile_isDexOptNeeded failed to compute checksum of " << filename - << " (error " << error_msg << ")"; + bool should_relocate_if_possible = Runtime::Current()->ShouldRelocate(); + + InstructionSet isa = Runtime::Current()->GetInstructionSet(); + jbyte dalvik_cache_decision = -1; + // Let's try the cache first (since we want to load from there, as that's where the relocated + // versions will be). + if (have_cache_filename && !force_system_only) { + // We can use the dalvik-cache if we find a good file. + dalvik_cache_decision = + IsDexOptNeededForFile<kVerboseLogging, kReasonLogging>(cache_filename, filename, isa); + // We will only return DexOptNeeded if both the cache and system return it. + if (dalvik_cache_decision != kDexoptNeeded && !require_system_version) { + CHECK(!(dalvik_cache_decision == kPatchoatNeeded && !should_relocate_if_possible)) + << "May not return PatchoatNeeded when patching is disabled."; + return dalvik_cache_decision; } - return JNI_TRUE; + // We couldn't find one that's easy. We should now try the system. } - if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum, - target_instruction_set, &error_msg)) { - if (kReasonLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location - << " has out-of-date checksum compared to " << filename - << " (error " << error_msg << ")"; - } - return JNI_TRUE; + jbyte system_decision = + IsDexOptNeededForFile<kVerboseLogging, kReasonLogging>(odex_filename, filename, isa); + CHECK(!(system_decision == kPatchoatNeeded && !should_relocate_if_possible)) + << "May not return PatchoatNeeded when patching is disabled."; + + if (require_system_version && system_decision == kPatchoatNeeded + && dalvik_cache_decision == kUpToDate) { + // We have a version from system relocated to the cache. Return it.
+ return dalvik_cache_decision; } - if (kVerboseLogging) { - LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location - << " is up-to-date for " << filename; + if (should_copy_profile && system_decision == kDexoptNeeded) { + CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str()); } - CHECK(error_msg.empty()) << error_msg; - return JNI_FALSE; + + return system_decision; } -static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, +static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, jstring javaPkgname, jstring javaInstructionSet, jboolean defer) { ScopedUtfChars filename(env, javaFilename); NullableScopedUtfChars pkgname(env, javaPkgname); @@ -487,8 +559,8 @@ static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring java static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) { const char* instruction_set = GetInstructionSetString(kRuntimeISA); ScopedUtfChars filename(env, javaFilename); - return IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */, - instruction_set, false /* defer */); + return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */, + instruction_set, false /* defer */); } @@ -497,7 +569,7 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"), NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"), NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"), - NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Z"), + NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"), NATIVE_METHOD(DexFile, openDexFile, "(Ljava/lang/String;Ljava/lang/String;I)J"), }; diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index f9cc36a..c4c6b10 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -18,6 +18,7 @@ #include <dlfcn.h> #include <sstream> +#include <string.h> #include "base/bit_vector.h" #include "base/stl_util.h" @@ -125,6 +126,9 @@ OatFile::OatFile(const std::string& location) } OatFile::~OatFile() { + for (auto it : oat_dex_files_) { + delete it.first.data(); + } STLDeleteValues(&oat_dex_files_); if (dlopen_handle_ != NULL) { dlclose(dlopen_handle_); @@ -305,8 +309,14 @@ bool OatFile::Setup(std::string* error_msg) { dex_file_checksum, dex_file_pointer, methods_offsets_pointer); - // Use a StringPiece backed by the oat_dex_file's internal std::string as the key. - StringPiece key(oat_dex_file->GetDexFileLocation()); + + std::string dex_canonical_location_str = DexFile::GetDexCanonicalLocation(dex_file_location.c_str()); + // make a copy since we need to persist it as a key in the object's field. 
+ int location_size = dex_canonical_location_str.size() + 1; + char* dex_canonical_location = new char[location_size ]; + strncpy(dex_canonical_location, dex_canonical_location_str.c_str(), location_size); + + StringPiece key(dex_canonical_location); oat_dex_files_.Put(key, oat_dex_file); } return true; @@ -329,7 +339,9 @@ const byte* OatFile::End() const { const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location, const uint32_t* dex_location_checksum, bool warn_if_not_found) const { - Table::const_iterator it = oat_dex_files_.find(dex_location); + std::string dex_canonical_location = DexFile::GetDexCanonicalLocation(dex_location); + + Table::const_iterator it = oat_dex_files_.find(dex_canonical_location); if (it != oat_dex_files_.end()) { const OatFile::OatDexFile* oat_dex_file = it->second; if (dex_location_checksum == NULL || @@ -344,15 +356,18 @@ const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location, checksum = StringPrintf("0x%08x", *dex_location_checksum); } LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location + << " ( canonical path " << dex_canonical_location << ")" << " with checksum " << checksum << " in OatFile " << GetLocation(); if (kIsDebugBuild) { for (Table::const_iterator it = oat_dex_files_.begin(); it != oat_dex_files_.end(); ++it) { LOG(WARNING) << "OatFile " << GetLocation() << " contains OatDexFile " << it->second->GetDexFileLocation() + << " (canonical path " << it->first << ")" << " with checksum 0x" << std::hex << it->second->GetDexFileLocationChecksum(); } } } + return NULL; } diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h index 668ed9e..3dbe26f 100644 --- a/runtime/parsed_options.h +++ b/runtime/parsed_options.h @@ -48,8 +48,6 @@ class ParsedOptions { std::string native_bridge_library_string_; CompilerCallbacks* compiler_callbacks_; bool is_zygote_; - // TODO Change this to true when we want it on by default. - static constexpr bool kDefaultMustRelocate = false; bool must_relocate_; std::string patchoat_executable_; bool interpreter_only_; diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc index bd6656d..3081421 100644 --- a/runtime/proxy_test.cc +++ b/runtime/proxy_test.cc @@ -17,14 +17,14 @@ #include <jni.h> #include <vector> -#include "common_compiler_test.h" +#include "common_runtime_test.h" #include "field_helper.h" #include "mirror/art_field-inl.h" #include "scoped_thread_state_change.h" namespace art { -class ProxyTest : public CommonCompilerTest { +class ProxyTest : public CommonRuntimeTest { public: // Generate a proxy class with the given name and interfaces. This is a simplification from what // libcore does to fit to our test needs. We do not check for duplicated interfaces or methods and @@ -103,6 +103,12 @@ class ProxyTest : public CommonCompilerTest { soa.Self()->AssertNoPendingException(); return proxyClass; } + + protected: + void SetUpRuntimeOptions(RuntimeOptions *options) OVERRIDE { + options->push_back(std::make_pair(StringPrintf("-Ximage:%s", GetLibCoreOatFileName().c_str()), + nullptr)); + } }; // Creates a proxy class and check ClassHelper works correctly. 
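The oat_file.cc hunk above switches the OatDexFile table key from the raw dex location to DexFile::GetDexCanonicalLocation(), so that two spellings of the same path resolve to the same entry; the matching change in ~OatFile then frees the copied key buffers. A standalone sketch of the idea with a toy canonicalizer over an ordinary std::map (ART's canonicalization is more thorough; the paths here are invented):

#include <cassert>
#include <map>
#include <string>

// Toy canonicalizer: collapse "/./" and "//" so equivalent spellings of a path
// compare equal. ART's DexFile::GetDexCanonicalLocation does much more.
std::string Canonicalize(std::string path) {
  for (std::string::size_type pos; (pos = path.find("/./")) != std::string::npos;) {
    path.erase(pos, 2);
  }
  for (std::string::size_type pos; (pos = path.find("//")) != std::string::npos;) {
    path.erase(pos, 1);
  }
  return path;
}

int main() {
  // Keying the table by the canonical form makes lookups spelling-insensitive.
  std::map<std::string, int> oat_dex_files;
  oat_dex_files[Canonicalize("/data/app/foo.apk")] = 42;

  assert(oat_dex_files.count(Canonicalize("/data//app/./foo.apk")) == 1);
  assert(oat_dex_files.count(Canonicalize("/data/app/bar.apk")) == 0);
  return 0;
}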
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h index 982553d..c4d51cb 100644 --- a/runtime/quick/inline_method_analyser.h +++ b/runtime/quick/inline_method_analyser.h @@ -48,7 +48,12 @@ enum InlineMethodOpcode : uint16_t { kIntrinsicMinMaxFloat, kIntrinsicMinMaxDouble, kIntrinsicSqrt, - kIntrinsicGet, + kIntrinsicCeil, + kIntrinsicFloor, + kIntrinsicRint, + kIntrinsicRoundFloat, + kIntrinsicRoundDouble, + kIntrinsicReferenceGet, kIntrinsicCharAt, kIntrinsicCompareTo, kIntrinsicIsEmptyOrLength, diff --git a/runtime/utils.cc b/runtime/utils.cc index 52cdcc1..4d49809 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -1236,7 +1236,7 @@ bool GetDalvikCacheFilename(const char* location, const char* cache_location, return false; } std::string cache_file(&location[1]); // skip leading slash - if (!EndsWith(location, ".dex") && !EndsWith(location, ".art")) { + if (!EndsWith(location, ".dex") && !EndsWith(location, ".art") && !EndsWith(location, ".oat")) { cache_file += "/"; cache_file += DexFile::kClassesDex; } diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc index 7cd5980..d6c90e1 100644 --- a/runtime/utils_test.cc +++ b/runtime/utils_test.cc @@ -350,6 +350,8 @@ TEST_F(UtilsTest, GetDalvikCacheFilenameOrDie) { GetDalvikCacheFilenameOrDie("/system/framework/core.jar", "/foo").c_str()); EXPECT_STREQ("/foo/system@framework@boot.art", GetDalvikCacheFilenameOrDie("/system/framework/boot.art", "/foo").c_str()); + EXPECT_STREQ("/foo/system@framework@boot.oat", + GetDalvikCacheFilenameOrDie("/system/framework/boot.oat", "/foo").c_str()); } TEST_F(UtilsTest, GetSystemImageFilename) { diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 18f7626..329b4dc 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -3111,7 +3111,7 @@ mirror::ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator(T* it, const } else { // Check whether the name of the called method is "<init>" const uint32_t method_idx = (is_range) ? 
inst->VRegB_3rc() : inst->VRegB_35c(); - if (strcmp(dex_file_->GetMethodName(dex_file_->GetMethodId(method_idx)), "init") != 0) { + if (strcmp(dex_file_->GetMethodName(dex_file_->GetMethodId(method_idx)), "<init>") != 0) { Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized"; return nullptr; } diff --git a/test/015-switch/expected.txt b/test/015-switch/expected.txt index ca3b518..91b4714 100644 --- a/test/015-switch/expected.txt +++ b/test/015-switch/expected.txt @@ -8,3 +8,9 @@ CORRECT (not found) CORRECT (default only) CORRECT big sparse / first CORRECT big sparse / last +default +254 +255 +256 +257 +default diff --git a/test/015-switch/src/Main.java b/test/015-switch/src/Main.java index 7198e2b..dd97a8c 100644 --- a/test/015-switch/src/Main.java +++ b/test/015-switch/src/Main.java @@ -101,5 +101,15 @@ public class Main { case 100: System.out.print("CORRECT big sparse / last\n"); break; default: System.out.print("blah!\n"); break; } + + for (a = 253; a <= 258; a++) { + switch (a) { + case 254: System.out.println("254"); break; + case 255: System.out.println("255"); break; + case 256: System.out.println("256"); break; + case 257: System.out.println("257"); break; + default: System.out.println("default"); break; + } + } } } diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java index 9ecc0a0..56972ff 100644 --- a/test/082-inline-execute/src/Main.java +++ b/test/082-inline-execute/src/Main.java @@ -34,6 +34,11 @@ public class Main { test_Math_max_F(); test_Math_min_D(); test_Math_max_D(); + test_Math_ceil(); + test_Math_floor(); + test_Math_rint(); + test_Math_round_D(); + test_Math_round_F(); test_Short_reverseBytes(); test_Integer_reverseBytes(); test_Long_reverseBytes(); @@ -49,6 +54,11 @@ public class Main { test_StrictMath_max_F(); test_StrictMath_min_D(); test_StrictMath_max_D(); + test_StrictMath_ceil(); + test_StrictMath_floor(); + test_StrictMath_rint(); + test_StrictMath_round_D(); + test_StrictMath_round_F(); test_String_charAt(); test_String_compareTo(); test_String_indexOf(); @@ -376,6 +386,104 @@ public class Main { Assert.assertEquals(Math.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE); } + public static void test_Math_ceil() { + Assert.assertEquals(Math.ceil(+0.0), +0.0d, 0.0); + Assert.assertEquals(Math.ceil(-0.0), -0.0d, 0.0); + Assert.assertEquals(Math.ceil(-0.9), -0.0d, 0.0); + Assert.assertEquals(Math.ceil(-0.5), -0.0d, 0.0); + Assert.assertEquals(Math.ceil(0.0), -0.0d, 0.0); + Assert.assertEquals(Math.ceil(+2.0), +2.0d, 0.0); + Assert.assertEquals(Math.ceil(+2.1), +3.0d, 0.0); + Assert.assertEquals(Math.ceil(+2.5), +3.0d, 0.0); + Assert.assertEquals(Math.ceil(+2.9), +3.0d, 0.0); + Assert.assertEquals(Math.ceil(+3.0), +3.0d, 0.0); + Assert.assertEquals(Math.ceil(-2.0), -2.0d, 0.0); + Assert.assertEquals(Math.ceil(-2.1), -2.0d, 0.0); + Assert.assertEquals(Math.ceil(-2.5), -2.0d, 0.0); + Assert.assertEquals(Math.ceil(-2.9), -2.0d, 0.0); + Assert.assertEquals(Math.ceil(-3.0), -3.0d, 0.0); + Assert.assertEquals(Math.ceil(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(Math.ceil(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(Math.ceil(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_Math_floor() { + Assert.assertEquals(Math.floor(+0.0), +0.0d, 0.0); + Assert.assertEquals(Math.floor(-0.0), -0.0d, 0.0); + Assert.assertEquals(Math.floor(+2.0), +2.0d, 0.0); + Assert.assertEquals(Math.floor(+2.1), +2.0d, 0.0); + 
Assert.assertEquals(Math.floor(+2.5), +2.0d, 0.0); + Assert.assertEquals(Math.floor(+2.9), +2.0d, 0.0); + Assert.assertEquals(Math.floor(+3.0), +3.0d, 0.0); + Assert.assertEquals(Math.floor(-2.0), -2.0d, 0.0); + Assert.assertEquals(Math.floor(-2.1), -3.0d, 0.0); + Assert.assertEquals(Math.floor(-2.5), -3.0d, 0.0); + Assert.assertEquals(Math.floor(-2.9), -3.0d, 0.0); + Assert.assertEquals(Math.floor(-3.0), -3.0d, 0.0); + Assert.assertEquals(Math.floor(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(Math.floor(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(Math.floor(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_Math_rint() { + Assert.assertEquals(Math.rint(+0.0), +0.0d, 0.0); + Assert.assertEquals(Math.rint(-0.0), -0.0d, 0.0); + Assert.assertEquals(Math.rint(+2.0), +2.0d, 0.0); + Assert.assertEquals(Math.rint(+2.1), +2.0d, 0.0); + Assert.assertEquals(Math.rint(+2.5), +2.0d, 0.0); + Assert.assertEquals(Math.rint(+2.9), +3.0d, 0.0); + Assert.assertEquals(Math.rint(+3.0), +3.0d, 0.0); + Assert.assertEquals(Math.rint(-2.0), -2.0d, 0.0); + Assert.assertEquals(Math.rint(-2.1), -2.0d, 0.0); + Assert.assertEquals(Math.rint(-2.5), -2.0d, 0.0); + Assert.assertEquals(Math.rint(-2.9), -3.0d, 0.0); + Assert.assertEquals(Math.rint(-3.0), -3.0d, 0.0); + Assert.assertEquals(Math.rint(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(Math.rint(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(Math.rint(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_Math_round_D() { + Assert.assertEquals(Math.round(+0.0d), (long)+0.0); + Assert.assertEquals(Math.round(-0.0d), (long)+0.0); + Assert.assertEquals(Math.round(2.0d), 2l); + Assert.assertEquals(Math.round(2.1d), 2l); + Assert.assertEquals(Math.round(2.5d), 3l); + Assert.assertEquals(Math.round(2.9d), 3l); + Assert.assertEquals(Math.round(3.0d), 3l); + Assert.assertEquals(Math.round(-2.0d), -2l); + Assert.assertEquals(Math.round(-2.1d), -2l); + Assert.assertEquals(Math.round(-2.5d), -2l); + Assert.assertEquals(Math.round(-2.9d), -3l); + Assert.assertEquals(Math.round(-3.0d), -3l); + Assert.assertEquals(Math.round(0.49999999999999994d), 1l); + Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d); + Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE); + Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE); + Assert.assertEquals(Math.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE); + Assert.assertEquals(Math.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE); + } + + public static void test_Math_round_F() { + Assert.assertEquals(Math.round(+0.0f), (int)+0.0); + Assert.assertEquals(Math.round(-0.0f), (int)+0.0); + Assert.assertEquals(Math.round(2.0f), 2); + Assert.assertEquals(Math.round(2.1f), 2); + Assert.assertEquals(Math.round(2.5f), 3); + Assert.assertEquals(Math.round(2.9f), 3); + Assert.assertEquals(Math.round(3.0f), 3); + Assert.assertEquals(Math.round(-2.0f), -2); + Assert.assertEquals(Math.round(-2.1f), -2); + Assert.assertEquals(Math.round(-2.5f), -2); + Assert.assertEquals(Math.round(-2.9f), -3); + Assert.assertEquals(Math.round(-3.0f), -3); + Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f); + Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE); + Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE); + Assert.assertEquals(Math.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE); + 
Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE); + } + public static void test_StrictMath_abs_I() { Assert.assertEquals(StrictMath.abs(0), 0); Assert.assertEquals(StrictMath.abs(123), 123); @@ -487,6 +595,104 @@ public class Main { Assert.assertEquals(StrictMath.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE); } + public static void test_StrictMath_ceil() { + Assert.assertEquals(StrictMath.ceil(+0.0), +0.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-0.0), -0.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-0.9), -0.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-0.5), -0.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(0.0), -0.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(+2.0), +2.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(+2.1), +3.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(+2.5), +3.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(+2.9), +3.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(+3.0), +3.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-2.0), -2.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-2.1), -2.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-2.5), -2.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-2.9), -2.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(-3.0), -3.0d, 0.0); + Assert.assertEquals(StrictMath.ceil(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(StrictMath.ceil(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(StrictMath.ceil(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_StrictMath_floor() { + Assert.assertEquals(StrictMath.floor(+0.0), +0.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-0.0), -0.0d, 0.0); + Assert.assertEquals(StrictMath.floor(+2.0), +2.0d, 0.0); + Assert.assertEquals(StrictMath.floor(+2.1), +2.0d, 0.0); + Assert.assertEquals(StrictMath.floor(+2.5), +2.0d, 0.0); + Assert.assertEquals(StrictMath.floor(+2.9), +2.0d, 0.0); + Assert.assertEquals(StrictMath.floor(+3.0), +3.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-2.0), -2.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-2.1), -3.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-2.5), -3.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-2.9), -3.0d, 0.0); + Assert.assertEquals(StrictMath.floor(-3.0), -3.0d, 0.0); + Assert.assertEquals(StrictMath.floor(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(StrictMath.floor(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(StrictMath.floor(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_StrictMath_rint() { + Assert.assertEquals(StrictMath.rint(+0.0), +0.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-0.0), -0.0d, 0.0); + Assert.assertEquals(StrictMath.rint(+2.0), +2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(+2.1), +2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(+2.5), +2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(+2.9), +3.0d, 0.0); + Assert.assertEquals(StrictMath.rint(+3.0), +3.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-2.0), -2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-2.1), -2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-2.5), -2.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-2.9), -3.0d, 0.0); + Assert.assertEquals(StrictMath.rint(-3.0), -3.0d, 0.0); + Assert.assertEquals(StrictMath.rint(Double.NaN), Double.NaN, 0.0); + Assert.assertEquals(StrictMath.rint(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY, 0.0); + Assert.assertEquals(StrictMath.rint(Double.NEGATIVE_INFINITY), 
Double.NEGATIVE_INFINITY, 0.0); + } + + public static void test_StrictMath_round_D() { + Assert.assertEquals(StrictMath.round(+0.0d), (long)+0.0); + Assert.assertEquals(StrictMath.round(-0.0d), (long)+0.0); + Assert.assertEquals(StrictMath.round(2.0d), 2l); + Assert.assertEquals(StrictMath.round(2.1d), 2l); + Assert.assertEquals(StrictMath.round(2.5d), 3l); + Assert.assertEquals(StrictMath.round(2.9d), 3l); + Assert.assertEquals(StrictMath.round(3.0d), 3l); + Assert.assertEquals(StrictMath.round(-2.0d), -2l); + Assert.assertEquals(StrictMath.round(-2.1d), -2l); + Assert.assertEquals(StrictMath.round(-2.5d), -2l); + Assert.assertEquals(StrictMath.round(-2.9d), -3l); + Assert.assertEquals(StrictMath.round(-3.0d), -3l); + Assert.assertEquals(StrictMath.round(0.49999999999999994d), 1l); + Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d); + Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE); + Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE); + Assert.assertEquals(StrictMath.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE); + Assert.assertEquals(StrictMath.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE); + } + + public static void test_StrictMath_round_F() { + Assert.assertEquals(StrictMath.round(+0.0f), (int)+0.0); + Assert.assertEquals(StrictMath.round(-0.0f), (int)+0.0); + Assert.assertEquals(StrictMath.round(2.0f), 2); + Assert.assertEquals(StrictMath.round(2.1f), 2); + Assert.assertEquals(StrictMath.round(2.5f), 3); + Assert.assertEquals(StrictMath.round(2.9f), 3); + Assert.assertEquals(StrictMath.round(3.0f), 3); + Assert.assertEquals(StrictMath.round(-2.0f), -2); + Assert.assertEquals(StrictMath.round(-2.1f), -2); + Assert.assertEquals(StrictMath.round(-2.5f), -2); + Assert.assertEquals(StrictMath.round(-2.9f), -3); + Assert.assertEquals(StrictMath.round(-3.0f), -3); + Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f); + Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE); + Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE); + Assert.assertEquals(StrictMath.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE); + Assert.assertEquals(StrictMath.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE); + } + public static void test_Float_floatToRawIntBits() { Assert.assertEquals(Float.floatToRawIntBits(-1.0f), 0xbf800000); Assert.assertEquals(Float.floatToRawIntBits(0.0f), 0); diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt index f852620..5b41606 100644 --- a/test/115-native-bridge/expected.txt +++ b/test/115-native-bridge/expected.txt @@ -1,13 +1,55 @@ Ready for native bridge tests. Native bridge initialized. Checking for support. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. -Getting trampoline. +Getting trampoline for JNI_OnLoad with shorty (null). +Test ART callbacks: all JNI function number is 9. + name:booleanMethod, signature:(ZZZZZZZZZZ)Z, shorty:ZZZZZZZZZZZ. + name:byteMethod, signature:(BBBBBBBBBB)B, shorty:BBBBBBBBBBB. + name:charMethod, signature:(CCCCCCCCCC)C, shorty:CCCCCCCCCCC. + name:shortMethod, signature:(SSSSSSSSSS)S, shorty:SSSSSSSSSSS. + name:testCallStaticVoidMethodOnSubClassNative, signature:()V, shorty:V. + name:testFindClassOnAttachedNativeThread, signature:()V, shorty:V. + name:testFindFieldOnAttachedNativeThreadNative, signature:()V, shorty:V. 
+ name:testGetMirandaMethodNative, signature:()Ljava/lang/reflect/Method;, shorty:L. + name:testZeroLengthByteBuffers, signature:()V, shorty:V. +trampoline_JNI_OnLoad called! +Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V. +trampoline_Java_Main_testFindClassOnAttachedNativeThread called! +Getting trampoline for Java_Main_testFindFieldOnAttachedNativeThreadNative with shorty V. +trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative called! +Getting trampoline for Java_Main_testCallStaticVoidMethodOnSubClassNative with shorty V. +trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative called! +Getting trampoline for Java_Main_testGetMirandaMethodNative with shorty L. +trampoline_Java_Main_testGetMirandaMethodNative called! +Getting trampoline for Java_Main_testZeroLengthByteBuffers with shorty V. +trampoline_Java_Main_testZeroLengthByteBuffers called! +Getting trampoline for Java_Main_byteMethod with shorty BBBBBBBBBBB. +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +trampoline_Java_Main_byteMethod called! +Getting trampoline for Java_Main_shortMethod with shorty SSSSSSSSSSS. +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +trampoline_Java_Main_shortMethod called! +Getting trampoline for Java_Main_booleanMethod with shorty ZZZZZZZZZZZ. +trampoline_Java_Main_booleanMethod called! +trampoline_Java_Main_booleanMethod called! +Getting trampoline for Java_Main_charMethod with shorty CCCCCCCCCCC. +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! +trampoline_Java_Main_charMethod called! 
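The "shorty" printed for each trampoline above is the compressed signature the bridge obtains through getMethodShorty: the return type comes first, then one character per parameter, and every reference type collapses to 'L'. A small sketch of deriving a shorty from a JNI signature string (a simplified parser written for illustration; it assumes well-formed input and also collapses array types to 'L'):

#include <cassert>
#include <cstddef>
#include <string>

// Derive a "shorty" from a JNI method signature: return type first, then one
// character per parameter; class and array types collapse to 'L'.
std::string Shorty(const std::string& sig) {
  auto skip_type = [&sig](size_t i, char* out) -> size_t {
    bool is_array = false;
    while (sig[i] == '[') { is_array = true; ++i; }  // array dimensions
    if (sig[i] == 'L') {                             // reference type: Lpkg/Class;
      *out = 'L';
      return sig.find(';', i) + 1;
    }
    *out = is_array ? 'L' : sig[i];                  // arrays are references too
    return i + 1;
  };
  std::string params;
  size_t i = 1;                                      // skip the opening '('
  while (sig[i] != ')') {
    char c;
    i = skip_type(i, &c);
    params.push_back(c);
  }
  char ret;
  skip_type(i + 1, &ret);                            // the type after ')'
  return std::string(1, ret) + params;
}

int main() {
  assert(Shorty("(ZZZZZZZZZZ)Z") == "ZZZZZZZZZZZ");       // booleanMethod above
  assert(Shorty("()Ljava/lang/reflect/Method;") == "L");  // testGetMirandaMethodNative
  assert(Shorty("()V") == "V");
  assert(Shorty("(ILjava/lang/String;[B)D") == "DILL");
  return 0;
}

This matches the expected output: booleanMethod's "(ZZZZZZZZZZ)Z" becomes the eleven-character "ZZZZZZZZZZZ", and the Method-returning testGetMirandaMethodNative collapses to just "L".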
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index bd3ae13..82211a5 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -44,13 +44,192 @@ struct NativeBridgeCallbacks {
   bool (*isSupported)(const char* libpath);
 };
 
+struct NativeBridgeMethod {
+  const char* name;
+  const char* signature;
+  bool static_method;
+  void* fnPtr;
+  void* trampoline;
+};
+
+static NativeBridgeMethod* find_native_bridge_method(const char *name);
+static NativeBridgeArtCallbacks* gNativeBridgeArtCallbacks;
+
+static jint trampoline_JNI_OnLoad(JavaVM* vm, void* reserved) {
+  JNIEnv* env = nullptr;
+  typedef jint (*FnPtr_t)(JavaVM*, void*);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("JNI_OnLoad")->fnPtr);
+
+  vm->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6);
+  if (env == nullptr) {
+    return 0;
+  }
+
+  jclass klass = env->FindClass("Main");
+  if (klass != nullptr) {
+    int i, count1, count2;
+    count1 = gNativeBridgeArtCallbacks->getNativeMethodCount(env, klass);
+    std::unique_ptr<JNINativeMethod[]> methods(new JNINativeMethod[count1]);
+    if (methods == nullptr) {
+      return 0;
+    }
+    count2 = gNativeBridgeArtCallbacks->getNativeMethods(env, klass, methods.get(), count1);
+    if (count1 == count2) {
+      printf("Test ART callbacks: all JNI function number is %d.\n", count1);
+    }
+
+    for (i = 0; i < count1; i++) {
+      NativeBridgeMethod* nb_method = find_native_bridge_method(methods[i].name);
+      if (nb_method != nullptr) {
+        jmethodID mid = nullptr;
+        if (nb_method->static_method) {
+          mid = env->GetStaticMethodID(klass, methods[i].name, nb_method->signature);
+        } else {
+          mid = env->GetMethodID(klass, methods[i].name, nb_method->signature);
+        }
+        if (mid != nullptr) {
+          const char* shorty = gNativeBridgeArtCallbacks->getMethodShorty(env, mid);
+          if (strcmp(shorty, methods[i].signature) == 0) {
+            printf(" name:%s, signature:%s, shorty:%s.\n",
+                   methods[i].name, nb_method->signature, shorty);
+          }
+        }
+      }
+    }
+    methods.release();
+  }
+
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(vm, reserved);
+}
+
+static void trampoline_Java_Main_testFindClassOnAttachedNativeThread(JNIEnv* env,
+                                                                     jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+      (find_native_bridge_method("testFindClassOnAttachedNativeThread")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
+static void trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEnv* env,
+                                                                           jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+      (find_native_bridge_method("testFindFieldOnAttachedNativeThreadNative")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
+static void trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
+                                                                          jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+      (find_native_bridge_method("testCallStaticVoidMethodOnSubClassNative")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
+static jobject trampoline_Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass klass) {
+  typedef jobject (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+      (find_native_bridge_method("testGetMirandaMethodNative")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
+static void trampoline_Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass klass) {
+  typedef void (*FnPtr_t)(JNIEnv*, jclass);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>
+      (find_native_bridge_method("testZeroLengthByteBuffers")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass);
+}
+
+static jbyte trampoline_Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,
+                                             jbyte b3, jbyte b4, jbyte b5, jbyte b6,
+                                             jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
+  typedef jbyte (*FnPtr_t)(JNIEnv*, jclass, jbyte, jbyte, jbyte, jbyte, jbyte,
+                           jbyte, jbyte, jbyte, jbyte, jbyte);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("byteMethod")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10);
+}
 
-static std::vector<void*> symbols;
 
+static jshort trampoline_Java_Main_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,
+                                               jshort s3, jshort s4, jshort s5, jshort s6,
+                                               jshort s7, jshort s8, jshort s9, jshort s10) {
+  typedef jshort (*FnPtr_t)(JNIEnv*, jclass, jshort, jshort, jshort, jshort, jshort,
+                            jshort, jshort, jshort, jshort, jshort);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("shortMethod")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10);
+}
+
+static jboolean trampoline_Java_Main_booleanMethod(JNIEnv* env, jclass klass, jboolean b1,
+                                                   jboolean b2, jboolean b3, jboolean b4,
+                                                   jboolean b5, jboolean b6, jboolean b7,
+                                                   jboolean b8, jboolean b9, jboolean b10) {
+  typedef jboolean (*FnPtr_t)(JNIEnv*, jclass, jboolean, jboolean, jboolean, jboolean, jboolean,
+                              jboolean, jboolean, jboolean, jboolean, jboolean);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("booleanMethod")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10);
+}
+
+static jchar trampoline_Java_Main_charMethod(JNIEnv* env, jclass klass, jchar c1, jchar c2,
+                                             jchar c3, jchar c4, jchar c5, jchar c6,
+                                             jchar c7, jchar c8, jchar c9, jchar c10) {
+  typedef jchar (*FnPtr_t)(JNIEnv*, jclass, jchar, jchar, jchar, jchar, jchar,
+                           jchar, jchar, jchar, jchar, jchar);
+  FnPtr_t fnPtr = reinterpret_cast<FnPtr_t>(find_native_bridge_method("charMethod")->fnPtr);
+  printf("%s called!\n", __FUNCTION__);
+  return fnPtr(env, klass, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10);
+}
+
+NativeBridgeMethod gNativeBridgeMethods[] = {
+  { "JNI_OnLoad", "", true, nullptr,
+    reinterpret_cast<void*>(trampoline_JNI_OnLoad) },
+  { "booleanMethod", "(ZZZZZZZZZZ)Z", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_booleanMethod) },
+  { "byteMethod", "(BBBBBBBBBB)B", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_byteMethod) },
+  { "charMethod", "(CCCCCCCCCC)C", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_charMethod) },
+  { "shortMethod", "(SSSSSSSSSS)S", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_shortMethod) },
+  { "testCallStaticVoidMethodOnSubClassNative", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testCallStaticVoidMethodOnSubClassNative) },
+  { "testFindClassOnAttachedNativeThread", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testFindClassOnAttachedNativeThread) },
+  { "testFindFieldOnAttachedNativeThreadNative", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testFindFieldOnAttachedNativeThreadNative) },
+  { "testGetMirandaMethodNative", "()Ljava/lang/reflect/Method;", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testGetMirandaMethodNative) },
+  { "testZeroLengthByteBuffers", "()V", true, nullptr,
+    reinterpret_cast<void*>(trampoline_Java_Main_testZeroLengthByteBuffers) },
+};
+
+static NativeBridgeMethod* find_native_bridge_method(const char *name) {
+  const char* pname = name;
+  if (strncmp(name, "Java_Main_", 10) == 0) {
+    pname += 10;
+  }
+
+  for (size_t i = 0; i < sizeof(gNativeBridgeMethods) / sizeof(gNativeBridgeMethods[0]); i++) {
+    if (strcmp(pname, gNativeBridgeMethods[i].name) == 0) {
+      return &gNativeBridgeMethods[i];
+    }
+  }
+  return nullptr;
+}
 
 // NativeBridgeCallbacks implementations
 extern "C" bool native_bridge_initialize(NativeBridgeArtCallbacks* art_cbs) {
-  printf("Native bridge initialized.\n");
+  if (art_cbs != nullptr) {
+    gNativeBridgeArtCallbacks = art_cbs;
+    printf("Native bridge initialized.\n");
+  }
   return true;
 }
 
@@ -80,17 +259,16 @@ extern "C" void* native_bridge_loadLibrary(const char* libpath, int flag) {
 
 extern "C" void* native_bridge_getTrampoline(void* handle, const char* name, const char* shorty,
                                              uint32_t len) {
-  printf("Getting trampoline.\n");
+  printf("Getting trampoline for %s with shorty %s.\n", name, shorty);
 
   // The name here is actually the JNI name, so we can directly do the lookup.
   void* sym = dlsym(handle, name);
-  if (sym != nullptr) {
-    symbols.push_back(sym);
-  }
+  NativeBridgeMethod* method = find_native_bridge_method(name);
+  if (method == nullptr)
+    return nullptr;
+  method->fnPtr = sym;
 
-  // As libarttest is the same arch as the host, we can actually directly use the code and do not
-  // need to create a trampoline. :-)
-  return sym;
+  return method->trampoline;
 }
 
 extern "C" bool native_bridge_isSupported(const char* libpath) {
@@ -109,6 +287,3 @@ NativeBridgeCallbacks NativeBridgeItf {
   .getTrampoline = &native_bridge_getTrampoline,
   .isSupported = &native_bridge_isSupported
 };
-
-
-
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 5c1bc03..d7ee383 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -81,38 +81,10 @@ endif
 
 # Tests that are broken in --trace mode.
 TEST_ART_BROKEN_TRACE_RUN_TESTS := \
-  003-omnibus-opcodes \
-  004-InterfaceTest \
   004-SignalTest \
-  004-ThreadStress \
-  005-annotations \
-  012-math \
   018-stack-overflow \
-  023-many-interfaces \
-  027-arithmetic \
-  031-class-attributes \
-  037-inherit \
-  044-proxy \
-  046-reflect \
-  051-thread \
-  055-enum-performance \
-  062-character-encodings \
-  064-field-access \
-  074-gc-thrash \
-  078-polymorphic-virtual \
-  080-oom-throw \
-  082-inline-execute \
-  083-compiler-regressions \
-  093-serialization \
   097-duplicate-method \
-  100-reflect2 \
-  102-concurrent-gc \
-  103-string-append \
-  107-int-math2 \
-  112-double-math \
-  114-ParallelGC \
-  700-LoadArgRegs \
-  701-easy-div-rem
+  107-int-math2
 
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-relocate))
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-no-prebuild))
diff --git a/test/run-test b/test/run-test
index aef7c52..ca7e68c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -33,7 +33,11 @@ cd "${progdir}"
 progdir=`pwd`
 prog="${progdir}"/`basename "${prog}"`
 test_dir="test-$$"
-tmp_dir="/tmp/$USER/${test_dir}"
+if [ -z "$TMPDIR" ]; then
+  tmp_dir="/tmp/$USER/${test_dir}"
+else
+  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+fi
 
 export JAVA="java"
 export JAVAC="javac -g"
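Editor's note: the nativebridge.cc change above replaces the old pass-through getTrampoline with a table-driven lookup. Each known JNI method gets an entry that records the real symbol found by dlsym() and hands back a logging trampoline instead of the symbol itself. Below is a minimal, self-contained C++ sketch of that pattern, added for illustration only; it is not part of the patch, and every name in it (BridgeMethod, get_trampoline, real_add, and so on) is invented for the example.

// Editor's sketch (illustrative only): the table-driven trampoline idea,
// reduced to plain C++ so it can be compiled and run on its own.
#include <cstdio>
#include <cstring>

struct BridgeMethod {
  const char* name;   // method name, minus any "Java_Main_"-style prefix
  void* target;       // real implementation, recorded at "load" time
  void* trampoline;   // wrapper handed back to the caller
};

// Stand-in for the real native implementation that dlsym() would return.
static int real_add(int a, int b) { return a + b; }

static BridgeMethod* find_method(const char* name);

// Trampoline: logs the call, then forwards to whatever target was recorded.
static int trampoline_add(int a, int b) {
  typedef int (*FnPtr_t)(int, int);
  FnPtr_t fn = reinterpret_cast<FnPtr_t>(find_method("add")->target);
  printf("%s called!\n", __FUNCTION__);
  return fn(a, b);
}

static BridgeMethod gMethods[] = {
  { "add", nullptr, reinterpret_cast<void*>(trampoline_add) },
};

static BridgeMethod* find_method(const char* name) {
  const char* pname = name;
  if (strncmp(name, "Java_Main_", 10) == 0) {
    pname += 10;  // accept either the short or the fully prefixed name
  }
  for (size_t i = 0; i < sizeof(gMethods) / sizeof(gMethods[0]); i++) {
    if (strcmp(pname, gMethods[i].name) == 0) {
      return &gMethods[i];
    }
  }
  return nullptr;
}

// Analogue of getTrampoline(): record the real symbol, hand back the wrapper.
static void* get_trampoline(const char* name, void* sym) {
  BridgeMethod* m = find_method(name);
  if (m == nullptr) {
    return nullptr;
  }
  m->target = sym;
  return m->trampoline;
}

int main() {
  void* t = get_trampoline("Java_Main_add", reinterpret_cast<void*>(real_add));
  typedef int (*AddFn)(int, int);
  printf("2 + 3 = %d\n", reinterpret_cast<AddFn>(t)(2, 3));
  return 0;
}

Looking the target up in the table at call time keeps each trampoline a plain, stateless function, which is what lets the test bridge store the trampolines in a static array and hand them out as ordinary function pointers.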