diff options
49 files changed, 782 insertions, 142 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index b84154b..169a651 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -83,7 +83,8 @@ ART_TARGET_CLANG := $(USE_CLANG_PLATFORM_BUILD) else ART_TARGET_CLANG := false endif -ART_TARGET_CLANG_arm := +# b/25130937 +ART_TARGET_CLANG_arm := false ART_TARGET_CLANG_arm64 := ART_TARGET_CLANG_mips := ART_TARGET_CLANG_mips64 := diff --git a/compiler/Android.mk b/compiler/Android.mk index 5770edf..1ed0490 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -243,6 +243,11 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif endif + ifneq ($(TARGET_HAVE_QC_PERF),true) + # CAF bailout patches break dex2oat on some devices - disable them if unneeded + LOCAL_CFLAGS += -DDISABLE_CAF_BAILOUT + endif + LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime ifeq ($$(art_target_or_host),host) @@ -263,6 +268,11 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT ifeq ($$(art_target_or_host),target) # For atrace. LOCAL_SHARED_LIBRARIES += libcutils + ifeq ($(TARGET_HAVE_QC_PERF),true) + # FIXME! Current PERF is built with GCC and needs emutls + LOCAL_CLANG := false + LOCAL_WHOLE_STATIC_LIBRARIES += libqc-art-compiler + endif include $(BUILD_SHARED_LIBRARY) else # host LOCAL_MULTILIB := both diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 02d5327..2fd32ce 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -447,6 +447,17 @@ class SuspendCheckElimination : public PassME { } }; +// dummy pass, for placeholder only +class DummyPass : public PassME { + public: + DummyPass() : PassME("DummyPass", kNoNodes, "") { + } + + bool Gate(const PassDataHolder*) const { + return false; + } +}; + } // namespace art #endif // ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index b78b3d7..7832379 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -355,6 +355,7 @@ enum MIROptimizationFlagPositions { kMIRInlinedPred, // Invoke is inlined via prediction. kMIRCallee, // Instruction is inlined from callee. kMIRIgnoreSuspendCheck, + kMIRIgnoreZeroDivCheck, kMIRDup, kMIRMark, // Temporary node mark can be used by // opt passes for their private needs. diff --git a/compiler/dex/compiler_ir.cc b/compiler/dex/compiler_ir.cc index 6e1853b..3a25b03 100644 --- a/compiler/dex/compiler_ir.cc +++ b/compiler/dex/compiler_ir.cc @@ -47,7 +47,33 @@ CompilationUnit::CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDr mir_graph(nullptr), cg(nullptr), timings("QuickCompiler", true, false), - print_pass(false) { + print_pass(false), + compiler_(nullptr) { +} + +CompilationUnit::CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, + ClassLinker* linker, const QuickCompiler* compiler) + : compiler_driver(driver), + class_linker(linker), + dex_file(nullptr), + class_loader(nullptr), + class_def_idx(0), + method_idx(0), + access_flags(0), + invoke_type(kDirect), + shorty(nullptr), + disable_opt(0), + enable_debug(0), + verbose(false), + instruction_set(isa), + target64(Is64BitInstructionSet(isa)), + arena(pool), + arena_stack(pool), + mir_graph(nullptr), + cg(nullptr), + timings("QuickCompiler", true, false), + print_pass(false), + compiler_(compiler) { } CompilationUnit::~CompilationUnit() { diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index d28df1d..dc5af72 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -27,6 +27,7 @@ #include "base/timing_logger.h" #include "invoke_type.h" #include "safe_map.h" +#include "quick/quick_compiler.h" namespace art { @@ -161,6 +162,8 @@ struct OptionContent { struct CompilationUnit { CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, ClassLinker* linker); + CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, ClassLinker* linker, + const QuickCompiler* compiler); ~CompilationUnit(); void StartTimingSplit(const char* label); @@ -194,7 +197,7 @@ struct CompilationUnit { std::unique_ptr<Mir2Lir> cg; // Target-specific codegen. TimingLogger timings; bool print_pass; // Do we want to print a pass or not? - + const QuickCompiler* compiler_; /** * @brief Holds pass options for current pass being applied to compilation unit. * @details This is updated for every pass to contain the overridden pass options diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index a7ba061..e95a7a1 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -471,10 +471,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 93 DIV_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C | DF_ZERO_DIV_CHECK, // 94 REM_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C | DF_ZERO_DIV_CHECK, // 95 AND_INT vAA, vBB, vCC DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, @@ -504,10 +504,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9E DIV_LONG vAA, vBB, vCC - DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, + DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C | DF_ZERO_DIV_CHECK, // 9F REM_LONG vAA, vBB, vCC - DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, + DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C | DF_ZERO_DIV_CHECK, // A0 AND_LONG vAA, vBB, vCC DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, @@ -537,10 +537,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C, // A9 DIV_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C | DF_ZERO_DIV_CHECK, // AA REM_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C | DF_ZERO_DIV_CHECK, // AB ADD_DOUBLE vAA, vBB, vCC DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, @@ -552,10 +552,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, // AE DIV_DOUBLE vAA, vBB, vCC - DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, + DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C | DF_ZERO_DIV_CHECK, // AF REM_DOUBLE vAA, vBB, vCC - DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, + DF_DA | DF_A_WIDE | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_FP_A | DF_FP_B | DF_FP_C | DF_ZERO_DIV_CHECK, // B0 ADD_INT_2ADDR vA, vB DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, @@ -567,10 +567,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B3 DIV_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B | DF_ZERO_DIV_CHECK, // B4 REM_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B | DF_ZERO_DIV_CHECK, // B5 AND_INT_2ADDR vA, vB DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, @@ -600,10 +600,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B, // BE DIV_LONG_2ADDR vA, vB - DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B, + DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B | DF_ZERO_DIV_CHECK, // BF REM_LONG_2ADDR vA, vB - DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B, + DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B | DF_ZERO_DIV_CHECK, // C0 AND_LONG_2ADDR vA, vB DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_CORE_A | DF_CORE_B, @@ -633,10 +633,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B, // C9 DIV_FLOAT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B, + DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B | DF_ZERO_DIV_CHECK, // CA REM_FLOAT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B, + DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B | DF_ZERO_DIV_CHECK, // CB ADD_DOUBLE_2ADDR vA, vB DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B, @@ -648,10 +648,10 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B, // CE DIV_DOUBLE_2ADDR vA, vB - DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B, + DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B | DF_ZERO_DIV_CHECK, // CF REM_DOUBLE_2ADDR vA, vB - DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B, + DF_DA | DF_A_WIDE | DF_UA | DF_UB | DF_B_WIDE | DF_FP_A | DF_FP_B | DF_ZERO_DIV_CHECK, // D0 ADD_INT_LIT16 vA, vB, #+CCCC DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, @@ -1373,7 +1373,7 @@ void MIRGraph::CompilerInitializeSSAConversion() { vreg_to_ssa_map_[i] = i; ssa_last_defs_[i] = 0; } - + reg_location_ = nullptr; // Create a compiler temporary for Method*. This is done after SSA initialization. CompilerTemp* method_temp = GetNewCompilerTemp(kCompilerTempSpecialMethodPtr, false); // The MIR graph keeps track of the sreg for method pointer specially, so record that now. @@ -1434,7 +1434,7 @@ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { char block_name1[BLOCK_NAME_LEN], block_name2[BLOCK_NAME_LEN]; GetBlockName(bb, block_name1); GetBlockName(pred_bb, block_name2); - DumpCFG("/sdcard/cfg/", false); + DumpCFG("/data/quick/", false); LOG(FATAL) << "Successor " << block_name1 << " not found from " << block_name2; } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 920be0b4..01b6e47 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -136,7 +136,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) ifield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)), - suspend_checks_in_loops_(nullptr) { + suspend_checks_in_loops_(nullptr), + pass_failed_(false), + qcm(nullptr) { memset(&temp_, 0, sizeof(temp_)); use_counts_.reserve(256); raw_use_counts_.reserve(256); @@ -161,8 +163,11 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) MIRGraph::~MIRGraph() { STLDeleteElements(&block_list_); STLDeleteElements(&m_units_); + CleanupGraphData(); } +void MIRGraph::CleanupGraphData() { +} /* * Parse an instruction, return the length of the instruction */ @@ -880,7 +885,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ merged_df_flags_ = merged_df_flags; if (cu_->enable_debug & (1 << kDebugDumpCFG)) { - DumpCFG("/sdcard/1_post_parse_cfg/", true); + DumpCFG("/data/quick/1_post_parse_cfg/", true); } if (cu_->verbose) { @@ -931,9 +936,10 @@ uint64_t MIRGraph::GetDataFlowAttributes(MIR* mir) { // It's possible the path is not valid, or some other errors appear. In that case return false. static bool CreateDumpFile(std::string& fname, const char* dir_prefix, NarrowDexOffset start_offset, const char *suffix, int nr, std::string* output) { - std::string dir = StringPrintf("./%s", dir_prefix); + std::string dir = StringPrintf("%s", dir_prefix); + errno = 0; int64_t max_name_length = pathconf(dir.c_str(), _PC_NAME_MAX); - if (max_name_length <= 0) { + if (max_name_length <= 0 && errno != 0) { PLOG(ERROR) << "Could not get file name restrictions for " << dir; return false; } @@ -952,6 +958,10 @@ static bool CreateDumpFile(std::string& fname, const char* dir_prefix, NarrowDex return true; } +const char * MIRGraph::GetExtendedMirOpName(int index) { + return extended_mir_op_names_[index]; +} + // TODO: use a configurable base prefix, and adjust callers to supply pass name. /* Dump the CFG into a DOT graph */ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suffix) { @@ -1004,7 +1014,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff mir->ssa_rep ? GetDalvikDisassembly(mir) : !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : - extended_mir_op_names_[opcode - kMirOpFirst], + MIRGraph::GetExtendedMirOpName(opcode - kMirOpFirst), (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ", (mir->optimization_flags & MIR_IGNORE_SUSPEND_CHECK) != 0 ? " no_suspendcheck" : " ", @@ -1307,7 +1317,7 @@ void MIRGraph::DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir return; // It is not an extended instruction. } - decoded_mir->append(extended_mir_op_names_[opcode - kMirOpFirst]); + decoded_mir->append(MIRGraph::GetExtendedMirOpName(opcode - kMirOpFirst)); switch (opcode) { case kMirOpPhi: { @@ -1510,7 +1520,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { // Handle special cases that recover the original dalvik instruction. if (opcode == kMirOpCheck) { - str.append(extended_mir_op_names_[opcode - kMirOpFirst]); + str.append(MIRGraph::GetExtendedMirOpName(opcode - kMirOpFirst)); str.append(": "); // Recover the original Dex instruction. insn = mir->meta.throw_insn->dalvikInsn; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 23b7c42..e60172f 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -32,6 +32,12 @@ #include "reg_storage.h" #include "utils/arena_bit_vector.h" +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + namespace art { struct CompilationUnit; @@ -82,6 +88,7 @@ enum DataFlowAttributePos { kUsesSField, // Accesses a static field (SGET/SPUT). kCanInitializeClass, // Can trigger class initialization (SGET/SPUT/INVOKE_STATIC). kDoLVN, // Worth computing local value numbers. + kZeroDivCheck, // check for zero divider }; #define DF_NOP UINT64_C(0) @@ -121,6 +128,7 @@ enum DataFlowAttributePos { #define DF_SFIELD (UINT64_C(1) << kUsesSField) #define DF_CLINIT (UINT64_C(1) << kCanInitializeClass) #define DF_LVN (UINT64_C(1) << kDoLVN) +#define DF_ZERO_DIV_CHECK (UINT64_C(1) << kZeroDivCheck) #define DF_HAS_USES (DF_UA | DF_UB | DF_UC) @@ -165,6 +173,7 @@ enum OatMethodAttributes { #define MIR_INLINED_PRED (1 << kMIRInlinedPred) #define MIR_CALLEE (1 << kMIRCallee) #define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck) +#define MIR_IGNORE_ZERO_DIV_CHECK (1 << kMIRIgnoreZeroDivCheck) #define MIR_DUP (1 << kMIRDup) #define MIR_MARK (1 << kMIRMark) #define MIR_STORE_NON_TEMPORAL (1 << kMIRStoreNonTemporal) @@ -242,6 +251,8 @@ struct SSARepresentation { static uint32_t GetStartUseIndex(Instruction::Code opcode); }; +struct ExtendedMIR; + /* * The Midlevel Intermediate Representation node, which may be largely considered a * wrapper around a Dalvik byte code. @@ -354,7 +365,7 @@ class MIR : public ArenaObject<kArenaAllocMIR> { } meta; explicit MIR() : offset(0), optimization_flags(0), m_unit_index(0), bb(NullBasicBlockId), - next(nullptr), ssa_rep(nullptr) { + next(nullptr), ssa_rep(nullptr), extraData(nullptr) { memset(&meta, 0, sizeof(meta)); } @@ -364,6 +375,8 @@ class MIR : public ArenaObject<kArenaAllocMIR> { MIR* Copy(CompilationUnit *c_unit); MIR* Copy(MIRGraph* mir_Graph); + + ExtendedMIR* extraData; }; struct SuccessorBlockInfo; @@ -552,6 +565,9 @@ struct CallInfo { const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, RegStorage(), INVALID_SREG, INVALID_SREG}; + +class QCMIRGraph; + class MIRGraph { public: MIRGraph(CompilationUnit* cu, ArenaAllocator* arena); @@ -833,6 +849,12 @@ class MIRGraph { */ void SetConstantWide(int32_t ssa_reg, int64_t value); + int64_t ConstantValueWide(int32_t s_reg) const { + DCHECK(IsConst(s_reg)); + return (static_cast<int64_t>(constant_values_[s_reg + 1]) << 32) | + Low32Bits(static_cast<int64_t>(constant_values_[s_reg])); + } + bool IsConstantNullRef(RegLocation loc) const { return loc.ref && loc.is_const && (ConstantValue(loc) == 0); } @@ -1221,7 +1243,7 @@ class MIRGraph { */ void CountUses(BasicBlock* bb); - static uint64_t GetDataFlowAttributes(Instruction::Code opcode); + static uint64_t GetDataFlowAttributes(Instruction::Code opcode) QC_WEAK; static uint64_t GetDataFlowAttributes(MIR* mir); /** @@ -1271,8 +1293,16 @@ class MIRGraph { static const char* extended_mir_op_names_[kMirOpLast - kMirOpFirst]; + static const char * GetExtendedMirOpName(int index) QC_WEAK; void HandleSSADef(int* defs, int dalvik_reg, int reg_index); + void CleanupGraphData() QC_WEAK; + + bool SupportMLA() QC_WEAK; + + void SetPassFail() { pass_failed_ = true; } + bool PassFailed() { return pass_failed_; } + protected: int FindCommonParent(int block1, int block2); void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1, @@ -1470,6 +1500,9 @@ class MIRGraph { static const uint64_t oat_data_flow_attributes_[kMirOpLast]; + // flag marks if optimizing pass has failed + bool pass_failed_; + friend class MirOptimizationTest; friend class ClassInitCheckEliminationTest; friend class SuspendCheckEliminationTest; @@ -1481,6 +1514,11 @@ class MIRGraph { friend class TypeInferenceTest; friend class QuickCFITest; friend class QuickAssembleX86TestBase; + + friend class QCMIRGraph; + + public: + QCMIRGraph* qcm; }; } // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 80b7ac1..0e045b3 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -464,6 +464,10 @@ static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) return is_taken; } +bool MIRGraph::SupportMLA() { + return true; +} + /* Do some MIR-level extended basic block optimizations */ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { if (bb->block_type == kDead) { @@ -471,7 +475,8 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } // Currently multiply-accumulate backend supports are only available on arm32 and arm64. if (cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2) { - MultiplyAddOpt(bb); + if (SupportMLA()) + MultiplyAddOpt(bb); } bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kLocalValueNumbering)) == 0u; std::unique_ptr<ScopedArenaAllocator> allocator; diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h index 8762b53..fcb6f7d 100644 --- a/compiler/dex/pass_driver.h +++ b/compiler/dex/pass_driver.h @@ -25,11 +25,19 @@ namespace art { +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + class Pass; class PassDataHolder; class PassDriver; class PassManager; +const Pass* GetMorePassInstance() QC_WEAK; + // Empty holder for the constructor. class PassDriverDataHolder { }; diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index cbe4a02..c7eaddd 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -37,7 +37,7 @@ class PassManagerOptions; class PassDriverME: public PassDriver { public: explicit PassDriverME(const PassManager* const pass_manager, CompilationUnit* cu) - : PassDriver(pass_manager), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { + : PassDriver(pass_manager), pass_me_data_holder_(), dump_cfg_folder_("/data/quick/") { pass_me_data_holder_.bb = nullptr; pass_me_data_holder_.c_unit = cu; } diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc index 375003b..9b370f0 100644 --- a/compiler/dex/pass_driver_me_opts.cc +++ b/compiler/dex/pass_driver_me_opts.cc @@ -27,6 +27,10 @@ namespace art { +const Pass* GetMorePassInstance() { + return new DummyPass; +} + void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) { /* * Create the pass list. These passes are immutable and are shared across the threads. @@ -52,6 +56,7 @@ void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) { pass_manager->AddPass(new MethodUseCount); pass_manager->AddPass(new BBOptimizations); pass_manager->AddPass(new SuspendCheckElimination); + pass_manager->AddPass(GetMorePassInstance()); } void PassDriverMEOpts::ApplyPass(PassDataHolder* data, const Pass* pass) { diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index df4a9f2..7751676 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1082,6 +1082,19 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { */ #define PADDING_MOV_R5_R5 0x1C2D +uint32_t ArmMir2Lir::ProcessMoreEncodings(const ArmEncodingMap* encoder, int i, uint32_t operand) { + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind << " " << operand; + uint32_t value = 0; + return value; +} + +const ArmEncodingMap * ArmMir2Lir::GetEncoder(int opcode) { + if (opcode < 0 || opcode >= kArmLast) + LOG(FATAL) << "invalid opcode " << opcode; + const ArmEncodingMap *encoder = &EncodingMap[opcode]; + return encoder; +} + uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { uint8_t* const write_buffer = write_pos; for (; lir != nullptr; lir = NEXT_LIR(lir)) { @@ -1098,7 +1111,7 @@ uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { } } } else if (LIKELY(!lir->flags.is_nop)) { - const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + const ArmEncodingMap *encoder = GetEncoder(lir->opcode); uint32_t bits = encoder->skeleton; for (int i = 0; i < 4; i++) { uint32_t operand; @@ -1214,7 +1227,8 @@ uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { } break; default: - LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + bits |= ProcessMoreEncodings(encoder, i, operand); + break; } } } @@ -1324,7 +1338,7 @@ void ArmMir2Lir::AssembleLIR() { base_reg, 0, 0, 0, 0, lir->target); new_adr->offset = lir->offset; new_adr->flags.fixup = kFixupAdr; - new_adr->flags.size = EncodingMap[kThumb2Adr].size; + new_adr->flags.size = GetEncoder(kThumb2Adr)->size; InsertLIRBefore(lir, new_adr); lir->offset += new_adr->flags.size; offset_adjustment += new_adr->flags.size; @@ -1339,7 +1353,7 @@ void ArmMir2Lir::AssembleLIR() { } else if (lir->opcode == kThumb2LdrdPcRel8) { lir->opcode = kThumb2LdrdI8; } - lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.size = GetEncoder(lir->opcode)->size; offset_adjustment += lir->flags.size; // Change the load to be relative to the new Adr base. if (lir->opcode == kThumb2LdrdI8) { @@ -1389,13 +1403,13 @@ void ArmMir2Lir::AssembleLIR() { /* operand[0] is src1 in both cb[n]z & CmpRI8 */ lir->operands[1] = 0; lir->target = 0; - lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.size = GetEncoder(lir->opcode)->size; // Add back the new size. offset_adjustment += lir->flags.size; // Set up the new following inst. new_inst->offset = lir->offset + lir->flags.size; new_inst->flags.fixup = kFixupCondBranch; - new_inst->flags.size = EncodingMap[new_inst->opcode].size; + new_inst->flags.size = GetEncoder(new_inst->opcode)->size; offset_adjustment += new_inst->flags.size; // lir no longer pcrel, unlink and link in new_inst. @@ -1420,7 +1434,7 @@ void ArmMir2Lir::AssembleLIR() { if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { offset_adjustment -= lir->flags.size; lir->opcode = kThumb2BCond; - lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.size = GetEncoder(lir->opcode)->size; // Fixup kind remains the same. offset_adjustment += lir->flags.size; res = kRetryAll; @@ -1456,7 +1470,7 @@ void ArmMir2Lir::AssembleLIR() { offset_adjustment -= lir->flags.size; lir->opcode = kThumb2BUncond; lir->operands[0] = 0; - lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.size = GetEncoder(lir->opcode)->size; lir->flags.fixup = kFixupT2Branch; offset_adjustment += lir->flags.size; res = kRetryAll; @@ -1518,7 +1532,7 @@ void ArmMir2Lir::AssembleLIR() { LIR *new_mov16L = RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0, WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); - new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size; + new_mov16L->flags.size = GetEncoder(new_mov16L->opcode)->size; new_mov16L->flags.fixup = kFixupMovImmLST; new_mov16L->offset = lir->offset; // Link the new instruction, retaining lir. @@ -1530,7 +1544,7 @@ void ArmMir2Lir::AssembleLIR() { LIR *new_mov16H = RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0, WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); - new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size; + new_mov16H->flags.size = GetEncoder(new_mov16H->opcode)->size; new_mov16H->flags.fixup = kFixupMovImmHST; new_mov16H->offset = lir->offset; // Link the new instruction, retaining lir. @@ -1547,7 +1561,7 @@ void ArmMir2Lir::AssembleLIR() { lir->opcode = kThumbAddRRHH; } lir->operands[1] = rs_rARM_PC.GetReg(); - lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.size = GetEncoder(lir->opcode)->size; offset_adjustment += lir->flags.size; // Must stay in fixup list and have offset updated; will be used by LST/HSP pair. lir->flags.fixup = kFixupNone; @@ -1635,7 +1649,7 @@ void ArmMir2Lir::AssembleLIR() { size_t ArmMir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); - return EncodingMap[lir->opcode].size; + return GetEncoder(lir->opcode)->size; } // Encode instruction bit pattern and assign offsets. @@ -1647,8 +1661,8 @@ uint32_t ArmMir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offse if (!lir->flags.is_nop) { if (lir->flags.fixup != kFixupNone) { if (!IsPseudoLirOp(lir->opcode)) { - lir->flags.size = EncodingMap[lir->opcode].size; - lir->flags.fixup = EncodingMap[lir->opcode].fixup; + lir->flags.size = GetEncoder(lir->opcode)->size; + lir->flags.fixup = GetEncoder(lir->opcode)->fixup; } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { lir->flags.size = (offset & 0x2); lir->flags.fixup = kFixupAlign4; diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index b94e707..3ae8790 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -22,10 +22,18 @@ #include "base/logging.h" #include "dex/quick/mir_to_lir.h" +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + namespace art { struct CompilationUnit; +class QCArmMir2Lir; + class ArmMir2Lir FINAL : public Mir2Lir { protected: // Inherited class for ARM backend. @@ -57,6 +65,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { public: ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + ~ArmMir2Lir(); // Required for target - codegen helpers. bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, @@ -206,6 +215,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* OpMem(OpKind op, RegStorage r_base, int disp); void OpPcRelLoad(RegStorage reg, LIR* target); LIR* OpReg(OpKind op, RegStorage r_dest_src); + LIR* OpBkpt(); void OpRegCopy(RegStorage r_dest, RegStorage r_src); LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); @@ -271,12 +281,16 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); + void GenMoreMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) QC_WEAK; + // void MachineSpecificPreprocessMIR(BasicBlock* bb, MIR* mir); void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE; bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; + void CleanupCodeGenData() QC_WEAK; + private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -301,6 +315,12 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); + + + static uint32_t ProcessMoreEncodings(const ArmEncodingMap* encoder, int i, uint32_t operand) QC_WEAK; + + static const ArmEncodingMap * GetEncoder(int opcode) QC_WEAK; + static constexpr ResourceMask GetRegMaskArm(RegStorage reg); static constexpr ResourceMask EncodeArmRegList(int reg_list); static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list); @@ -351,6 +371,16 @@ class ArmMir2Lir FINAL : public Mir2Lir { InvokeType type); void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest); + + virtual void ApplyArchOptimizations(LIR* head_lir, LIR* tail_lir, BasicBlock* bb) QC_WEAK; + + void CompilerPostInitializeRegAlloc() QC_WEAK; + void ArmMir2LirPostInit(ArmMir2Lir* mir_to_lir) QC_WEAK; + + friend class QCArmMir2Lir; + + public: + QCArmMir2Lir * qcm2l; }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index cf01884..4ddf616 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -754,8 +754,7 @@ RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStora RegStorage temp = AllocTemp(); OpRegRegReg(kOpDiv, temp, reg1, reg2); - OpRegReg(kOpMul, temp, reg2); - OpRegRegReg(kOpSub, rl_result.reg, reg1, temp); + NewLIR4(kThumb2Mls, rl_result.reg.GetReg(), temp.GetReg(), reg2.GetReg(), reg1.GetReg()); FreeTemp(temp); } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 5f27338..fd16bb8 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -140,11 +140,17 @@ ResourceMask ArmMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { return GetRegMaskArm(reg); } +void ArmMir2Lir::CompilerPostInitializeRegAlloc() { + // nothing here +} + constexpr ResourceMask ArmMir2Lir::GetRegMaskArm(RegStorage reg) { - return reg.IsDouble() + return (reg.IsQuad()) + ? (ResourceMask::FourBits((reg.GetRegNum() * 4) + kArmFPReg0)) + : (reg.IsDouble() /* Each double register is equal to a pair of single-precision FP registers */ ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kArmFPReg0) - : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum()); + : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum())); } constexpr ResourceMask ArmMir2Lir::EncodeArmRegList(int reg_list) { @@ -586,6 +592,18 @@ ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* << " is wrong: expecting " << i << ", seeing " << static_cast<int>(ArmMir2Lir::EncodingMap[i].opcode); } + qcm2l = nullptr; + ArmMir2LirPostInit(this); +} + +ArmMir2Lir::~ArmMir2Lir() { + CleanupCodeGenData(); +} + +void ArmMir2Lir::CleanupCodeGenData() { +} + +void ArmMir2Lir::ArmMir2LirPostInit(ArmMir2Lir*) { } Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, @@ -633,6 +651,8 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { reg_pool_->next_core_reg_ = 2; reg_pool_->next_sp_reg_ = 0; reg_pool_->next_dp_reg_ = 0; + + CompilerPostInitializeRegAlloc(); } /* @@ -814,17 +834,17 @@ LIR* ArmMir2Lir::CheckSuspendUsingLoad() { uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return ArmMir2Lir::EncodingMap[opcode].flags; + return GetEncoder(opcode)->flags; } const char* ArmMir2Lir::GetTargetInstName(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return ArmMir2Lir::EncodingMap[opcode].name; + return GetEncoder(opcode)->name; } const char* ArmMir2Lir::GetTargetInstFmt(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return ArmMir2Lir::EncodingMap[opcode].fmt; + return GetEncoder(opcode)->fmt; } /* @@ -1009,8 +1029,15 @@ void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], true); break; default: - LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode; + GenMoreMachineSpecificExtendedMethodMIR(bb, mir); + // LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode; } } +void ArmMir2Lir::GenMoreMachineSpecificExtendedMethodMIR(BasicBlock*, MIR*) { + // nothing here +} + +void ArmMir2Lir::ApplyArchOptimizations(LIR*, LIR*, BasicBlock*) { +} } // namespace art diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 2ef92f8..0a2a969 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -282,6 +282,11 @@ LIR* ArmMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { return NewLIR1(opcode, r_dest_src.GetReg()); } +LIR* ArmMir2Lir::OpBkpt() { + LOG(ERROR) << "Inserting breakpoint"; + return NewLIR0(kThumbBkpt); +} + LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) { bool thumb_form = @@ -399,15 +404,15 @@ LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_s break; } DCHECK(!IsPseudoLirOp(opcode)); - if (EncodingMap[opcode].flags & IS_BINARY_OP) { + if (GetEncoder(opcode)->flags & IS_BINARY_OP) { return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg()); - } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) { + } else if (GetEncoder(opcode)->flags & IS_TERTIARY_OP) { + if (GetEncoder(opcode)->field_loc[2].kind == kFmtShift) { return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift); } else { return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg()); } - } else if (EncodingMap[opcode].flags & IS_QUAD_OP) { + } else if (GetEncoder(opcode)->flags & IS_QUAD_OP) { return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift); } else { LOG(FATAL) << "Unexpected encoding operand count"; @@ -498,10 +503,10 @@ LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src break; } DCHECK(!IsPseudoLirOp(opcode)); - if (EncodingMap[opcode].flags & IS_QUAD_OP) { + if (GetEncoder(opcode)->flags & IS_QUAD_OP) { return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); } else { - DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); + DCHECK(GetEncoder(opcode)->flags & IS_TERTIARY_OP); return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg()); } } @@ -643,7 +648,7 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, in RegStorage r_scratch = AllocTemp(); LoadConstant(r_scratch, value); LIR* res; - if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) + if (GetEncoder(alt_opcode)->flags & IS_QUAD_OP) res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); else res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index b78fb80..5886da3 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -682,9 +682,23 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) } } +const A64EncodingMap* Arm64Mir2Lir::GetEncoder(int opcode) { + if (opcode < 0 || opcode >= kA64Last) + LOG(FATAL) << "invalid opcode " << opcode; + const A64EncodingMap* encoder = &EncodingMap[opcode]; + return encoder; +} + /* Nop, used for aligning code. Nop is an alias for hint #0. */ #define PADDING_NOP (UINT32_C(0xd503201f)) +uint32_t Arm64Mir2Lir::ProcessMoreEncodings(const A64EncodingMap *encoder, + int i, uint32_t operand) { + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind << " " << operand; + uint32_t value = 0; + return value; +} + uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { uint8_t* const write_buffer = write_pos; for (; lir != nullptr; lir = NEXT_LIR(lir)) { @@ -697,7 +711,7 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { } if (LIKELY(!lir->flags.is_nop)) { - const A64EncodingMap *encoder = &EncodingMap[opcode]; + const A64EncodingMap *encoder = GetEncoder(opcode); // Select the right variant of the skeleton. uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton; @@ -831,8 +845,8 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { bits |= value; break; default: - LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name - << " (" << kind << ")"; + bits |= ProcessMoreEncodings(encoder, i, operand); + break; } } } @@ -1050,7 +1064,7 @@ void Arm64Mir2Lir::AssembleLIR() { } break; default: - LOG(FATAL) << "Unexpected case " << lir->flags.fixup; + LOG(FATAL) << "Unexpected case: opcode: " << lir->opcode << ", fixup: " << lir->flags.fixup; } prev_lir = lir; lir = lir->u.a.pcrel_next; @@ -1101,7 +1115,7 @@ void Arm64Mir2Lir::AssembleLIR() { size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) { A64Opcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); - return EncodingMap[opcode].size; + return GetEncoder(opcode)->size; } // Encode instruction bit pattern and assign offsets. @@ -1114,8 +1128,8 @@ uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t off if (!lir->flags.is_nop) { if (lir->flags.fixup != kFixupNone) { if (!IsPseudoLirOp(opcode)) { - lir->flags.size = EncodingMap[opcode].size; - lir->flags.fixup = EncodingMap[opcode].fixup; + lir->flags.size = GetEncoder(opcode)->size; + lir->flags.fixup = GetEncoder(opcode)->fixup; } else { DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4); lir->flags.size = 0; diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index ca2e012..016cb0f 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -23,8 +23,14 @@ #include <map> -namespace art { +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif +namespace art { +class QCArm64Mir2Lir; class Arm64Mir2Lir FINAL : public Mir2Lir { protected: class InToRegStorageArm64Mapper : public InToRegStorageMapper { @@ -49,6 +55,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { public: Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + ~Arm64Mir2Lir(); // Required for target - codegen helpers. bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, @@ -264,6 +271,10 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; + void GenMoreMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) QC_WEAK; + + void CleanupCodeGenData() QC_WEAK; + private: /** * @brief Given register xNN (dNN), returns register wNN (sNN). @@ -409,6 +420,20 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { ArenaVector<LIR*> dex_cache_access_insns_; int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + + void Cleanup() QC_WEAK; + + private: + static uint32_t ProcessMoreEncodings(const A64EncodingMap* encoder, int i, uint32_t operand) QC_WEAK; + static const A64EncodingMap* GetEncoder(int opcode) QC_WEAK; + + virtual void ApplyArchOptimizations(LIR* head_lir, LIR* tail_lir, BasicBlock* bb) QC_WEAK; + + void CompilerPostInitializeRegAlloc() QC_WEAK; + void Arm64Mir2LirPostInit(Arm64Mir2Lir* mir_to_lir) QC_WEAK; + + friend class QCArm64Mir2Lir; + QCArm64Mir2Lir* qcm2l; }; } // namespace art diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index d5de18d..f772572 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -140,6 +140,10 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { return res_reg; } +void Arm64Mir2Lir::CompilerPostInitializeRegAlloc() { + // nothing here +} + /* * Decode the register id. This routine makes assumptions on the encoding made by RegStorage. */ @@ -614,6 +618,19 @@ Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAlloca << " is wrong: expecting " << i << ", seeing " << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode); } + + qcm2l = nullptr; + Arm64Mir2LirPostInit(this); +} + +void Arm64Mir2Lir::CleanupCodeGenData() { +} + +Arm64Mir2Lir::~Arm64Mir2Lir() { + CleanupCodeGenData(); +} + +void Arm64Mir2Lir::Arm64Mir2LirPostInit(Arm64Mir2Lir*) { } Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, @@ -658,6 +675,8 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { reg_pool_->next_core_reg_ = 2; reg_pool_->next_sp_reg_ = 0; reg_pool_->next_dp_reg_ = 0; + + CompilerPostInitializeRegAlloc(); } /* @@ -798,17 +817,17 @@ LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() { uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags; + return GetEncoder(UNWIDE(opcode))->flags; } const char* Arm64Mir2Lir::GetTargetInstName(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name; + return GetEncoder(UNWIDE(opcode))->name; } const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt; + return GetEncoder(UNWIDE(opcode))->fmt; } RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { @@ -907,8 +926,15 @@ void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) GenMaddMsubLong(rl_dest, rl_src[0], rl_src[1], rl_src[2], opcode == kMirOpMsubLong); break; default: - LOG(FATAL) << "Unexpected opcode: " << static_cast<int>(opcode); + GenMoreMachineSpecificExtendedMethodMIR(bb, mir); + // LOG(FATAL) << "Unexpected opcode: " << static_cast<int>(opcode); } } +void Arm64Mir2Lir::GenMoreMachineSpecificExtendedMethodMIR(BasicBlock*, MIR*) { +} + +void Arm64Mir2Lir::ApplyArchOptimizations(LIR*, LIR*, BasicBlock*) { +} + } // namespace art diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 483231f..7758d60 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -93,7 +93,7 @@ size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) { bool opcode_is_wide = IS_WIDE(lir->opcode); A64Opcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); - const A64EncodingMap *encoder = &EncodingMap[opcode]; + const A64EncodingMap *encoder = GetEncoder(opcode); uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton; return (bits >> 30); } @@ -613,11 +613,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r } DCHECK(!IsPseudoLirOp(opcode)); - if (EncodingMap[opcode].flags & IS_BINARY_OP) { + if (GetEncoder(opcode)->flags & IS_BINARY_OP) { DCHECK_EQ(shift, ENCODE_NO_SHIFT); return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg()); - } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + } else if (GetEncoder(opcode)->flags & IS_TERTIARY_OP) { + A64EncodingKind kind = GetEncoder(opcode)->field_loc[2].kind; if (kind == kFmtShift) { return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift); } @@ -649,8 +649,8 @@ LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage } DCHECK(!IsPseudoLirOp(opcode)); - if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + if (GetEncoder(opcode)->flags & IS_TERTIARY_OP) { + A64EncodingKind kind = GetEncoder(opcode)->field_loc[2].kind; if (kind == kFmtExtend) { return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), EncodeExtend(ext, amount)); @@ -750,11 +750,11 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_s A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode; CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit()); - if (EncodingMap[opcode].flags & IS_QUAD_OP) { + if (GetEncoder(opcode)->flags & IS_QUAD_OP) { DCHECK(!IsExtendEncoding(shift)); return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); } else { - DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); + DCHECK(GetEncoder(opcode)->flags & IS_TERTIARY_OP); DCHECK_EQ(shift, ENCODE_NO_SHIFT); return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg()); } @@ -924,7 +924,7 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1 r_scratch = AllocTemp(); LoadConstant(r_scratch, value); } - if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) + if (GetEncoder(alt_opcode)->flags & IS_QUAD_OP) res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info); else res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); @@ -998,7 +998,7 @@ LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) if (UNLIKELY(neg)) opcode = neg_opcode; - if (EncodingMap[opcode].flags & IS_QUAD_OP) + if (GetEncoder(opcode)->flags & IS_QUAD_OP) return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0); else @@ -1094,7 +1094,7 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto if (UNLIKELY(expected_scale == 0)) { // This is a tertiary op (e.g. ldrb, ldrsb), it does not not support scale. - DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); + DCHECK_NE(GetEncoder(UNWIDE(opcode))->flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { @@ -1172,7 +1172,7 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt if (UNLIKELY(expected_scale == 0)) { // This is a tertiary op (e.g. strb), it does not not support scale. - DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); + DCHECK_NE(GetEncoder(UNWIDE(opcode))->flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index af10817..403fbd6 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1545,14 +1545,14 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, break; case Instruction::DIV_INT: case Instruction::DIV_INT_2ADDR: - check_zero = true; + check_zero = (flags & MIR_IGNORE_ZERO_DIV_CHECK) ? false : true; op = kOpDiv; is_div_rem = true; break; /* NOTE: returns in kArg1 */ case Instruction::REM_INT: case Instruction::REM_INT_2ADDR: - check_zero = true; + check_zero = (flags & MIR_IGNORE_ZERO_DIV_CHECK) ? false : true; op = kOpRem; is_div_rem = true; break; diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 767fe25..c993065 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -146,7 +146,9 @@ inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) { DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0); DCHECK_LT(static_cast<size_t>(reg), reginfo_map_.size()); DCHECK(reginfo_map_[reg] != nullptr) << "No info for 0x" << reg; - *mask = mask->Union(reginfo_map_[reg]->DefUseMask()); + if (reginfo_map_[reg]) { + *mask = mask->Union(reginfo_map_[reg]->DefUseMask()); + } } /* diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index c50246d..ebb429f 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1280,6 +1280,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { head_lir->u.m.def_mask = &kEncodeAll; } + MachineSpecificPreprocessMIR(bb, mir); if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { HandleExtendedMethodMIR(bb, mir); continue; @@ -1291,6 +1292,8 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (head_lir) { // Eliminate redundant loads/stores and delay stores into later slots. ApplyLocalOptimizations(head_lir, last_lir_insn_); + // Apply architecture-specific optimizations + ApplyArchOptimizations(head_lir, last_lir_insn_, bb); } return false; } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 73787e9..0d2da42 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -327,6 +327,8 @@ class Mir2Lir { static const uint32_t kLowSingleStorageMask = 0x00000001; static const uint32_t kHighSingleStorageMask = 0x00000002; static const uint32_t k64SoloStorageMask = 0x00000003; + static const uint32_t kLowDoubleStorageMask = 0x00000003; + static const uint32_t kHighDoubleStorageMask = 0x0000000c; static const uint32_t k128SoloStorageMask = 0x0000000f; static const uint32_t k256SoloStorageMask = 0x000000ff; static const uint32_t k512SoloStorageMask = 0x0000ffff; @@ -693,6 +695,7 @@ class Mir2Lir { void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir); void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir); virtual void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir); + virtual void ApplyArchOptimizations(LIR*, LIR*, BasicBlock*) { return; } // Shared by all targets - implemented in ralloc_util.cc int GetSRegHi(int lowSreg); @@ -753,6 +756,8 @@ class Mir2Lir { void MarkClean(RegLocation loc); void MarkDirty(RegLocation loc); void MarkInUse(RegStorage reg); + void MarkFree(RegStorage reg); + void MarkDead(RegStorage reg); bool CheckCorePoolSanity(); virtual RegLocation UpdateLoc(RegLocation loc); virtual RegLocation UpdateLocWide(RegLocation loc); @@ -1350,6 +1355,9 @@ class Mir2Lir { */ virtual void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir); + /* non virtual so it doesn't have to be implemented */ + virtual void MachineSpecificPreprocessMIR(BasicBlock*, MIR*) { } + /** * @brief Lowers the kMirOpSelect MIR into LIR. * @param bb The basic block in which the MIR is from. @@ -1414,6 +1422,9 @@ class Mir2Lir { virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0; virtual void OpPcRelLoad(RegStorage reg, LIR* target) = 0; virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0; + virtual LIR* OpBkpt() { // not abstract so it doesn't have to be implemeted for other platforms + return NULL; + }; virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0; virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0; virtual LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) = 0; diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index ff4659a..0e9d268 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -59,6 +59,11 @@ static_assert(5U == static_cast<size_t>(kX86_64), "kX86_64 not 5"); static_assert(6U == static_cast<size_t>(kMips), "kMips not 6"); static_assert(7U == static_cast<size_t>(kMips64), "kMips64 not 7"); +#ifndef DISABLE_CAF_BAILOUT +// check the pass status for early bail out +thread_local bool check_bail_out; +#endif + // Additional disabled optimizations (over generally disabled) per instruction set. static constexpr uint32_t kDisabledOptimizationsPerISA[] = { // 0 = kNone. @@ -488,6 +493,11 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) return true; } +// check certain conditions that we don't want Quick compiler to handle +bool QuickCompiler::CheckMoreConditions(CompilationUnit*) const { + return true; +} + // Skip the method that we do not support currently. bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, CompilationUnit* cu) const { @@ -497,6 +507,10 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil return false; } + if (!CheckMoreConditions(cu)) { + return false; + } + // Check whether we do have limitations at all. if (kSupportedTypes[cu->instruction_set] == nullptr && kUnsupportedOpcodesSize[cu->instruction_set] == 0U) { @@ -637,7 +651,7 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, if (instruction_set == kArm) { instruction_set = kThumb2; } - CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker); + CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker, this); cu.dex_file = &dex_file; cu.class_def_idx = class_def_idx; cu.method_idx = method_idx; @@ -720,6 +734,22 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu); pass_driver.Launch(); +#ifndef DISABLE_CAF_BAILOUT + if (check_bail_out && cu.mir_graph->PassFailed()) { +#else + if (GetCheckBailOutFlag() && cu.mir_graph->PassFailed()) { +#endif + return nullptr; + } + +#ifndef DISABLE_CAF_BAILOUT + if (check_bail_out) { +#else + if (GetCheckBailOutFlag()) { +#endif + VLOG(compiler) << "fast compile applied to " << PrettyMethod(method_idx, dex_file); + } + /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */ if (cu.compiler_driver->ProfilePresent() && !cu.mir_graph->MethodIsLeaf() @@ -848,6 +878,9 @@ QuickCompiler::QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) { if (pass_manager_options->GetPrintPassOptions()) { PassDriverMEPostOpt::PrintPassOptions(post_opt_pass_manager_.get()); } +#ifdef DISABLE_CAF_BAILOUT + check_bail_out_ = false; +#endif } QuickCompiler::~QuickCompiler() { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 43dd578..7d66901 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -19,6 +19,12 @@ #include "compiler.h" +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + namespace art { class Compiler; @@ -65,12 +71,23 @@ class QuickCompiler : public Compiler { return post_opt_pass_manager_.get(); } + bool CheckMoreConditions(CompilationUnit* cu) const QC_WEAK; + +#ifdef DISABLE_CAF_BAILOUT + void SetCheckBailOutFlag() { check_bail_out_ = true; } + void ResetCheckBailOutFlag() { check_bail_out_ = false; } + bool GetCheckBailOutFlag() const { return check_bail_out_; } +#endif + protected: explicit QuickCompiler(CompilerDriver* driver); private: std::unique_ptr<PassManager> pre_opt_pass_manager_; std::unique_ptr<PassManager> post_opt_pass_manager_; +#ifdef DISABLE_CAF_BAILOUT + bool check_bail_out_; +#endif DISALLOW_COPY_AND_ASSIGN(QuickCompiler); }; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 8ec86fa..e0ed90d 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -928,6 +928,24 @@ void Mir2Lir::MarkInUse(RegStorage reg) { } } +void Mir2Lir::MarkFree(RegStorage reg) { + if (reg.IsPair()) { + GetRegInfo(reg.GetLow())->MarkFree(); + GetRegInfo(reg.GetHigh())->MarkFree(); + } else { + GetRegInfo(reg)->MarkFree(); + } +} + +void Mir2Lir::MarkDead(RegStorage reg) { + if (reg.IsPair()) { + GetRegInfo(reg.GetLow())->MarkDead(); + GetRegInfo(reg.GetHigh())->MarkDead(); + } else { + GetRegInfo(reg)->MarkDead(); + } +} + bool Mir2Lir::CheckCorePoolSanity() { for (RegisterInfo* info : tempreg_info_) { int my_sreg = info->SReg(); diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h index 78e81b2..72db75c 100644 --- a/compiler/dex/quick/resource_mask.h +++ b/compiler/dex/quick/resource_mask.h @@ -86,6 +86,14 @@ class ResourceMask { start_bit >= 64u ? UINT64_C(3) << (start_bit - 64u) : 0u); } + // Four consecutive bits. The start_bit must be even. + static constexpr ResourceMask FourBits(size_t start_bit) { + return + DCHECK_CONSTEXPR((start_bit & 1u) == 0u, << start_bit << " isn't even", Bit(0)) + ResourceMask(start_bit >= 64u ? 0u : UINT64_C(0xf) << start_bit, + start_bit >= 64u ? UINT64_C(0xf) << (start_bit - 64u) : 0u); + } + static constexpr ResourceMask NoBits() { return ResourceMask(UINT64_C(0), UINT64_C(0)); } diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 46ed011..802ee29 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -159,6 +159,10 @@ class RegStorage : public ValueObject { return ((reg_ & kShapeMask) == k64BitSolo); } + constexpr bool Is128BitSolo() const { + return ((reg_ & kShapeMask) == k128BitSolo); + } + constexpr bool IsPair() const { return ((reg_ & kShapeMask) == k64BitPair); } @@ -175,6 +179,12 @@ class RegStorage : public ValueObject { (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } + constexpr bool IsQuad() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + (reg_ & (kFloatingPoint | k128BitSolo)) == (kFloatingPoint | k128BitSolo); + } + constexpr bool IsSingle() const { return DCHECK_CONSTEXPR(Valid(), , false) @@ -189,6 +199,10 @@ class RegStorage : public ValueObject { return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } + static constexpr bool IsQuad(uint16_t reg) { + return (reg & (kFloatingPoint | k128BitSolo)) == (kFloatingPoint | k128BitSolo); + } + static constexpr bool IsSingle(uint16_t reg) { return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint; } @@ -230,24 +244,60 @@ class RegStorage : public ValueObject { return ((reg_ & kRegTypeMask) | k32BitSolo); } + // Retrieve the low register num of a pair + int GetLowRegNum() const { + DCHECK(IsPair()); + return (reg_ & kRegNumMask); + } + // Create a stand-alone RegStorage from the low reg of a pair. RegStorage GetLow() const { DCHECK(IsPair()); return RegStorage(k32BitSolo, reg_ & kRegTypeMask); } + // Create a stand-alone RegStorage from the low 32bit of 64bit float solo. + RegStorage GetLowFromFloatSolo64() const { + DCHECK(IsFloat() && Is64BitSolo()); + return RegStorage(k32BitSolo, ((reg_ & kRegNumMask) << 1) | kFloatingPoint); + } + + // Create a stand-alone RegStorage from the low 64bit of 128bit float solo. + RegStorage GetLowFromFloatSolo128() const { + DCHECK(IsFloat() && Is128BitSolo()); + return RegStorage(k64BitSolo, ((reg_ & kRegNumMask) << 1) | kFloatingPoint); + } + // Retrieve the most significant register of a pair. int GetHighReg() const { DCHECK(IsPair()); return k32BitSolo | ((reg_ & kHighRegMask) >> kHighRegShift) | (reg_ & kFloatingPoint); } + // Retrieve the high register num of a pair. + int GetHighRegNum() const { + DCHECK(IsPair()); + return ((reg_ & kHighRegMask) >> kHighRegShift); + } + // Create a stand-alone RegStorage from the high reg of a pair. RegStorage GetHigh() const { DCHECK(IsPair()); return RegStorage(kValid | GetHighReg()); } + // Create a stand-alone RegStorage from the high 32bit of 64bit float solo. + RegStorage GetHighFromFloatSolo64() const { + DCHECK(IsFloat() && Is64BitSolo()); + return RegStorage(k32BitSolo, (((reg_ & kRegNumMask) << 1) +1) | kFloatingPoint); + } + + // Create a stand-alone RegStorage from the high 64bit of 128bit float solo. + RegStorage GetHighFromFloatSolo128() const { + DCHECK(IsFloat() && Is128BitSolo()); + return RegStorage(k64BitSolo, (((reg_ & kRegNumMask) << 1) +1) | kFloatingPoint); + } + void SetHighReg(int reg) { DCHECK(IsPair()); reg_ = (reg_ & ~kHighRegMask) | ((reg & kHighRegNumMask) << kHighRegShift); @@ -310,6 +360,11 @@ class RegStorage : public ValueObject { return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } + // Create a floating point 128-bit solo. + static RegStorage FloatSolo128(int reg_num) { + return RegStorage(k128BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); + } + static constexpr RegStorage InvalidReg() { return RegStorage(kInvalid); } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 0613e6e..f20a2c9 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -74,6 +74,10 @@ namespace art { +#ifndef DISABLE_CAF_BAILOUT +extern thread_local bool check_bail_out; +#endif + static constexpr bool kTimeCompileMethod = !kIsDebugBuild; // Whether to produce 64-bit ELF files for 64-bit targets. @@ -384,6 +388,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, timings_logger_(timer), compiler_context_(nullptr), support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64), + status_map_(new std::vector<SafeMap<int32_t, int32_t>>(thread_count)), dedupe_code_("dedupe code", *swap_space_allocator_), dedupe_src_mapping_table_("dedupe source mapping table", *swap_space_allocator_), dedupe_mapping_table_("dedupe mapping table", *swap_space_allocator_), @@ -2169,6 +2174,13 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, CompilerDriver* const driver = manager->GetCompiler(); + // reset the status map properly + SafeMap<int32_t, int32_t> *status_map = driver->GetStatusMap(self); + + if (status_map != nullptr) { + status_map->clear(); + } + // Can we run DEX-to-DEX compiler on this class ? DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile; { @@ -2284,6 +2296,9 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i IsMethodToCompile(method_ref); if (compile) { // NOTE: if compiler declines to compile this method, it will return null. +#ifndef DISABLE_CAF_BAILOUT + check_bail_out = false; +#endif compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, class_loader, dex_file); } @@ -2527,4 +2542,8 @@ bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file return inliner->IsStringInitMethodIndex(method_index); } +SafeMap<int32_t, int32_t> *CompilerDriver::GetStatusMap(Thread *) { + return nullptr; +} + } // namespace art diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index f737007..93c34f3 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -43,6 +43,12 @@ #include "utils/dex_cache_arrays_layout.h" #include "utils/swap_space.h" +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + namespace art { namespace mirror { @@ -473,6 +479,9 @@ class CompilerDriver { had_hard_verifier_failure_ = true; } + // get the status map associated with a thread + SafeMap<int32_t, int32_t> *GetStatusMap(Thread *self) QC_WEAK; + private: // Return whether the declaring class of `resolved_member` is // available to `referrer_class` for read or write access using two @@ -687,6 +696,7 @@ class CompilerDriver { bool support_boot_image_fixup_; + std::unique_ptr<std::vector<SafeMap<int32_t, int32_t>>> status_map_; // DeDuplication data structures, these own the corresponding byte arrays. template <typename ContentType> class DedupeHashFunc { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index d175efe..8fa4e7b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -854,13 +854,15 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, value, field_type, resolved_field->GetOffset(), - resolved_field->IsVolatile())); + resolved_field->IsVolatile(), + field_index)); } else { current_block_->AddInstruction(new (arena_) HInstanceFieldGet( current_block_->GetLastInstruction(), field_type, resolved_field->GetOffset(), - resolved_field->IsVolatile())); + resolved_field->IsVolatile(), + field_index)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } @@ -971,11 +973,11 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, DCHECK_EQ(value->GetType(), field_type); current_block_->AddInstruction( new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(), - resolved_field->IsVolatile())); + resolved_field->IsVolatile(), field_index)); } else { current_block_->AddInstruction( new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(), - resolved_field->IsVolatile())); + resolved_field->IsVolatile(), field_index)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } return true; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index ea97c51..d434839 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -73,6 +73,20 @@ void HInliner::Run() { } } +// additional check for an inlinable method +bool HInliner::CanInlineMethod(const DexCompilationUnit& , + HGraph&, + HInvoke*) const { + return false; +} + +// try if we can remove exception checks +void HInliner::TryRemoveExceptionChecks(const DexCompilationUnit&, + HGraph&, + HInvoke*) const { + return; +} + bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); @@ -234,11 +248,15 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, inliner.Run(); } + TryRemoveExceptionChecks(dex_compilation_unit, *callee_graph, invoke_instruction); + HReversePostOrderIterator it(*callee_graph); it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader()) { + if (block->IsLoopHeader() && !CanInlineMethod(dex_compilation_unit, + *callee_graph, + invoke_instruction)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 4602e77..12429cc 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -20,6 +20,12 @@ #include "invoke_type.h" #include "optimization.h" +#ifdef QC_STRONG +#define QC_WEAK +#else +#define QC_WEAK __attribute__((weak)) +#endif + namespace art { class CompilerDriver; @@ -47,6 +53,12 @@ class HInliner : public HOptimization { static constexpr const char* kInlinerPassName = "inliner"; private: + bool CanInlineMethod(const DexCompilationUnit& dex_compilation_unit, + HGraph& graph, + HInvoke* invoke_instruction) const QC_WEAK; + void TryRemoveExceptionChecks(const DexCompilationUnit& dex_compilation_unit, + HGraph& graph, + HInvoke* invoke_instruction) const QC_WEAK; bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 0244620..668956a 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 88490d0..119c22c 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1315,6 +1315,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Do a reverse post order of the blocks in the callee and do (1), (2), // and (3) to the blocks that apply. HLoopInformation* info = at->GetLoopInformation(); + HLoopInformation* info_callee = nullptr; for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { @@ -1324,13 +1325,21 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(current); outer_graph->reverse_post_order_.Put(++index_of_at, current); if (info != nullptr) { - current->SetLoopInformation(info); + if (current->GetLoopInformation() == nullptr) + current->SetLoopInformation(info); + else + info_callee = current->GetLoopInformation(); for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(current); } } } } + // inlining loop + if (info_callee != nullptr) { + ((ArenaBitVector &)(info_callee->GetBlocks())).ClearAllBits(); + info_callee->Populate(); + } // Do (1), (2), and (3) to `to`. to->SetGraph(outer_graph); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f8149d1..a537430 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -50,6 +50,7 @@ class LiveInterval; class LocationSummary; class SlowPathCode; class SsaBuilder; +class HExtendedLoopInformation; static const int kDefaultNumberOfBlocks = 8; static const int kDefaultNumberOfSuccessors = 2; @@ -390,7 +391,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { suspend_check_(nullptr), back_edges_(graph->GetArena(), kDefaultNumberOfBackEdges), // Make bit vector growable, as the number of blocks may change. - blocks_(graph->GetArena(), graph->GetBlocks().Size(), true) {} + blocks_(graph->GetArena(), graph->GetBlocks().Size(), true), + extended_loop_info_(nullptr) {} HBasicBlock* GetHeader() const { return header_; @@ -466,6 +468,14 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { void Add(HBasicBlock* block); void Remove(HBasicBlock* block); + void SetExtendedLoopInformation(HExtendedLoopInformation *extended_loop_info) { + extended_loop_info_ = extended_loop_info; + } + + HExtendedLoopInformation* GetExtendedLoopInformation() { + return extended_loop_info_; + } + private: // Internal recursive implementation of `Populate`. void PopulateRecursive(HBasicBlock* block); @@ -475,6 +485,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { GrowableArray<HBasicBlock*> back_edges_; ArenaBitVector blocks_; + HExtendedLoopInformation* extended_loop_info_; + DISALLOW_COPY_AND_ASSIGN(HLoopInformation); }; @@ -3128,17 +3140,25 @@ class HNullCheck : public HExpression<1> { class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile) - : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {} + FieldInfo(MemberOffset field_offset, + Primitive::Type field_type, + bool is_volatile, + uint32_t index) + : field_offset_(field_offset), + field_type_(field_type), + is_volatile_(is_volatile), + index_(index) {} MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } + uint32_t GetFieldIndex() const { return index_; } bool IsVolatile() const { return is_volatile_; } private: const MemberOffset field_offset_; const Primitive::Type field_type_; const bool is_volatile_; + uint32_t index_; }; class HInstanceFieldGet : public HExpression<1> { @@ -3148,11 +3168,22 @@ class HInstanceFieldGet : public HExpression<1> { MemberOffset field_offset, bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type, is_volatile) { + field_info_(field_offset, field_type, is_volatile, 0) { + SetRawInputAt(0, value); + } + + HInstanceFieldGet(HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset, + bool is_volatile, + uint32_t field_idx) + : HExpression(field_type, SideEffects::DependsOnSomething()), + field_info_(field_offset, field_type, is_volatile, field_idx) { SetRawInputAt(0, value); } bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + // TODO: add CanBeNull for accessing a first page field from cannot-be-null object bool InstructionDataEquals(HInstruction* other) const OVERRIDE { HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); @@ -3168,9 +3199,11 @@ class HInstanceFieldGet : public HExpression<1> { } const FieldInfo& GetFieldInfo() const { return field_info_; } + uint32_t GetFieldIndex() const { return field_info_.GetFieldIndex(); } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } + bool IsPrimitiveField() const { return (GetFieldType() != Primitive::kPrimNot); } DECLARE_INSTRUCTION(InstanceFieldGet); @@ -3188,20 +3221,35 @@ class HInstanceFieldSet : public HTemplateInstruction<2> { MemberOffset field_offset, bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type, is_volatile) { + field_info_(field_offset, field_type, is_volatile, 0) { SetRawInputAt(0, object); SetRawInputAt(1, value); } + HInstanceFieldSet(HInstruction* object, + HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset, + bool is_volatile, + uint32_t field_idx) + : HTemplateInstruction(SideEffects::ChangesSomething()), + field_info_(field_offset, field_type, is_volatile, field_idx) { + SetRawInputAt(0, object); + SetRawInputAt(1, value); + } + + // TODO: add CanBeNull for accessing a first page field from cannot-be-null object bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; } const FieldInfo& GetFieldInfo() const { return field_info_; } + uint32_t GetFieldIndex() const { return field_info_.GetFieldIndex(); } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } + bool IsPrimitiveField() const { return (GetFieldType() != Primitive::kPrimNot); } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -3566,7 +3614,17 @@ class HStaticFieldGet : public HExpression<1> { MemberOffset field_offset, bool is_volatile) : HExpression(field_type, SideEffects::DependsOnSomething()), - field_info_(field_offset, field_type, is_volatile) { + field_info_(field_offset, field_type, is_volatile, 0) { + SetRawInputAt(0, cls); + } + + HStaticFieldGet(HInstruction* cls, + Primitive::Type field_type, + MemberOffset field_offset, + bool is_volatile, + uint32_t field_idx) + : HExpression(field_type, SideEffects::DependsOnSomething()), + field_info_(field_offset, field_type, is_volatile, field_idx) { SetRawInputAt(0, cls); } @@ -3583,9 +3641,11 @@ class HStaticFieldGet : public HExpression<1> { } const FieldInfo& GetFieldInfo() const { return field_info_; } + uint32_t GetFieldIndex() const { return field_info_.GetFieldIndex(); } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } + bool IsPrimitiveField() const { return (GetFieldType() != Primitive::kPrimNot); } DECLARE_INSTRUCTION(StaticFieldGet); @@ -3603,15 +3663,29 @@ class HStaticFieldSet : public HTemplateInstruction<2> { MemberOffset field_offset, bool is_volatile) : HTemplateInstruction(SideEffects::ChangesSomething()), - field_info_(field_offset, field_type, is_volatile) { + field_info_(field_offset, field_type, is_volatile, 0) { + SetRawInputAt(0, cls); + SetRawInputAt(1, value); + } + + HStaticFieldSet(HInstruction* cls, + HInstruction* value, + Primitive::Type field_type, + MemberOffset field_offset, + bool is_volatile, + uint32_t field_idx) + : HTemplateInstruction(SideEffects::ChangesSomething()), + field_info_(field_offset, field_type, is_volatile, field_idx) { SetRawInputAt(0, cls); SetRawInputAt(1, value); } const FieldInfo& GetFieldInfo() const { return field_info_; } + uint32_t GetFieldIndex() const { return field_info_.GetFieldIndex(); } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } + bool IsPrimitiveField() const { return (GetFieldType() != Primitive::kPrimNot); } HInstruction* GetValue() const { return InputAt(1); } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9..2d1c0ba 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. */ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 69c26f0..5f741b9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -54,9 +54,24 @@ #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" +#include "dex/quick/quick_compiler.h" namespace art { +class OptimizingCompiler; + +// fast compile path +CompiledMethod* TryFastCompile(CompilerDriver* driver, + Compiler* compiler, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject jclass_loader, + const DexFile& dex_file) __attribute__((weak)); + + /** * Used by the code generator, to allocate the code in a vector. */ @@ -262,6 +277,8 @@ void OptimizingCompiler::Init() { << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; visualizer_output_.reset(new std::ofstream(cfg_file_name)); + if (visualizer_output_->fail()) + LOG(INFO) << "can't create cfg file " << cfg_file_name; } if (driver->GetDumpStats()) { compilation_stats_.reset(new OptimizingCompilerStats()); @@ -301,11 +318,25 @@ static bool CanOptimize(const DexFile::CodeItem& code_item) { return code_item.tries_size_ == 0; } + +HOptimization* GetMoreOptimizing(HGraph*, + const DexCompilationUnit&, + CompilerDriver*, + OptimizingCompilerStats*) __attribute__((weak)); +HOptimization* GetMoreOptimizing(HGraph*, + const DexCompilationUnit&, + CompilerDriver*, + OptimizingCompilerStats*) { + return nullptr; +} + static void RunOptimizations(HOptimization* optimizations[], size_t length, PassInfoPrinter* pass_info_printer) { for (size_t i = 0; i < length; ++i) { HOptimization* optimization = optimizations[i]; + if (optimization == nullptr) + continue; { PassInfo pass_info(optimization->GetPassName(), pass_info_printer); optimization->Run(); @@ -321,48 +352,55 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - InstructionSimplifier simplify3(graph, stats, "instruction_simplifier_before_codegen"); - - IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* type_propagation = + new (arena) ReferenceTypePropagation(graph, dex_file, dex_compilation_unit, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_before_codegen"); + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, + dex_compilation_unit.GetDexFile(), driver); HOptimization* optimizations[] = { - &intrinsics, - &fold1, - &simplify1, - &dce1, - &inliner, + intrinsics, + fold1, + simplify1, + dce1, + inliner, + GetMoreOptimizing(graph, dex_compilation_unit, driver, stats), // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &type_propagation, - &simplify2, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + type_propagation, + simplify2, + dce2, // The codegen has a few assumptions that only the instruction simplifier can // satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - &simplify3, + simplify3, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); @@ -627,6 +665,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, CompiledMethod* method = nullptr; if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) && !compiler_driver->GetVerifiedMethod(&dex_file, method_idx)->HasRuntimeThrow()) { + // try fast compile before going into optimizing compiler + method = TryFastCompile(compiler_driver, delegate_.get(), code_item, access_flags, invoke_type, + class_def_idx, method_idx, jclass_loader, dex_file); + + if (method != nullptr) { + return method; + } method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader, dex_file); } else { @@ -658,4 +703,17 @@ bool IsCompilingWithCoreImage() { return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art"); } +// fast compile path +CompiledMethod* TryFastCompile(CompilerDriver*, + Compiler*, + const DexFile::CodeItem*, + uint32_t, + InvokeType, + uint16_t, + uint32_t, + jobject, + const DexFile&) { + return nullptr; +} + } // namespace art diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h index e4b1e7d..3215102 100644 --- a/compiler/utils/growable_array.h +++ b/compiler/utils/growable_array.h @@ -128,7 +128,9 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { } } // We should either have found the element, or it was the last (unscanned) element. - DCHECK(found || (element == elem_list_[num_used_ - 1])); + // if element is not in array, don't touch anything + if (!found && (element != elem_list_[num_used_ - 1])) + return; num_used_--; } diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk index 321cd75..3783c2b 100644 --- a/dex2oat/Android.mk +++ b/dex2oat/Android.mk @@ -21,6 +21,10 @@ include art/build/Android.executable.mk DEX2OAT_SRC_FILES := \ dex2oat.cc +ifeq ($$(art_target_or_host),target) +LOCAL_SHARED_LIBRARIES += libcutils +endif + # TODO: Remove this when the framework (installd) supports pushing the # right instruction-set parameter for the primary architecture. ifneq ($(filter ro.zygote=zygote64,$(PRODUCT_DEFAULT_PROPERTY_OVERRIDES)),) diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 9d7e68a..03679fa 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -75,6 +75,9 @@ #include "vector_output_stream.h" #include "well_known_classes.h" #include "zip_archive.h" +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" +#endif namespace art { @@ -847,6 +850,18 @@ class Dex2Oat FINAL { } } + // Override the number of compiler threads with optimal value (thru system property) + #ifdef HAVE_ANDROID_OS + const char* propertyName = "ro.sys.fw.dex2oat_thread_count"; + char thread_count_str[PROPERTY_VALUE_MAX]; + + if (property_get(propertyName, thread_count_str, "") > 0) { + if (ParseUint(thread_count_str, &thread_count_)) { + LOG(INFO) << "Adjusted thread count (for runtime dex2oat): " << thread_count_ << ", " << thread_count_str; + } + } + #endif + image_ = (!image_filename_.empty()); if (!requested_specific_compiler && !kUseOptimizingCompiler) { // If no specific compiler is requested, the current behavior is diff --git a/runtime/Android.mk b/runtime/Android.mk index b38f9bc..8c58b19 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -475,6 +475,9 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_SHARED_LIBRARIES += libutils # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted. LOCAL_SHARED_LIBRARIES += libcutils + ifeq ($(TARGET_HAVE_QC_PERF),true) + LOCAL_WHOLE_STATIC_LIBRARIES += libqc-art + endif else # host LOCAL_SHARED_LIBRARIES += libziparchive-host # For ashmem_create_region. diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S index 2af636e..049bd4f 100644 --- a/runtime/arch/arm/asm_support_arm.S +++ b/runtime/arch/arm/asm_support_arm.S @@ -50,6 +50,11 @@ // generated at END. .macro DEF_ENTRY thumb_or_arm, name \thumb_or_arm +// Clang ignores .thumb_func and requires an explicit .thumb. Investigate whether we should still +// carry around the .thumb_func. + .ifc \thumb_or_arm, .thumb_func + .thumb + .endif .type \name, #function .hidden \name // Hide this as a global symbol, so we do not incur plt calls. .global \name diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc index f8590d3..f7881a1 100644 --- a/runtime/arch/arm/instruction_set_features_arm.cc +++ b/runtime/arch/arm/instruction_set_features_arm.cc @@ -88,7 +88,7 @@ const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromVariant( "arm1136j-s", "arm1136jf-s", "arm1156t2-s", "arm1156t2f-s", "arm1176jz-s", "arm1176jzf-s", "cortex-a5", "cortex-a8", "cortex-a9", "cortex-a9-mp", "cortex-r4f", - "marvell-pj4", "mpcore", "mpcorenovfp" + "marvell-pj4", "mpcore", "mpcorenovfp", "scorpion" }; if (!FindVariantInArray(arm_variants_without_known_features, arraysize(arm_variants_without_known_features), diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index d02ab14..bb9229b 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -897,7 +897,7 @@ ENTRY art_quick_imt_conflict_trampoline ldr r0, [sp, #0] @ load caller Method* ldr r0, [r0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET] @ load dex_cache_resolved_methods add r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET @ get starting address of data - ldr r0, [r0, r12, lsl 2] @ load the target method + ldr r0, [r0, r12, lsl #2] @ load the target method b art_quick_invoke_interface_trampoline END art_quick_imt_conflict_trampoline diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc index 395cee8..613bb5c 100644 --- a/runtime/arch/arm64/instruction_set_features_arm64.cc +++ b/runtime/arch/arm64/instruction_set_features_arm64.cc @@ -39,7 +39,7 @@ const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromVariant( if (!needs_a53_835769_fix) { // Check to see if this is an expected variant. static const char* arm64_known_variants[] = { - "denver64" + "denver64", "kryo" }; if (!FindVariantInArray(arm64_known_variants, arraysize(arm64_known_variants), variant)) { std::ostringstream os; |