Diffstat (limited to 'compiler/dex')
-rw-r--r--  compiler/dex/quick/arm64/codegen_arm64.h  |   1
-rw-r--r--  compiler/dex/quick/arm64/int_arm64.cc     |  29
-rw-r--r--  compiler/dex/quick/arm64/utility_arm64.cc | 206
-rw-r--r--  compiler/dex/quick/gen_common.cc          |   2
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc          |   6
-rw-r--r--  compiler/dex/quick/mir_to_lir.h           |   3
-rw-r--r--  compiler/dex/quick/ralloc_util.cc         |  19
7 files changed, 190 insertions, 76 deletions
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 2c587b8..4680f8f 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -240,6 +240,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
   void OpRegCopyWide(RegStorage dest, RegStorage src) OVERRIDE;
 
   bool InexpensiveConstantInt(int32_t value) OVERRIDE;
+  bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE;
   bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
   bool InexpensiveConstantLong(int64_t value) OVERRIDE;
   bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 9403d5e..cc0d967 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -1192,22 +1192,7 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
 }
 
 void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                      RegLocation rl_src1, RegLocation rl_src2) {
-  if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) {
-    if (!rl_src2.is_const) {
-      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
-    }
-  } else {
-    // Associativity.
-    if (!rl_src2.is_const) {
-      DCHECK(rl_src1.is_const);
-      std::swap(rl_src1, rl_src2);
-    }
-  }
-  DCHECK(rl_src2.is_const);
-
   OpKind op = kOpBkpt;
-  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
@@ -1233,6 +1218,20 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_de
       LOG(FATAL) << "Unexpected opcode";
   }
 
+  if (op == kOpSub) {
+    if (!rl_src2.is_const) {
+      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    }
+  } else {
+    // Associativity.
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      std::swap(rl_src1, rl_src2);
+    }
+  }
+  DCHECK(rl_src2.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
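In the int_arm64.cc change above, the opcode-to-OpKind mapping now runs before the constant-operand handling, so the subtraction special case can be detected with a single `op == kOpSub' test instead of enumerating both SUB_LONG opcodes. The "Associativity" comment in effect exploits commutativity: the constant operand is swapped into rl_src2. A hypothetical illustration of the resulting behaviour:

    // v0 = 10 + v1 (ADD_LONG, constant on the left): commutative, so the constant
    //               is swapped into rl_src2 and encoded as an immediate operand.
    // v0 = 10 - v1 (SUB_LONG, constant on the left): not commutative and rl_src2
    //               is non-constant, so it falls back to GenArithOpLong().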
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index cd1840a..5326e74 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -269,8 +269,47 @@ int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
   return (n << 12 | imm_r << 6 | imm_s);
 }
 
+// Maximum number of instructions to use for encoding the immediate.
+static const int max_num_ops_per_const_load = 2;
+
+/**
+ * @brief Return the number of fast halfwords in the given uint64_t integer.
+ * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
+ * number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
+ * a more accurate description.
+ * @param value The input 64-bit integer.
+ * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
+ * the number of halfwords with all bits unset (0) and m is the number of halfwords with all
+ * bits set (0xffff). Additionally (retval & 0x8) is set when m > n.
+ */
+static int GetNumFastHalfWords(uint64_t value) {
+  unsigned int num_0000_halfwords = 0;
+  unsigned int num_ffff_halfwords = 0;
+  for (int shift = 0; shift < 64; shift += 16) {
+    uint16_t halfword = static_cast<uint16_t>(value >> shift);
+    if (halfword == 0)
+      num_0000_halfwords++;
+    else if (halfword == UINT16_C(0xffff))
+      num_ffff_halfwords++;
+  }
+  if (num_0000_halfwords >= num_ffff_halfwords) {
+    DCHECK_LE(num_0000_halfwords, 4U);
+    return num_0000_halfwords;
+  } else {
+    DCHECK_LE(num_ffff_halfwords, 4U);
+    return num_ffff_halfwords | 0x8;
+  }
+}
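A worked example of the return-value encoding of GetNumFastHalfWords() (input values chosen purely for illustration):

    GetNumFastHalfWords(0x0000123400005678);  // halfwords {0x5678, 0x0000, 0x1234, 0x0000}:
                                              // two 0x0000 halfwords -> returns 2
    GetNumFastHalfWords(0xffffffffffff1234);  // halfwords {0x1234, 0xffff, 0xffff, 0xffff}:
                                              // three 0xffff halfwords -> returns 3 | 0x8 = 0xb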
+
+// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine
+// how a constant is considered for promotion. If the constant is "inexpensive" then the
+// promotion algorithm will give it a low priority for promotion, even when it is referenced
+// many times in the code.
+
 bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
-  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
+  // We therefore return true and give it a low priority for promotion.
+  return true;
 }
 
 bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
@@ -278,13 +317,70 @@ bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
   return EncodeImmSingle(value) >= 0;
 }
 
 bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
-  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
+  if (num_slow_halfwords <= max_num_ops_per_const_load) {
+    return true;
+  }
+  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
 }
 
 bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
   return EncodeImmDouble(value) >= 0;
 }
 
+// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
+// when one of the operands is an immediate (e.g. register version or immediate version of add).
+
+bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::IF_EQ:
+    case Instruction::IF_NE:
+    case Instruction::IF_LT:
+    case Instruction::IF_GE:
+    case Instruction::IF_GT:
+    case Instruction::IF_LE:
+    case Instruction::ADD_INT:
+    case Instruction::ADD_INT_2ADDR:
+    case Instruction::SUB_INT:
+    case Instruction::SUB_INT_2ADDR:
+      // The code below is consistent with the implementation of OpRegRegImm().
+      {
+        int32_t abs_value = std::abs(value);
+        if (abs_value < 0x1000) {
+          return true;
+        } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+          return true;
+        }
+        return false;
+      }
+    case Instruction::SHL_INT:
+    case Instruction::SHL_INT_2ADDR:
+    case Instruction::SHR_INT:
+    case Instruction::SHR_INT_2ADDR:
+    case Instruction::USHR_INT:
+    case Instruction::USHR_INT_2ADDR:
+      return true;
+    case Instruction::AND_INT:
+    case Instruction::AND_INT_2ADDR:
+    case Instruction::AND_INT_LIT16:
+    case Instruction::AND_INT_LIT8:
+    case Instruction::OR_INT:
+    case Instruction::OR_INT_2ADDR:
+    case Instruction::OR_INT_LIT16:
+    case Instruction::OR_INT_LIT8:
+    case Instruction::XOR_INT:
+    case Instruction::XOR_INT_2ADDR:
+    case Instruction::XOR_INT_LIT16:
+    case Instruction::XOR_INT_LIT8:
+      if (value == 0 || value == INT32_C(-1)) {
+        return true;
+      }
+      return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
+    default:
+      return false;
+  }
+}
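The ADD/SUB/compare case above mirrors the two A64 add/sub immediate forms: a 12-bit unsigned immediate, optionally shifted left by 12. Illustrative values (not from the patch):

    // value = 0xabc     -> abs_value < 0x1000:                    add w0, w1, #0xabc
    // value = 0x123000  -> low 12 bits clear and 0x123 < 0x1000:  add w0, w1, #0x123, lsl #12
    // value = 0x123456  -> fits neither form; the constant must be materialized into a
    //                      register first, so it is not "inexpensive" for ADD_INT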
 
 /*
  * Load a immediate using one single instruction when possible; otherwise
  * use a pair of movz and movk instructions.
@@ -358,9 +454,6 @@ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
 
 // TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
 LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
-  // Maximum number of instructions to use for encoding the immediate.
-  const int max_num_ops = 2;
-
   if (r_dest.IsFloat()) {
     return LoadFPConstantValueWide(r_dest, value);
   }
@@ -378,19 +471,12 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
   }
 
   // At least one in value's halfwords is not 0x0, nor 0xffff: find out how many.
-  int num_0000_halfwords = 0;
-  int num_ffff_halfwords = 0;
   uint64_t uvalue = static_cast<uint64_t>(value);
-  for (int shift = 0; shift < 64; shift += 16) {
-    uint16_t halfword = static_cast<uint16_t>(uvalue >> shift);
-    if (halfword == 0)
-      num_0000_halfwords++;
-    else if (halfword == UINT16_C(0xffff))
-      num_ffff_halfwords++;
-  }
-  int num_fast_halfwords = std::max(num_0000_halfwords, num_ffff_halfwords);
+  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
+  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
+  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;
 
-  if (num_fast_halfwords < 3) {
+  if (num_slow_halfwords > 1) {
     // A single movz/movn is not enough. Try the logical immediate route.
     int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
     if (log_imm >= 0) {
@@ -398,19 +484,19 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
     }
   }
 
-  if (num_fast_halfwords >= 4 - max_num_ops) {
+  if (num_slow_halfwords <= max_num_ops_per_const_load) {
     // We can encode the number using a movz/movn followed by one or more movk.
     ArmOpcode op;
     uint16_t background;
     LIR* res = nullptr;
 
     // Decide whether to use a movz or a movn.
-    if (num_0000_halfwords >= num_ffff_halfwords) {
-      op = WIDE(kA64Movz3rdM);
-      background = 0;
-    } else {
+    if (more_ffff_halfwords) {
       op = WIDE(kA64Movn3rdM);
       background = 0xffff;
+    } else {
+      op = WIDE(kA64Movz3rdM);
+      background = 0;
     }
 
     // Emit the first instruction (movz, movn).
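As a sketch of the movz/movn path above (constant chosen for illustration): a value with two 0xffff halfwords has num_slow_halfwords == 2 and more_ffff_halfwords == true, so it is built from a 0xffff background with a movn plus a single movk:

    // value = 0xffffffff12345678, halfwords {0x5678, 0x1234, 0xffff, 0xffff}
    movn x0, #0xa987              // x0 = ~0xa987 = 0xffffffffffff5678
    movk x0, #0x1234, lsl #16     // x0 = 0xffffffff12345678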
@@ -726,7 +812,7 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1
   int64_t abs_value = (neg) ? -value : value;
   ArmOpcode opcode = kA64Brk1d;
   ArmOpcode alt_opcode = kA64Brk1d;
-  int32_t log_imm = -1;
+  bool is_logical = false;
   bool is_wide = r_dest.Is64Bit();
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
   int info = 0;
@@ -761,65 +847,89 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1
         opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
-        log_imm = -1;
         alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
         info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
       }
       break;
-    // case kOpRsub:
-    //   opcode = kThumb2RsubRRI8M;
-    //   alt_opcode = kThumb2RsubRRR;
-    //   break;
     case kOpAdc:
-      log_imm = -1;
       alt_opcode = kA64Adc3rrr;
       break;
     case kOpSbc:
-      log_imm = -1;
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
-      log_imm = EncodeLogicalImmediate(is_wide, value);
+      is_logical = true;
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
-      log_imm = EncodeLogicalImmediate(is_wide, value);
+      is_logical = true;
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
-      log_imm = EncodeLogicalImmediate(is_wide, value);
+      is_logical = true;
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
-      log_imm = -1;
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

-  if (log_imm >= 0) {
-    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
-  } else {
-    RegStorage r_scratch;
-    if (is_wide) {
-      r_scratch = AllocTempWide();
-      LoadConstantWide(r_scratch, value);
-    } else {
-      r_scratch = AllocTemp();
-      LoadConstant(r_scratch, value);
-    }
-    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
-    else
-      res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
-    FreeTemp(r_scratch);
-    return res;
+  if (is_logical) {
+    int log_imm = EncodeLogicalImmediate(is_wide, value);
+    if (log_imm >= 0) {
+      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
+    } else {
+      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
+      // to a - possibly negated - assignment.
+      if (value == 0) {
+        switch (op) {
+          case kOpOr:
+          case kOpXor:
+            // Or/Xor by zero reduces to an assignment.
+            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
+          default:
+            // And by zero reduces to a `mov rdest, xzr'.
+            DCHECK(op == kOpAnd);
+            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
+        }
+      } else if (value == INT64_C(-1)
+                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
+        switch (op) {
+          case kOpAnd:
+            // And by -1 reduces to an assignment.
+            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
+          case kOpXor:
+            // Xor by -1 reduces to an `mvn rdest, rsrc'.
+            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
+          default:
+            // Or by -1 reduces to a `mvn rdest, xzr'.
+            DCHECK(op == kOpOr);
+            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
+        }
+      }
+    }
  }
+
+  RegStorage r_scratch;
+  if (is_wide) {
+    r_scratch = AllocTempWide();
+    LoadConstantWide(r_scratch, value);
+  } else {
+    r_scratch = AllocTemp();
+    LoadConstant(r_scratch, value);
+  }
+  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
+    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
+  else
+    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+  FreeTemp(r_scratch);
+  return res;
 }
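Neither 0 nor ~0 is representable as an A64 bitmask ("logical") immediate, so EncodeLogicalImmediate() fails for exactly the values that the reductions above catch. Illustrative pseudo-calls (register names hypothetical):

    // OpRegRegImm(kOpXor, w0, w1, -1)  ->  mvn w0, w1   (no scratch register or constant load)
    // OpRegRegImm(kOpAnd, x0, x1, 0)   ->  mov x0, xzr
    // OpRegRegImm(kOpOr,  w0, w1, 0)   ->  mov w0, w1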
 
 LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index aae9155..3e03750 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -256,7 +256,7 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
     RegLocation rl_temp = UpdateLoc(rl_src2);
     int32_t constant_value = mir_graph_->ConstantValue(rl_src2);
     if ((rl_temp.location == kLocDalvikFrame) &&
-        InexpensiveConstantInt(constant_value)) {
+        InexpensiveConstantInt(constant_value, opcode)) {
       // OK - convert this to a compare immediate and branch
       OpCmpImmBranch(cond, rl_src1.reg, mir_graph_->ConstantValue(rl_src2), taken);
       return;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 4d8b91e..e519011 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -926,11 +926,11 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
     case Instruction::XOR_INT:
     case Instruction::XOR_INT_2ADDR:
       if (rl_src[0].is_const &&
-          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]))) {
+          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]), opcode)) {
         GenArithOpIntLit(opcode, rl_dest, rl_src[1],
                          mir_graph_->ConstantValue(rl_src[0].orig_sreg));
       } else if (rl_src[1].is_const &&
-          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) {
+          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) {
         GenArithOpIntLit(opcode, rl_dest, rl_src[0],
                          mir_graph_->ConstantValue(rl_src[1].orig_sreg));
       } else {
@@ -951,7 +951,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list
     case Instruction::USHR_INT:
     case Instruction::USHR_INT_2ADDR:
       if (rl_src[1].is_const &&
-          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) {
+          InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) {
         GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1]));
       } else {
         GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 70c17da..d8b3083 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1445,6 +1445,9 @@ class Mir2Lir : public Backend {
     virtual bool InexpensiveConstantFloat(int32_t value) = 0;
     virtual bool InexpensiveConstantLong(int64_t value) = 0;
     virtual bool InexpensiveConstantDouble(int64_t value) = 0;
+    virtual bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
+      return InexpensiveConstantInt(value);
+    }
 
     // May be optimized by targets.
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 45244e1..be966e1 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -1171,12 +1171,13 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num
       } else {
         counts[p_map_idx].count += use_count;
       }
-    } else if (!IsInexpensiveConstant(loc)) {
+    } else {
       if (loc.wide && WideGPRsAreAliases()) {
-        // Longs and doubles can be counted together.
         i++;
       }
-      counts[p_map_idx].count += use_count;
+      if (!IsInexpensiveConstant(loc)) {
+        counts[p_map_idx].count += use_count;
+      }
     }
   }
 }
@@ -1185,9 +1186,10 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num
 static int SortCounts(const void *val1, const void *val2) {
   const Mir2Lir::RefCounts* op1 = reinterpret_cast<const Mir2Lir::RefCounts*>(val1);
   const Mir2Lir::RefCounts* op2 = reinterpret_cast<const Mir2Lir::RefCounts*>(val2);
-  // Note that we fall back to sorting on reg so we get stable output
-  // on differing qsort implementations (such as on host and target or
-  // between local host and build servers).
+  // Note that we fall back to sorting on reg so we get stable output on differing qsort
+  // implementations (such as on host and target or between local host and build servers).
+  // Note also that if a wide val1 and a non-wide val2 have the same count, then val1 always
+  // ``loses'' (as STARTING_WIDE_SREG is or-ed in val1->s_reg).
   return (op1->count == op2->count)
           ? (op1->s_reg - op2->s_reg)
           : (op1->count < op2->count ? 1 : -1);
@@ -1230,8 +1232,8 @@ void Mir2Lir::DoPromotion() {
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
    */
-  size_t core_reg_count_size = cu_->target64 ? num_regs * 2 : num_regs;
-  size_t fp_reg_count_size = num_regs * 2;
+  size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2;
+  size_t fp_reg_count_size = WideFPRsAreAliases() ? num_regs : num_regs * 2;
   RefCounts *core_regs =
       static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size,
                                             kArenaAllocRegAlloc));
@@ -1261,7 +1263,6 @@ void Mir2Lir::DoPromotion() {
   // Sum use counts of SSA regs by original Dalvik vreg.
   CountRefs(core_regs, fp_regs, num_regs);
 
-  // Sort the count arrays
   qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts);
   qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts);
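A quick illustration of the new count-array sizing in DoPromotion(), with hypothetical numbers: the target-specific WideGPRsAreAliases()/WideFPRsAreAliases() queries replace the cu_->target64 test, so the layout of the counts array follows the register file rather than the pointer size.

    // num_regs = 16:
    //   WideGPRsAreAliases() == true   ->  core_reg_count_size == 16
    //                                      (a wide vreg is one promotion candidate)
    //   WideGPRsAreAliases() == false  ->  core_reg_count_size == 32
    //                                      (low and high halves are counted separately)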