/* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This file contains codegen for the Thumb2 ISA. */ #include "codegen_arm.h" #include "arch/instruction_set_features.h" #include "arm_lir.h" #include "base/bit_utils.h" #include "base/logging.h" #include "dex/compiler_ir.h" #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "driver/compiler_driver.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array-inl.h" namespace art { LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) { OpRegReg(kOpCmp, src1, src2); return OpCondBranch(cond, target); } /* * Generate a Thumb2 IT instruction, which can nullify up to * four subsequent instructions based on a condition and its * inverse. The condition applies to the first instruction, which * is executed if the condition is met. The string "guide" consists * of 0 to 3 chars, and applies to the 2nd through 4th instruction. * A "T" means the instruction is executed if the condition is * met, and an "E" means the instruction is executed if the condition * is not met. */ LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) { int mask; int mask3 = 0; int mask2 = 0; int mask1 = 0; ArmConditionCode code = ArmConditionEncoding(ccode); int cond_bit = code & 1; int alt_bit = cond_bit ^ 1; switch (strlen(guide)) { case 3: mask1 = (guide[2] == 'T') ? cond_bit : alt_bit; FALLTHROUGH_INTENDED; case 2: mask2 = (guide[1] == 'T') ? cond_bit : alt_bit; FALLTHROUGH_INTENDED; case 1: mask3 = (guide[0] == 'T') ? cond_bit : alt_bit; break; case 0: break; default: LOG(FATAL) << "OAT: bad case in OpIT"; UNREACHABLE(); } mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | (1 << (3 - strlen(guide))); return NewLIR2(kThumb2It, code, mask); } void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) { int mask; int mask3 = 0; int mask2 = 0; int mask1 = 0; ArmConditionCode code = static_cast(it->operands[0]); int cond_bit = code & 1; int alt_bit = cond_bit ^ 1; switch (strlen(new_guide)) { case 3: mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit; FALLTHROUGH_INTENDED; case 2: mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit; FALLTHROUGH_INTENDED; case 1: mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit; break; case 0: break; default: LOG(FATAL) << "OAT: bad case in UpdateIT"; UNREACHABLE(); } mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | (1 << (3 - strlen(new_guide))); it->operands[1] = mask; } void ArmMir2Lir::OpEndIT(LIR* it) { // TODO: use the 'it' pointer to do some checks with the LIR, for example // we could check that the number of instructions matches the mask // in the IT instruction. CHECK(it != nullptr); GenBarrier(); } /* * 64-bit 3way compare function. * mov rX, #-1 * cmp op1hi, op2hi * blt done * bgt flip * sub rX, op1lo, op2lo (treat as unsigned) * beq done * ite hi * mov(hi) rX, #-1 * mov(!hi) rX, #1 * flip: * neg rX * done: */ void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { LIR* target1; LIR* target2; rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); RegStorage t_reg = AllocTemp(); LoadConstant(t_reg, -1); OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); LIR* branch1 = OpCondBranch(kCondLt, nullptr); LIR* branch2 = OpCondBranch(kCondGt, nullptr); OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); LIR* branch3 = OpCondBranch(kCondEq, nullptr); LIR* it = OpIT(kCondHi, "E"); NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1)); LoadConstant(t_reg, 1); OpEndIT(it); target2 = NewLIR0(kPseudoTargetLabel); OpRegReg(kOpNeg, t_reg, t_reg); target1 = NewLIR0(kPseudoTargetLabel); RegLocation rl_temp = LocCReturn(); // Just using as template, will change rl_temp.reg.SetReg(t_reg.GetReg()); StoreValue(rl_dest, rl_temp); FreeTemp(t_reg); branch1->target = target1; branch2->target = target2; branch3->target = branch1->target; } void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode) { int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); DCHECK_GE(ModifiedImmediate(val_lo), 0); DCHECK_GE(ModifiedImmediate(val_hi), 0); LIR* taken = &block_label_list_[bb->taken]; LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegStorage low_reg = rl_src1.reg.GetLow(); RegStorage high_reg = rl_src1.reg.GetHigh(); if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { RegStorage t_reg = AllocTemp(); NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0); FreeTemp(t_reg); OpCondBranch(ccode, taken); return; } switch (ccode) { case kCondEq: case kCondNe: OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken); break; case kCondLt: OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); ccode = kCondUlt; break; case kCondLe: OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); ccode = kCondLs; break; case kCondGt: OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); ccode = kCondHi; break; case kCondGe: OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); ccode = kCondUge; break; default: LOG(FATAL) << "Unexpected ccode: " << ccode; } OpCmpImmBranch(ccode, low_reg, val_lo, taken); } void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, int32_t true_val, int32_t false_val, RegStorage rs_dest, RegisterClass dest_reg_class) { UNUSED(dest_reg_class); // TODO: Generalize the IT below to accept more than one-instruction loads. DCHECK(InexpensiveConstantInt(true_val)); DCHECK(InexpensiveConstantInt(false_val)); if ((true_val == 0 && code == kCondEq) || (false_val == 0 && code == kCondNe)) { OpRegRegReg(kOpSub, rs_dest, left_op, right_op); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(kCondNe, ""); LoadConstant(rs_dest, code == kCondEq ? false_val : true_val); OpEndIT(it); return; } OpRegReg(kOpCmp, left_op, right_op); // Same? LIR* it = OpIT(code, "E"); // if-convert the test LoadConstant(rs_dest, true_val); // .eq case - load true LoadConstant(rs_dest, false_val); // .eq case - load true OpEndIT(it); } void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { UNUSED(bb); RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); // Avoid using float regs here. RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; rl_src = LoadValue(rl_src, src_reg_class); ConditionCode ccode = mir->meta.ccode; if (mir->ssa_rep->num_uses == 1) { // CONST case int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; rl_result = EvalLoc(rl_dest, result_reg_class, true); // Change kCondNe to kCondEq for the special cases below. if (ccode == kCondNe) { ccode = kCondEq; std::swap(true_val, false_val); } bool cheap_false_val = InexpensiveConstantInt(false_val); if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(kCondLs, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { OpRegImm(kOpCmp, rl_src.reg, 0); LIR* it = OpIT(ccode, "E"); LoadConstant(rl_result.reg, true_val); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else { // Unlikely case - could be tuned. RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class); RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class); LoadConstant(t_reg1, true_val); LoadConstant(t_reg2, false_val); OpRegImm(kOpCmp, rl_src.reg, 0); LIR* it = OpIT(ccode, "E"); OpRegCopy(rl_result.reg, t_reg1); OpRegCopy(rl_result.reg, t_reg2); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } } else { // MOVE case RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; rl_true = LoadValue(rl_true, result_reg_class); rl_false = LoadValue(rl_false, result_reg_class); rl_result = EvalLoc(rl_dest, result_reg_class, true); OpRegImm(kOpCmp, rl_src.reg, 0); LIR* it = nullptr; if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? it = OpIT(NegateComparison(ccode), ""); OpRegCopy(rl_result.reg, rl_false.reg); } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? it = OpIT(ccode, ""); OpRegCopy(rl_result.reg, rl_true.reg); } else { // Normal - select between the two. it = OpIT(ccode, "E"); OpRegCopy(rl_result.reg, rl_true.reg); OpRegCopy(rl_result.reg, rl_false.reg); } OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } StoreValue(rl_dest, rl_result); } void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); // Normalize such that if either operand is constant, src2 will be constant. ConditionCode ccode = mir->meta.ccode; if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); ccode = FlipComparisonOrder(ccode); } if (rl_src2.is_const) { rl_src2 = UpdateLocWide(rl_src2); // Do special compare/branch against simple const operand if not already in registers. int64_t val = mir_graph_->ConstantValueWide(rl_src2); if ((rl_src2.location != kLocPhysReg) && ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); return; } } LIR* taken = &block_label_list_[bb->taken]; LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); switch (ccode) { case kCondEq: OpCondBranch(kCondNe, not_taken); break; case kCondNe: OpCondBranch(kCondNe, taken); break; case kCondLt: OpCondBranch(kCondLt, taken); OpCondBranch(kCondGt, not_taken); ccode = kCondUlt; break; case kCondLe: OpCondBranch(kCondLt, taken); OpCondBranch(kCondGt, not_taken); ccode = kCondLs; break; case kCondGt: OpCondBranch(kCondGt, taken); OpCondBranch(kCondLt, not_taken); ccode = kCondHi; break; case kCondGe: OpCondBranch(kCondGt, taken); OpCondBranch(kCondLt, not_taken); ccode = kCondUge; break; default: LOG(FATAL) << "Unexpected ccode: " << ccode; } OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); OpCondBranch(ccode, taken); } /* * Generate a register comparison to an immediate and branch. Caller * is responsible for setting branch target field. */ LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) { LIR* branch = nullptr; ArmConditionCode arm_cond = ArmConditionEncoding(cond); /* * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit * compare-and-branch if zero is ideal if it will reach. However, because null checks * branch forward to a slow path, they will frequently not reach - and thus have to * be converted to a long form during assembly (which will trigger another assembly * pass). Here we estimate the branch distance for checks, and if large directly * generate the long form in an attempt to avoid an extra assembly pass. * TODO: consider interspersing slowpaths in code following unconditional branches. */ bool skip = ((target != nullptr) && (target->opcode == kPseudoThrowTarget)); skip &= ((mir_graph_->GetNumDalvikInsns() - current_dalvik_offset_) > 64); if (!skip && reg.Low8() && (check_value == 0)) { if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) { branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz, reg.GetReg(), 0); } else if (arm_cond == kArmCondLs) { // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz. // This case happens for a bounds check of array[0]. branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0); } } if (branch == nullptr) { OpRegImm(kOpCmp, reg, check_value); branch = NewLIR2(kThumbBCond, 0, arm_cond); } branch->target = target; return branch; } LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { LIR* res; int opcode; // If src or dest is a pair, we'll be using low reg. if (r_dest.IsPair()) { r_dest = r_dest.GetLow(); } if (r_src.IsPair()) { r_src = r_src.GetLow(); } if (r_dest.IsFloat() || r_src.IsFloat()) return OpFpRegCopy(r_dest, r_src); if (r_dest.Low8() && r_src.Low8()) opcode = kThumbMovRR; else if (!r_dest.Low8() && !r_src.Low8()) opcode = kThumbMovRR_H2H; else if (r_dest.Low8()) opcode = kThumbMovRR_H2L; else opcode = kThumbMovRR_L2H; res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg()); if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; } return res; } void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { if (r_dest != r_src) { LIR* res = OpRegCopyNoInsert(r_dest, r_src); AppendLIR(res); } } void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { if (r_dest != r_src) { bool dest_fp = r_dest.IsFloat(); bool src_fp = r_src.IsFloat(); DCHECK(r_dest.Is64Bit()); DCHECK(r_src.Is64Bit()); // Note: If the register is get by register allocator, it should never be a pair. // But some functions in mir_2_lir assume 64-bit registers are 32-bit register pairs. // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect(). if (dest_fp && r_dest.IsPair()) { r_dest = As64BitFloatReg(r_dest); } if (src_fp && r_src.IsPair()) { r_src = As64BitFloatReg(r_src); } if (dest_fp) { if (src_fp) { OpRegCopy(r_dest, r_src); } else { NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg()); } } else { if (src_fp) { NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg()); } else { // Handle overlap if (r_src.GetHighReg() == r_dest.GetLowReg()) { DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg()); OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); OpRegCopy(r_dest.GetLow(), r_src.GetLow()); } else { OpRegCopy(r_dest.GetLow(), r_src.GetLow()); OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); } } } } } // Table of magic divisors struct MagicTable { uint32_t magic; uint32_t shift; DividePattern pattern; }; static const MagicTable magic_table[] = { {0, 0, DivideNone}, // 0 {0, 0, DivideNone}, // 1 {0, 0, DivideNone}, // 2 {0x55555556, 0, Divide3}, // 3 {0, 0, DivideNone}, // 4 {0x66666667, 1, Divide5}, // 5 {0x2AAAAAAB, 0, Divide3}, // 6 {0x92492493, 2, Divide7}, // 7 {0, 0, DivideNone}, // 8 {0x38E38E39, 1, Divide5}, // 9 {0x66666667, 2, Divide5}, // 10 {0x2E8BA2E9, 1, Divide5}, // 11 {0x2AAAAAAB, 1, Divide5}, // 12 {0x4EC4EC4F, 2, Divide5}, // 13 {0x92492493, 3, Divide7}, // 14 {0x88888889, 3, Divide7}, // 15 }; // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4) bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, RegLocation rl_dest, int lit) { UNUSED(dalvik_opcode); if ((lit < 0) || (lit >= static_cast(sizeof(magic_table)/sizeof(magic_table[0])))) { return false; } DividePattern pattern = magic_table[lit].pattern; if (pattern == DivideNone) { return false; } RegStorage r_magic = AllocTemp(); LoadConstant(r_magic, magic_table[lit].magic); rl_src = LoadValue(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage r_hi = AllocTemp(); RegStorage r_lo = AllocTemp(); // rl_dest and rl_src might overlap. // Reuse r_hi to save the div result for reminder case. RegStorage r_div_result = is_div ? rl_result.reg : r_hi; NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg()); switch (pattern) { case Divide3: OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31)); break; case Divide5: OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, EncodeShift(kArmAsr, magic_table[lit].shift)); break; case Divide7: OpRegReg(kOpAdd, r_hi, rl_src.reg); OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, EncodeShift(kArmAsr, magic_table[lit].shift)); break; default: LOG(FATAL) << "Unexpected pattern: " << pattern; } if (!is_div) { // div_result = src / lit // tmp1 = div_result * lit // dest = src - tmp1 RegStorage tmp1 = r_lo; EasyMultiplyOp ops[2]; bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops); DCHECK_NE(canEasyMultiply, false); GenEasyMultiplyTwoOps(tmp1, r_div_result, ops); OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1); } StoreValue(rl_dest, rl_result); return true; } // Try to convert *lit to 1 RegRegRegShift/RegRegShift form. bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) { if (lit == 0) { // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will // have been optimized away earlier. op->op = kOpInvalid; op->shift = 0; return true; } if (IsPowerOfTwo(lit)) { op->op = kOpLsl; op->shift = CTZ(lit); return true; } if (IsPowerOfTwo(lit - 1)) { op->op = kOpAdd; op->shift = CTZ(lit - 1); return true; } if (IsPowerOfTwo(lit + 1)) { op->op = kOpRsub; op->shift = CTZ(lit + 1); return true; } op->op = kOpInvalid; op->shift = 0; return false; } // Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms. bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { if (GetEasyMultiplyOp(lit, &ops[0])) { ops[1].op = kOpInvalid; ops[1].shift = 0; return true; } int lit1 = lit; uint32_t shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpLsl; ops[1].shift = shift; return true; } lit1 = lit - 1; shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpAdd; ops[1].shift = shift; return true; } lit1 = lit + 1; shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpRsub; ops[1].shift = shift; return true; } ops[1].op = kOpInvalid; ops[1].shift = 0; return false; } // Generate instructions to do multiply. // Additional temporary register is required, // if it need to generate 2 instructions and src/dest overlap. void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) { // tmp1 = ( src << shift1) + [ src | -src | 0 ] // dest = (tmp1 << shift2) + [ src | -src | 0 ] RegStorage r_tmp1; if (ops[1].op == kOpInvalid) { r_tmp1 = r_dest; } else if (r_dest.GetReg() != r_src.GetReg()) { r_tmp1 = r_dest; } else { r_tmp1 = AllocTemp(); } switch (ops[0].op) { case kOpLsl: OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift); break; case kOpAdd: OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); break; case kOpRsub: OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); break; default: DCHECK_EQ(ops[0].op, kOpInvalid); break; } switch (ops[1].op) { case kOpInvalid: return; case kOpLsl: OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift); break; case kOpAdd: OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); break; case kOpRsub: OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); break; default: LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps"; break; } } bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { EasyMultiplyOp ops[2]; if (!GetEasyMultiplyTwoOps(lit, ops)) { return false; } rl_src = LoadValue(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops); StoreValue(rl_dest, rl_result); return true; } RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) { UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags); LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; UNREACHABLE(); } RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) { UNUSED(rl_dest, rl_src1, lit, is_div); LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; UNREACHABLE(); } RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // Put the literal in a temp. RegStorage lit_temp = AllocTemp(); LoadConstant(lit_temp, lit); // Use the generic case for div/rem with arg2 in a register. // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); FreeTemp(lit_temp); return rl_result; } RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2, bool is_div) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_div) { // Simple case, use sdiv instruction. OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2); } else { // Remainder case, use the following code: // temp = reg1 / reg2 - integer division // temp = temp * reg2 // dest = reg1 - temp RegStorage temp = AllocTemp(); OpRegRegReg(kOpDiv, temp, reg1, reg2); OpRegReg(kOpMul, temp, reg2); OpRegRegReg(kOpSub, rl_result.reg, reg1, temp); FreeTemp(temp); } return rl_result; } bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK_EQ(cu_->instruction_set, kThumb2); if (is_long) { return false; } RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[1]; rl_src1 = LoadValue(rl_src1, kCoreReg); rl_src2 = LoadValue(rl_src2, kCoreReg); RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E"); OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); OpEndIT(it); StoreValue(rl_dest, rl_result); return true; } bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_dest = InlineTarget(info); RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (size == k64) { // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); } else { Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); } StoreValueWide(rl_dest, rl_result); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); StoreValue(rl_dest, rl_result); } return true; } bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); if (size == k64) { // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile); StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); } return true; } // Generate a CAS with memory_order_seq_cst semantics. bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { DCHECK_EQ(cu_->instruction_set, kThumb2); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] RegLocation rl_src_expected = info->args[4]; // int, long or Object // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] RegLocation rl_dest = InlineTarget(info); // boolean place for result // We have only 5 temporary registers available and actually only 4 if the InlineTarget // above locked one of the temps. For a straightforward CAS64 we need 7 registers: // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop // into the same temps, reducing the number of required temps down to 5. We shall work // around the potentially locked temp by using LR for r_ptr, unconditionally. // TODO: Pass information about the need for more temps to the stack frame generation // code so that we can rely on being able to allocate enough temps. DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp()); MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); LockTemp(rs_rARM_LR); bool load_early = true; if (is_long) { RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() : rl_src_expected.reg; RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() : rl_src_new_value.reg; bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat(); bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat(); bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg); bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg); if (!expected_is_good_reg && !new_value_is_good_reg) { // None of expected/new_value is non-temp reg, need to load both late load_early = false; // Make sure they are not in the temp regs and the load will not be skipped. if (expected_is_core_reg) { FlushRegWide(rl_src_expected.reg); ClobberSReg(rl_src_expected.s_reg_low); ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); rl_src_expected.location = kLocDalvikFrame; } if (new_value_is_core_reg) { FlushRegWide(rl_src_new_value.reg); ClobberSReg(rl_src_new_value.s_reg_low); ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); rl_src_new_value.location = kLocDalvikFrame; } } } // Prevent reordering with prior memory operations. GenMemBarrier(kAnyStore); RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_new_value; if (!is_long) { rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg); } else if (load_early) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); } if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { // Mark card for object assuming new value is stored. MarkGCCard(0, rl_new_value.reg, rl_object.reg); } RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); RegStorage r_ptr = rs_rARM_LR; OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg); // Free now unneeded rl_object and rl_offset to give more temps. ClobberSReg(rl_object.s_reg_low); FreeTemp(rl_object.reg); ClobberSReg(rl_offset.s_reg_low); FreeTemp(rl_offset.reg); RegLocation rl_expected; if (!is_long) { rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg); } else if (load_early) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. RegStorage low_reg = AllocTemp(); RegStorage high_reg = AllocTemp(); rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg); rl_expected = rl_new_value; } // do { // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); // result = tmp != 0; RegStorage r_tmp = AllocTemp(); LIR* target = NewLIR0(kPseudoTargetLabel); LIR* it = nullptr; if (is_long) { RegStorage r_tmp_high = AllocTemp(); if (!load_early) { LoadValueDirectWide(rl_src_expected, rl_expected.reg); } NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow()); OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh()); if (!load_early) { LoadValueDirectWide(rl_src_new_value, rl_new_value.reg); } // Make sure we use ORR that sets the ccode if (r_tmp.Low8() && r_tmp_high.Low8()) { NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg()); } else { NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0); } FreeTemp(r_tmp_high); // Now unneeded DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); } else { NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } // Still one conditional left from OpIT(kCondEq, "T") from either branch OpRegImm(kOpCmp /* eq */, r_tmp, 1); OpEndIT(it); OpCondBranch(kCondEq, target); if (!load_early) { FreeTemp(rl_expected.reg); // Now unneeded. } // Prevent reordering with subsequent memory operations. GenMemBarrier(kLoadAny); // result := (tmp1 != 0) ? 0 : 1; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. OpEndIT(it); // Barrier to terminate OpIT. StoreValue(rl_dest, rl_result); // Now, restore lr to its non-temp status. Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); return true; } bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { constexpr int kLargeArrayThreshold = 256; RegLocation rl_src = info->args[0]; RegLocation rl_src_pos = info->args[1]; RegLocation rl_dst = info->args[2]; RegLocation rl_dst_pos = info->args[3]; RegLocation rl_length = info->args[4]; // Compile time check, handle exception by non-inline method to reduce related meta-data. if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) || (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) || (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) { return false; } ClobberCallerSave(); LockCallTemps(); // Prepare for explicit register usage. LockTemp(rs_r12); RegStorage rs_src = rs_r0; RegStorage rs_dst = rs_r1; LoadValueDirectFixed(rl_src, rs_src); LoadValueDirectFixed(rl_dst, rs_dst); // Handle null pointer exception in slow-path. LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr); LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr); // Handle potential overlapping in slow-path. LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr); // Handle exception or big length in slow-path. RegStorage rs_length = rs_r2; LoadValueDirectFixed(rl_length, rs_length); LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr); // Src bounds check. RegStorage rs_pos = rs_r3; RegStorage rs_arr_length = rs_r12; LoadValueDirectFixed(rl_src_pos, rs_pos); LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr); Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); OpRegReg(kOpSub, rs_arr_length, rs_pos); LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); // Dst bounds check. LoadValueDirectFixed(rl_dst_pos, rs_pos); LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr); Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); OpRegReg(kOpSub, rs_arr_length, rs_pos); LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); // Everything is checked now. OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value()); OpRegReg(kOpAdd, rs_dst, rs_pos); OpRegReg(kOpAdd, rs_dst, rs_pos); OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value()); LoadValueDirectFixed(rl_src_pos, rs_pos); OpRegReg(kOpAdd, rs_src, rs_pos); OpRegReg(kOpAdd, rs_src, rs_pos); RegStorage rs_tmp = rs_pos; OpRegRegImm(kOpLsl, rs_length, rs_length, 1); // Copy one element. OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2); LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr); OpRegImm(kOpSub, rs_length, 2); LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf); StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf); // Copy two elements. LIR *begin_loop = NewLIR0(kPseudoTargetLabel); LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr); OpRegImm(kOpSub, rs_length, 4); LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32); StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32); OpUnconditionalBranch(begin_loop); LIR *check_failed = NewLIR0(kPseudoTargetLabel); LIR* launchpad_branch = OpUnconditionalBranch(nullptr); LIR* return_point = NewLIR0(kPseudoTargetLabel); src_check_branch->target = check_failed; dst_check_branch->target = check_failed; src_dst_same->target = check_failed; len_neg_or_too_big->target = check_failed; src_pos_negative->target = check_failed; src_bad_len->target = check_failed; dst_pos_negative->target = check_failed; dst_bad_len->target = check_failed; jmp_to_begin_loop->target = begin_loop; jmp_to_ret->target = return_point; AddIntrinsicSlowPath(info, launchpad_branch, return_point); ClobberCallerSave(); // We must clobber everything because slow path will return here return true; } void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* lir = NewLIR2(kThumb2LdrPcRel12, reg.GetReg(), 0); lir->target = target; } bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { return dex_cache_arrays_layout_.Valid(); } void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) { LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0); LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0); ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH; LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg()); add_pc->flags.fixup = kFixupLabel; movw->operands[2] = WrapPointer(dex_file); movw->operands[3] = offset; movw->operands[4] = WrapPointer(add_pc); movt->operands[2] = movw->operands[2]; movt->operands[3] = movw->operands[3]; movt->operands[4] = movw->operands[4]; dex_cache_access_insns_.push_back(movw); dex_cache_access_insns_.push_back(movt); } void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest, bool wide) { DCHECK(!wide) << "Unsupported"; if (dex_cache_arrays_base_reg_.Valid()) { LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_, r_dest, kNotVolatile); } else { OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest); LoadRefDisp(r_dest, 0, r_dest, kNotVolatile); } } LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); } LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count); } void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, RegLocation rl_src3, bool is_sub) { rl_src1 = LoadValue(rl_src1, kCoreReg); rl_src2 = LoadValue(rl_src2, kCoreReg); rl_src3 = LoadValue(rl_src3, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); NewLIR4(is_sub ? kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg()); StoreValue(rl_dest, rl_result); } void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) { UNUSED(lit); OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kArmLsl, second_bit - first_bit)); if (first_bit != 0) { OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); } } void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) { DCHECK(reg.IsPair()); // TODO: support k64BitSolo. RegStorage t_reg = AllocTemp(); NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0); FreeTemp(t_reg); GenDivZeroCheck(kCondEq); } // Test suspend flag, return target of taken suspend branch LIR* ArmMir2Lir::OpTestSuspend(LIR* target) { #ifdef ARM_R4_SUSPEND_FLAG NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1); return OpCondBranch((target == nullptr) ? kCondEq : kCondNe, target); #else RegStorage t_reg = AllocTemp(); LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(), t_reg, kUnsignedHalf, kNotVolatile); LIR* cmp_branch = OpCmpImmBranch((target == nullptr) ? kCondNe : kCondEq, t_reg, 0, target); FreeTemp(t_reg); return cmp_branch; #endif } // Decrement register and branch on condition LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { // Combine sub & test using sub setflags encoding here OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) { return false; } // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one. LIR* barrier = last_lir_insn_; int dmb_flavor; // TODO: revisit Arm barrier kinds switch (barrier_kind) { case kAnyStore: dmb_flavor = kISH; break; case kLoadAny: dmb_flavor = kISH; break; case kStoreStore: dmb_flavor = kISHST; break; case kAnyAny: dmb_flavor = kISH; break; default: LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind; dmb_flavor = kSY; // quiet gcc. break; } bool ret = false; // If the same barrier already exists, don't generate another. if (barrier == nullptr || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) { barrier = NewLIR1(kThumb2Dmb, dmb_flavor); ret = true; } // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. DCHECK(!barrier->flags.use_def_invalid); barrier->u.m.def_mask = &kEncodeAll; return ret; } void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage z_reg = AllocTemp(); LoadConstantNoClobber(z_reg, 0); // Check for destructive overlap if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) { RegStorage t_reg = AllocTemp(); OpRegCopy(t_reg, rl_result.reg.GetLow()); OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow()); OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg); FreeTemp(t_reg); } else { OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow()); OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh()); } FreeTemp(z_reg); StoreValueWide(rl_dest, rl_result); } void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { UNUSED(opcode); /* * tmp1 = src1.hi * src2.lo; // src1.hi is no longer needed * dest = src1.lo * src2.lo; * tmp1 += src1.lo * src2.hi; * dest.hi += tmp1; * * To pull off inline multiply, we have a worst-case requirement of 7 temporary * registers. Normally for Arm, we get 5. We can get to 6 by including * lr in the temp set. The only problematic case is all operands and result are * distinct, and none have been promoted. In that case, we can succeed by aggressively * freeing operand temp registers after they are no longer needed. All other cases * can proceed normally. We'll just punt on the case of the result having a misaligned * overlap with either operand and send that case to a runtime handler. */ RegLocation rl_result; if (PartiallyIntersects(rl_src1, rl_dest) || (PartiallyIntersects(rl_src2, rl_dest))) { FlushAllRegs(); CallRuntimeHelperRegLocationRegLocation(kQuickLmul, rl_src1, rl_src2, false); rl_result = GetReturnWide(kCoreReg); StoreValueWide(rl_dest, rl_result); return; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); int reg_status = 0; RegStorage res_lo; RegStorage res_hi; bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() && !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh()); bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh()); bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh()); // Check if rl_dest is *not* either operand and we have enough temp registers. if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) && (dest_promoted || src1_promoted || src2_promoted)) { // In this case, we do not need to manually allocate temp registers for result. rl_result = EvalLoc(rl_dest, kCoreReg, true); res_lo = rl_result.reg.GetLow(); res_hi = rl_result.reg.GetHigh(); } else { res_lo = AllocTemp(); if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) { // In this case, we have enough temp registers to be allocated for result. res_hi = AllocTemp(); reg_status = 1; } else { // In this case, all temps are now allocated. // res_hi will be allocated after we can free src1_hi. reg_status = 2; } } // Temporarily add LR to the temp pool, and assign it to tmp1 MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); RegStorage tmp1 = rs_rARM_LR; LockTemp(rs_rARM_LR); if (rl_src1.reg == rl_src2.reg) { DCHECK(res_hi.Valid()); DCHECK(res_lo.Valid()); NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()); NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetLowReg()); OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); } else { NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg()); if (reg_status == 2) { DCHECK(!res_hi.Valid()); DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg()); DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); // Will force free src1_hi, so must clobber. Clobber(rl_src1.reg); FreeTemp(rl_src1.reg.GetHigh()); res_hi = AllocTemp(); } DCHECK(res_hi.Valid()); DCHECK(res_lo.Valid()); NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetLowReg()); NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(), tmp1.GetReg()); NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0); if (reg_status == 2) { FreeTemp(rl_src1.reg.GetLow()); } } if (reg_status != 0) { // We had manually allocated registers for rl_result. // Now construct a RegLocation. rl_result = GetReturnWide(kCoreReg); // Just using as a template. rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi); } // Free tmp1 but keep LR as temp for StoreValueWide() if needed. FreeTemp(tmp1); StoreValueWide(rl_dest, rl_result); // Now, restore lr to its non-temp status. Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); } void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, int flags) { switch (opcode) { case Instruction::MUL_LONG: case Instruction::MUL_LONG_2ADDR: GenMulLong(opcode, rl_dest, rl_src1, rl_src2); return; case Instruction::NEG_LONG: GenNegLong(rl_dest, rl_src2); return; default: break; } // Fallback for all other ops. Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); } /* * Generate array load */ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) { RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; RegLocation rl_result; bool constant_index = rl_index.is_const; rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } if (rl_dest.wide) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } // If index is constant, just fold it into the data offset if (constant_index) { data_offset += mir_graph_->ConstantValue(rl_index) << scale; } /* null object? */ GenNullCheck(rl_array.reg, opt_flags); bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); RegStorage reg_len; if (needs_range_check) { reg_len = AllocTemp(); /* Get len */ Load32Disp(rl_array.reg, len_offset, reg_len); MarkPossibleNullPointerException(opt_flags); } else { ForceImplicitNullCheck(rl_array.reg, opt_flags); } if (rl_dest.wide || rl_dest.fp || constant_index) { RegStorage reg_ptr; if (constant_index) { reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. } else { // No special indexed operation, lea + load w/ displacement reg_ptr = AllocTempRef(); OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); FreeTemp(rl_index.reg); } rl_result = EvalLoc(rl_dest, reg_class, true); if (needs_range_check) { if (constant_index) { GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len); } else { GenArrayBoundsCheck(rl_index.reg, reg_len); } FreeTemp(reg_len); } LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile); if (!constant_index) { FreeTemp(reg_ptr); } if (rl_dest.wide) { StoreValueWide(rl_dest, rl_result); } else { StoreValue(rl_dest, rl_result); } } else { // Offset base, then use indexed load RegStorage reg_ptr = AllocTempRef(); OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); FreeTemp(rl_array.reg); rl_result = EvalLoc(rl_dest, reg_class, true); if (needs_range_check) { GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size); FreeTemp(reg_ptr); StoreValue(rl_dest, rl_result); } } /* * Generate array store * */ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); bool constant_index = rl_index.is_const; int data_offset; if (size == k64 || size == kDouble) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } // If index is constant, just fold it into the data offset. if (constant_index) { data_offset += mir_graph_->ConstantValue(rl_index) << scale; } rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } RegStorage reg_ptr; bool allocated_reg_ptr_temp = false; if (constant_index) { reg_ptr = rl_array.reg; } else if (IsTemp(rl_array.reg) && !card_mark) { Clobber(rl_array.reg); reg_ptr = rl_array.reg; } else { allocated_reg_ptr_temp = true; reg_ptr = AllocTempRef(); } /* null object? */ GenNullCheck(rl_array.reg, opt_flags); bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); RegStorage reg_len; if (needs_range_check) { reg_len = AllocTemp(); // NOTE: max live temps(4) here. /* Get len */ Load32Disp(rl_array.reg, len_offset, reg_len); MarkPossibleNullPointerException(opt_flags); } else { ForceImplicitNullCheck(rl_array.reg, opt_flags); } /* at this point, reg_ptr points to array, 2 live temps */ if (rl_src.wide || rl_src.fp || constant_index) { if (rl_src.wide) { rl_src = LoadValueWide(rl_src, reg_class); } else { rl_src = LoadValue(rl_src, reg_class); } if (!constant_index) { OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); } if (needs_range_check) { if (constant_index) { GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len); } else { GenArrayBoundsCheck(rl_index.reg, reg_len); } FreeTemp(reg_len); } StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile); } else { /* reg_ptr -> array data */ OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); rl_src = LoadValue(rl_src, reg_class); if (needs_range_check) { GenArrayBoundsCheck(rl_index.reg, reg_len); FreeTemp(reg_len); } StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size); } if (allocated_reg_ptr_temp) { FreeTemp(reg_ptr); } if (card_mark) { MarkGCCard(opt_flags, rl_src.reg, rl_array.reg); } } void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift, int flags) { UNUSED(flags); rl_src = LoadValueWide(rl_src, kCoreReg); // Per spec, we only care about low 6 bits of shift amount. int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; if (shift_amount == 0) { StoreValueWide(rl_dest, rl_src); return; } if (PartiallyIntersects(rl_src, rl_dest)) { GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); return; } RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); switch (opcode) { case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: if (shift_amount == 1) { OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow()); OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh()); } else if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg); LoadConstant(rl_result.reg.GetLow(), 0); } else if (shift_amount > 31) { OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32); LoadConstant(rl_result.reg.GetLow(), 0); } else { OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(), EncodeShift(kArmLsr, 32 - shift_amount)); OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount); } break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); } else if (shift_amount > 31) { OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); } else { RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), EncodeShift(kArmLsl, 32 - shift_amount)); FreeTemp(t_reg); OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); } break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: if (shift_amount == 32) { OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); LoadConstant(rl_result.reg.GetHigh(), 0); } else if (shift_amount > 31) { OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); LoadConstant(rl_result.reg.GetHigh(), 0); } else { RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), EncodeShift(kArmLsl, 32 - shift_amount)); FreeTemp(t_reg); OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); } break; default: LOG(FATAL) << "Unexpected case"; } StoreValueWide(rl_dest, rl_result); } void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, int flags) { if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { if (!rl_src2.is_const) { // Don't bother with special handling for subtract from immediate. GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); return; } } else { // Normalize if (!rl_src2.is_const) { DCHECK(rl_src1.is_const); std::swap(rl_src1, rl_src2); } } if (PartiallyIntersects(rl_src1, rl_dest)) { GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); return; } DCHECK(rl_src2.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src2); uint32_t val_lo = Low32Bits(val); uint32_t val_hi = High32Bits(val); int32_t mod_imm_lo = ModifiedImmediate(val_lo); int32_t mod_imm_hi = ModifiedImmediate(val_hi); // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit switch (opcode) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); return; } break; default: break; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // NOTE: once we've done the EvalLoc on dest, we can no longer bail. switch (opcode) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); break; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); } if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); } break; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); break; case Instruction::AND_LONG: case Instruction::AND_LONG_2ADDR: if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); } if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); } break; case Instruction::SUB_LONG_2ADDR: case Instruction::SUB_LONG: NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); break; default: LOG(FATAL) << "Unexpected opcode " << opcode; } StoreValueWide(rl_dest, rl_result); } bool ArmMir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, RegLocation rl_dest, int lit) { if (lit < 2) { return false; } // ARM does either not support a division instruction, or it is potentially expensive. Look for // more special cases. if (!IsPowerOfTwo(lit)) { return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit); } return Mir2Lir::HandleEasyDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit); } } // namespace art