From a262f7707330dccfb50af6345813083182b61043 Mon Sep 17 00:00:00 2001 From: Ningsheng Jian Date: Tue, 25 Nov 2014 16:48:07 +0800 Subject: ARM: Combine multiply accumulate operations. Try to combine integer multiply and add(sub) into a MAC operation. For AArch64, also try to combine long type multiply and add(sub). Change-Id: Ic85812e941eb5a66abc355cab81a4dd16de1b66e --- compiler/dex/compiler_enums.h | 28 ++++ compiler/dex/mir_dataflow.cc | 58 +++++++- compiler/dex/mir_graph.cc | 30 +++++ compiler/dex/mir_graph.h | 28 ++++ compiler/dex/mir_optimization.cc | 205 +++++++++++++++++++++++++++++ compiler/dex/quick/arm/arm_lir.h | 11 +- compiler/dex/quick/arm/assemble_arm.cc | 4 + compiler/dex/quick/arm/codegen_arm.h | 4 + compiler/dex/quick/arm/int_arm.cc | 11 ++ compiler/dex/quick/arm/target_arm.cc | 26 ++++ compiler/dex/quick/arm64/arm64_lir.h | 1 + compiler/dex/quick/arm64/assemble_arm64.cc | 4 + compiler/dex/quick/arm64/codegen_arm64.h | 6 + compiler/dex/quick/arm64/int_arm64.cc | 28 +++- compiler/dex/quick/arm64/target_arm64.cc | 31 +++++ disassembler/disassembler_arm.cc | 2 +- test/704-multiply-accumulate/expected.txt | 1 + test/704-multiply-accumulate/info.txt | 1 + test/704-multiply-accumulate/src/Main.java | 171 ++++++++++++++++++++++++ 19 files changed, 639 insertions(+), 11 deletions(-) create mode 100644 test/704-multiply-accumulate/expected.txt create mode 100644 test/704-multiply-accumulate/info.txt create mode 100644 test/704-multiply-accumulate/src/Main.java diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index a3fe8ad..7ff06a0 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -311,6 +311,34 @@ enum ExtendedMIROpcode { // arg[0]: TypeSize (most other vector opcodes have this in vC) kMirOpPackedArrayPut, + // @brief Multiply-add integer. + // vA: destination + // vB: multiplicand + // vC: multiplier + // arg[0]: addend + kMirOpMaddInt, + + // @brief Multiply-subtract integer. + // vA: destination + // vB: multiplicand + // vC: multiplier + // arg[0]: minuend + kMirOpMsubInt, + + // @brief Multiply-add long. + // vA: destination + // vB: multiplicand + // vC: multiplier + // arg[0]: addend + kMirOpMaddLong, + + // @brief Multiply-subtract long. 
+ // vA: destination + // vB: multiplicand + // vC: multiplier + // arg[0]: minuend + kMirOpMsubLong, + kMirOpLast, }; diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 64895d8..6704112 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -897,6 +897,18 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 120 MirOpPackedArrayPut DF_UB | DF_UC | DF_NULL_CHK_B | DF_RANGE_CHK_C | DF_REF_B | DF_CORE_C | DF_LVN, + + // 121 MirOpMaddInt + DF_FORMAT_EXTENDED, + + // 122 MirOpMsubInt + DF_FORMAT_EXTENDED, + + // 123 MirOpMaddLong + DF_FORMAT_EXTENDED, + + // 124 MirOpMsubLong + DF_FORMAT_EXTENDED, }; /* Return the base virtual register for a SSA name */ @@ -906,7 +918,7 @@ int MIRGraph::SRegToVReg(int ssa_reg) const { /* Any register that is used before being defined is considered live-in */ void MIRGraph::HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, - ArenaBitVector* live_in_v, int dalvik_reg_id) { + ArenaBitVector* live_in_v, int dalvik_reg_id) { use_v->SetBit(dalvik_reg_id); if (!def_v->IsBitSet(dalvik_reg_id)) { live_in_v->SetBit(dalvik_reg_id); @@ -919,8 +931,8 @@ void MIRGraph::HandleDef(ArenaBitVector* def_v, int dalvik_reg_id) { } void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v, - ArenaBitVector* live_in_v, - const MIR::DecodedInstruction& d_insn) { + ArenaBitVector* live_in_v, + const MIR::DecodedInstruction& d_insn) { // For vector MIRs, vC contains type information bool is_vector_type_wide = false; int type_size = d_insn.vC >> 16; @@ -951,6 +963,24 @@ void MIRGraph::HandleExtended(ArenaBitVector* use_v, ArenaBitVector* def_v, HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB + 1); } break; + case kMirOpMaddInt: + case kMirOpMsubInt: + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0]); + HandleDef(def_v, d_insn.vA); + break; + case kMirOpMaddLong: + case kMirOpMsubLong: + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vB + 1); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.vC + 1); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0]); + HandleLiveInUse(use_v, def_v, live_in_v, d_insn.arg[0] + 1); + HandleDef(def_v, d_insn.vA); + HandleDef(def_v, d_insn.vA + 1); + break; default: LOG(ERROR) << "Unexpected Extended Opcode " << d_insn.opcode; break; @@ -1139,6 +1169,28 @@ void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) { HandleSSAUse(mir->ssa_rep->uses, d_insn.vB + 1, 1); } break; + case kMirOpMaddInt: + case kMirOpMsubInt: + AllocateSSAUseData(mir, 3); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vB, 0); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vC, 1); + HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0], 2); + AllocateSSADefData(mir, 1); + HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0); + break; + case kMirOpMaddLong: + case kMirOpMsubLong: + AllocateSSAUseData(mir, 6); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vB, 0); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vB + 1, 1); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vC, 2); + HandleSSAUse(mir->ssa_rep->uses, d_insn.vC + 1, 3); + HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0], 4); + HandleSSAUse(mir->ssa_rep->uses, d_insn.arg[0] + 1, 5); + AllocateSSADefData(mir, 2); + HandleSSADef(mir->ssa_rep->defs, d_insn.vA, 0); + HandleSSADef(mir->ssa_rep->defs, d_insn.vA + 1, 1); + break; default: 
LOG(ERROR) << "Missing case for extended MIR: " << mir->dalvikInsn.opcode; break; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index d7ecb2c..71ad635 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -70,6 +70,10 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = { "MemBarrier", "PackedArrayGet", "PackedArrayPut", + "MaddInt", + "MsubInt", + "MaddLong", + "MsubLong", }; MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) @@ -1386,6 +1390,27 @@ void MIRGraph::DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir } FillTypeSizeString(mir->dalvikInsn.arg[0], decoded_mir); break; + case kMirOpMaddInt: + case kMirOpMsubInt: + case kMirOpMaddLong: + case kMirOpMsubLong: + if (ssa_rep != nullptr) { + decoded_mir->append(" "); + decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[0], false)); + if (defs > 1) { + decoded_mir->append(", "); + decoded_mir->append(GetSSANameWithConst(ssa_rep->defs[1], false)); + } + for (int i = 0; i < uses; i++) { + decoded_mir->append(", "); + decoded_mir->append(GetSSANameWithConst(ssa_rep->uses[i], false)); + } + } else { + decoded_mir->append(StringPrintf(" v%d, v%d, v%d, v%d", + mir->dalvikInsn.vA, mir->dalvikInsn.vB, + mir->dalvikInsn.vC, mir->dalvikInsn.arg[0])); + } + break; default: break; } @@ -2459,6 +2484,11 @@ int MIR::DecodedInstruction::FlagsOf() const { return Instruction::kContinue | Instruction::kThrow; case kMirOpPackedArrayPut: return Instruction::kContinue | Instruction::kThrow; + case kMirOpMaddInt: + case kMirOpMsubInt: + case kMirOpMaddLong: + case kMirOpMsubLong: + return Instruction::kContinue; default: LOG(WARNING) << "ExtendedFlagsOf: Unhandled case: " << static_cast (opcode); return 0; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 9890690..851ca15 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -1265,6 +1265,34 @@ class MIRGraph { void ComputeDomPostOrderTraversal(BasicBlock* bb); int GetSSAUseCount(int s_reg); bool BasicBlockOpt(BasicBlock* bb); + void MultiplyAddOpt(BasicBlock* bb); + + /** + * @brief Check whether the given MIR is possible to throw an exception. + * @param mir The mir to check. + * @return Returns 'true' if the given MIR might throw an exception. + */ + bool CanThrow(MIR* mir); + /** + * @brief Combine multiply and add/sub MIRs into corresponding extended MAC MIR. + * @param mul_mir The multiply MIR to be combined. + * @param add_mir The add/sub MIR to be combined. + * @param mul_is_first_addend 'true' if multiply product is the first addend of add operation. + * @param is_wide 'true' if the operations are long type. + * @param is_sub 'true' if it is a multiply-subtract operation. + */ + void CombineMultiplyAdd(MIR* mul_mir, MIR* add_mir, bool mul_is_first_addend, + bool is_wide, bool is_sub); + /* + * @brief Check whether the first MIR anti-depends on the second MIR. + * @details To check whether one of first MIR's uses of vregs is redefined by the second MIR, + * i.e. there is a write-after-read dependency. + * @param first The first MIR. + * @param second The second MIR. + * @param Returns true if there is a write-after-read dependency. 
+ */ + bool HasAntiDependency(MIR* first, MIR* second); + bool BuildExtendedBBList(class BasicBlock* bb); bool FillDefBlockMatrix(BasicBlock* bb); void InitializeDominationInfo(BasicBlock* bb); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index fc96075..f78b39f 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -426,6 +426,10 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { if (bb->block_type == kDead) { return true; } + // Currently multiply-accumulate backend supports are only available on arm32 and arm64. + if (cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2) { + MultiplyAddOpt(bb); + } bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kLocalValueNumbering)) == 0u; std::unique_ptr allocator; std::unique_ptr global_valnum; @@ -1709,4 +1713,205 @@ void MIRGraph::EliminateSuspendChecksEnd() { temp_.sce.inliner = nullptr; } +bool MIRGraph::CanThrow(MIR* mir) { + if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { + return false; + } + const int opt_flags = mir->optimization_flags; + uint64_t df_attributes = GetDataFlowAttributes(mir); + + if (((df_attributes & DF_HAS_NULL_CHKS) != 0) && ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) { + return true; + } + if ((df_attributes & DF_IFIELD) != 0) { + // The IGET/IPUT family. + const MirIFieldLoweringInfo& field_info = GetIFieldLoweringInfo(mir); + bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); + // Already processed null check above. + if (fast) { + return false; + } + } else if ((df_attributes & DF_HAS_RANGE_CHKS) != 0) { + // The AGET/APUT family. + // Already processed null check above. + if ((opt_flags & MIR_IGNORE_RANGE_CHECK) != 0) { + return false; + } + } else if ((df_attributes & DF_SFIELD) != 0) { + // The SGET/SPUT family. + const MirSFieldLoweringInfo& field_info = GetSFieldLoweringInfo(mir); + bool fast = (df_attributes & DF_DA) ? field_info.FastGet() : field_info.FastPut(); + bool is_class_initialized = field_info.IsClassInitialized() || + ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) != 0); + if (fast && is_class_initialized) { + return false; + } + } + return true; +} + +bool MIRGraph::HasAntiDependency(MIR* first, MIR* second) { + DCHECK(first->ssa_rep != nullptr); + DCHECK(second->ssa_rep != nullptr); + if ((second->ssa_rep->num_defs > 0) && (first->ssa_rep->num_uses > 0)) { + int vreg0 = SRegToVReg(second->ssa_rep->defs[0]); + int vreg1 = (second->ssa_rep->num_defs == 2) ? + SRegToVReg(second->ssa_rep->defs[1]) : INVALID_VREG; + for (int i = 0; i < first->ssa_rep->num_uses; i++) { + int32_t use = SRegToVReg(first->ssa_rep->uses[i]); + if (use == vreg0 || use == vreg1) { + return true; + } + } + } + return false; +} + +void MIRGraph::CombineMultiplyAdd(MIR* mul_mir, MIR* add_mir, bool mul_is_first_addend, + bool is_wide, bool is_sub) { + if (is_wide) { + if (is_sub) { + add_mir->dalvikInsn.opcode = static_cast(kMirOpMsubLong); + } else { + add_mir->dalvikInsn.opcode = static_cast(kMirOpMaddLong); + } + } else { + if (is_sub) { + add_mir->dalvikInsn.opcode = static_cast(kMirOpMsubInt); + } else { + add_mir->dalvikInsn.opcode = static_cast(kMirOpMaddInt); + } + } + add_mir->ssa_rep->num_uses = is_wide ? 6 : 3; + int32_t addend0 = INVALID_SREG; + int32_t addend1 = INVALID_SREG; + if (is_wide) { + addend0 = mul_is_first_addend ? add_mir->ssa_rep->uses[2] : add_mir->ssa_rep->uses[0]; + addend1 = mul_is_first_addend ? 
add_mir->ssa_rep->uses[3] : add_mir->ssa_rep->uses[1]; + } else { + addend0 = mul_is_first_addend ? add_mir->ssa_rep->uses[1] : add_mir->ssa_rep->uses[0]; + } + + AllocateSSAUseData(add_mir, add_mir->ssa_rep->num_uses); + add_mir->ssa_rep->uses[0] = mul_mir->ssa_rep->uses[0]; + add_mir->ssa_rep->uses[1] = mul_mir->ssa_rep->uses[1]; + // Clear the original multiply product ssa use count, as it is not used anymore. + raw_use_counts_[mul_mir->ssa_rep->defs[0]] = 0; + use_counts_[mul_mir->ssa_rep->defs[0]] = 0; + if (is_wide) { + DCHECK_EQ(add_mir->ssa_rep->num_uses, 6); + add_mir->ssa_rep->uses[2] = mul_mir->ssa_rep->uses[2]; + add_mir->ssa_rep->uses[3] = mul_mir->ssa_rep->uses[3]; + add_mir->ssa_rep->uses[4] = addend0; + add_mir->ssa_rep->uses[5] = addend1; + raw_use_counts_[mul_mir->ssa_rep->defs[1]] = 0; + use_counts_[mul_mir->ssa_rep->defs[1]] = 0; + } else { + DCHECK_EQ(add_mir->ssa_rep->num_uses, 3); + add_mir->ssa_rep->uses[2] = addend0; + } + // Copy in the decoded instruction information. + add_mir->dalvikInsn.vB = SRegToVReg(add_mir->ssa_rep->uses[0]); + if (is_wide) { + add_mir->dalvikInsn.vC = SRegToVReg(add_mir->ssa_rep->uses[2]); + add_mir->dalvikInsn.arg[0] = SRegToVReg(add_mir->ssa_rep->uses[4]); + } else { + add_mir->dalvikInsn.vC = SRegToVReg(add_mir->ssa_rep->uses[1]); + add_mir->dalvikInsn.arg[0] = SRegToVReg(add_mir->ssa_rep->uses[2]); + } + // Original multiply MIR is set to Nop. + mul_mir->dalvikInsn.opcode = static_cast(kMirOpNop); +} + +void MIRGraph::MultiplyAddOpt(BasicBlock* bb) { + if (bb->block_type == kDead) { + return; + } + ScopedArenaAllocator allocator(&cu_->arena_stack); + ScopedArenaSafeMap ssa_mul_map(std::less(), allocator.Adapter()); + ScopedArenaSafeMap::iterator map_it; + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + Instruction::Code opcode = mir->dalvikInsn.opcode; + bool is_sub = true; + bool is_candidate_multiply = false; + switch (opcode) { + case Instruction::MUL_INT: + case Instruction::MUL_INT_2ADDR: + is_candidate_multiply = true; + break; + case Instruction::MUL_LONG: + case Instruction::MUL_LONG_2ADDR: + if (cu_->target64) { + is_candidate_multiply = true; + } + break; + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + is_sub = false; + FALLTHROUGH_INTENDED; + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + if (((map_it = ssa_mul_map.find(mir->ssa_rep->uses[0])) != ssa_mul_map.end()) && !is_sub) { + // a*b+c + CombineMultiplyAdd(map_it->second, mir, true /* product is the first addend */, + false /* is_wide */, false /* is_sub */); + ssa_mul_map.erase(mir->ssa_rep->uses[0]); + } else if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[1])) != ssa_mul_map.end()) { + // c+a*b or c-a*b + CombineMultiplyAdd(map_it->second, mir, false /* product is the second addend */, + false /* is_wide */, is_sub); + ssa_mul_map.erase(map_it); + } + break; + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + is_sub = false; + FALLTHROUGH_INTENDED; + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + if (!cu_->target64) { + break; + } + if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[0])) != ssa_mul_map.end() && !is_sub) { + // a*b+c + CombineMultiplyAdd(map_it->second, mir, true /* product is the first addend */, + true /* is_wide */, false /* is_sub */); + ssa_mul_map.erase(map_it); + } else if ((map_it = ssa_mul_map.find(mir->ssa_rep->uses[2])) != ssa_mul_map.end()) { + // c+a*b or c-a*b + CombineMultiplyAdd(map_it->second, mir, false /* product is the second addend */, 
+ true /* is_wide */, is_sub); + ssa_mul_map.erase(map_it); + } + break; + default: + if (!ssa_mul_map.empty() && CanThrow(mir)) { + // Should not combine multiply and add MIRs across potential exception. + ssa_mul_map.clear(); + } + break; + } + + // Exclude the case when an MIR writes a vreg which is previous candidate multiply MIR's uses. + // It is because that current RA may allocate the same physical register to them. For this + // kind of cases, the multiplier has been updated, we should not use updated value to the + // multiply-add insn. + if (ssa_mul_map.size() > 0) { + for (auto it = ssa_mul_map.begin(); it != ssa_mul_map.end();) { + MIR* mul = it->second; + if (HasAntiDependency(mul, mir)) { + it = ssa_mul_map.erase(it); + } else { + ++it; + } + } + } + + if (is_candidate_multiply && + (GetRawUseCount(mir->ssa_rep->defs[0]) == 1) && (mir->next != nullptr)) { + ssa_mul_map.Put(mir->ssa_rep->defs[0], mir); + } + } +} + } // namespace art diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index b9d9a11..5d09ae1 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -481,10 +481,10 @@ enum ArmOpcode { kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. - kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0]. - kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0]. - kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0]. - kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0]. + kThumb2LslRRI5, // lsl [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [00] rm[3..0]. + kThumb2LsrRRI5, // lsr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [01] rm[3..0]. + kThumb2AsrRRI5, // asr [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [10] rm[3..0]. + kThumb2RorRRI5, // ror [11101010010011110] imm3[14..12] rd[11..8] imm2[7..6] [11] rm[3..0]. kThumb2BicRRI8M, // bic rd, rn, # [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2AndRRI8M, // and rd, rn, # [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. kThumb2OrrRRI8M, // orr rd, rn, # [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. @@ -512,7 +512,8 @@ enum ArmOpcode { kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0]. kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0]. - kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0]. + kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[11-8] [0000] rm[3-0]. + kThumb2Mls, // mls [111110110000] rn[19-16] ra[15-12] rd[11-8] [0001] rm[3-0]. kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0]. kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111]. 
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index de93e26..65fb3cd 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -896,6 +896,10 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123, "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), + ENCODING_MAP(kThumb2Mls, 0xfb000010, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123, + "mls", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Umull, 0xfba00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 0ae7ee3..fa8dfe3 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -182,6 +182,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub); // Required for target - single operation generators. LIR* OpUnconditionalBranch(LIR* target); @@ -259,6 +261,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); + void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE; + private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 1a7b439..fe1d126 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1075,6 +1075,17 @@ LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count); } +void ArmMir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub) { + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + rl_src3 = LoadValue(rl_src3, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR4(is_sub ? 
kThumb2Mls : kThumb2Mla, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), + rl_src2.reg.GetReg(), rl_src3.reg.GetReg()); + StoreValue(rl_dest, rl_result); +} + void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) { diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 7190a49..d374353 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -948,4 +948,30 @@ int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { return count; } +void ArmMir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { + UNUSED(bb); + DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)); + RegLocation rl_src[3]; + RegLocation rl_dest = mir_graph_->GetBadLoc(); + rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc(); + switch (static_cast(mir->dalvikInsn.opcode)) { + case kMirOpMaddInt: + rl_dest = mir_graph_->GetDest(mir); + rl_src[0] = mir_graph_->GetSrc(mir, 0); + rl_src[1] = mir_graph_->GetSrc(mir, 1); + rl_src[2]= mir_graph_->GetSrc(mir, 2); + GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], false); + break; + case kMirOpMsubInt: + rl_dest = mir_graph_->GetDest(mir); + rl_src[0] = mir_graph_->GetSrc(mir, 0); + rl_src[1] = mir_graph_->GetSrc(mir, 1); + rl_src[2]= mir_graph_->GetSrc(mir, 2); + GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], true); + break; + default: + LOG(FATAL) << "Unexpected opcode: " << mir->dalvikInsn.opcode; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index f8a7310..943c5c1 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -312,6 +312,7 @@ enum A64Opcode { kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0]. kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}". kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0]. + kA64Madd4rrrr, // madd[s0011011000] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0]. kA64Movk3rdM, // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0]. kA64Movn3rdM, // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0]. kA64Movz3rdM, // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0]. 
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index cab11cc..1fed5da 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -445,6 +445,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "lsr", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Madd4rrrr), SF_VARIANTS(0x1b000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP, + "madd", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769), ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000), kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0, diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 766ac23..55866e2 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -193,6 +193,10 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; + void GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub); + void GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub); // Required for target - single operation generators. LIR* OpUnconditionalBranch(LIR* target) OVERRIDE; @@ -226,6 +230,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { bool InexpensiveConstantLong(int64_t value) OVERRIDE; bool InexpensiveConstantDouble(int64_t value) OVERRIDE; + void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) OVERRIDE; + bool WideGPRsAreAliases() const OVERRIDE { return true; // 64b architecture. } diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 57e67d5..5ac2aa0 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -949,10 +949,33 @@ LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) { UNREACHABLE(); } +void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub) { + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + rl_src3 = LoadValue(rl_src3, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), + rl_src2.reg.GetReg(), rl_src3.reg.GetReg()); + StoreValue(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + RegLocation rl_src3, bool is_sub) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + rl_src3 = LoadValueWide(rl_src3, kCoreReg); + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + NewLIR4(is_sub ? 
WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(), + rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); +} + void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit ATTRIBUTE_UNUSED, int first_bit, int second_bit) { - OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsl, second_bit - first_bit)); + OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, + EncodeShift(kA64Lsl, second_bit - first_bit)); if (first_bit != 0) { OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); } @@ -1686,7 +1709,8 @@ bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { RegLocation rl_src_i = info->args[0]; RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info); // result reg RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - RegLocation rl_i = IsWide(size) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg); + RegLocation rl_i = IsWide(size) ? + LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg); NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg()); IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result); return true; diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index e7fa8ed..030c5ed 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -849,4 +849,35 @@ int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int c return count; } +void Arm64Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { + UNUSED(bb); + DCHECK(MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)); + RegLocation rl_src[3]; + RegLocation rl_dest = mir_graph_->GetBadLoc(); + rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc(); + ExtendedMIROpcode opcode = static_cast(mir->dalvikInsn.opcode); + switch (opcode) { + case kMirOpMaddInt: + case kMirOpMsubInt: + rl_dest = mir_graph_->GetDest(mir); + rl_src[0] = mir_graph_->GetSrc(mir, 0); + rl_src[1] = mir_graph_->GetSrc(mir, 1); + rl_src[2]= mir_graph_->GetSrc(mir, 2); + GenMaddMsubInt(rl_dest, rl_src[0], rl_src[1], rl_src[2], + (opcode == kMirOpMsubInt) ? true : false); + break; + case kMirOpMaddLong: + case kMirOpMsubLong: + rl_dest = mir_graph_->GetDestWide(mir); + rl_src[0] = mir_graph_->GetSrcWide(mir, 0); + rl_src[1] = mir_graph_->GetSrcWide(mir, 2); + rl_src[2] = mir_graph_->GetSrcWide(mir, 4); + GenMaddMsubLong(rl_dest, rl_src[0], rl_src[1], rl_src[2], + (opcode == kMirOpMsubLong) ? 
true : false); + break; + default: + LOG(FATAL) << "Unexpected opcode: " << static_cast(opcode); + } +} + } // namespace art diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 9243b1a..52fd736 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -1498,7 +1498,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } else if ((op2 >> 3) == 6) { // 0110xxx // Multiply, multiply accumulate, and absolute difference op1 = (instr >> 20) & 0x7; - op2 = (instr >> 4) & 0x2; + op2 = (instr >> 4) & 0x1; ArmRegister Ra(instr, 12); ArmRegister Rn(instr, 16); ArmRegister Rm(instr, 0); diff --git a/test/704-multiply-accumulate/expected.txt b/test/704-multiply-accumulate/expected.txt new file mode 100644 index 0000000..76f5a5a --- /dev/null +++ b/test/704-multiply-accumulate/expected.txt @@ -0,0 +1 @@ +Done! diff --git a/test/704-multiply-accumulate/info.txt b/test/704-multiply-accumulate/info.txt new file mode 100644 index 0000000..a12fd44 --- /dev/null +++ b/test/704-multiply-accumulate/info.txt @@ -0,0 +1 @@ +Tests for multiply accumulate operations. diff --git a/test/704-multiply-accumulate/src/Main.java b/test/704-multiply-accumulate/src/Main.java new file mode 100644 index 0000000..7404b9b --- /dev/null +++ b/test/704-multiply-accumulate/src/Main.java @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + static int imax = Integer.MAX_VALUE; + static int imin = Integer.MIN_VALUE; + static long lmax = Long.MAX_VALUE; + static long lmin = Long.MIN_VALUE; + static CA ca; + + public static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void test_int() { + int result = 0; + int a = imax; + int b = imin; + int c = 10; + int d = c; + int tmp = 0; + int [] ia = new int[5]; + for (int i = 0; i < 100; i++) { + tmp = i*c; + result += i*i; + result = i - tmp; + } + expectEquals(result, -891); + + result = c*c + (result - c); + expectEquals(result, -801); + + result = a + a*a; + expectEquals(result, -2147483648); + + result = b + b*b; + expectEquals(result, -2147483648); + + result = b - a*a; + expectEquals(result, 2147483647); + + result = d*d; + d++; + result += result; + expectEquals(result, 200); + + result = c*c; + tmp++; + result += result; + expectEquals(result, 200); + + result = 0; + try { + result = c*c; + ia[c] = d; // array out of bound. 
+ result += d; + } catch (Exception e) { + } + expectEquals(result, 100); + + CA obj = new CA(); + result = a*c + obj.ia; + expectEquals(result, 2); + + result = 0; + obj = ca; + try { + result = a*c; + tmp = obj.ia; + result = result + tmp; + } catch (Exception e) { + } + expectEquals(result, -10); + } + + public static void test_long() { + long result = 0; + long a = lmax; + long b = lmin; + long c = 10; + long d = c; + long tmp = 0; + int [] ia = new int[5]; + for (long i = 0; i < 100; i++) { + tmp = i*c; + result += i*i; + result = i - tmp; + } + expectEquals(result, -891L); + + result = c*c + (result - c); + expectEquals(result, -801L); + + result = a + a*a; + expectEquals(result, -9223372036854775808L); + + result = b + b*b; + expectEquals(result, -9223372036854775808L); + + result = b - a*a; + expectEquals(result, 9223372036854775807L); + + result = d*d; + d++; + result += result; + expectEquals(result, 200L); + + result = c*c; + tmp++; + result += result; + expectEquals(result, 200L); + + result = 0; + int index = 10; + try { + result = c*c; + ia[index] = 10; // array out of bound. + result += d; + } catch (Exception e) { + } + expectEquals(result, 100L); + + CA obj = new CA(); + result = a*c + obj.la; + expectEquals(result, 113L); + + result = 0; + obj = ca; + try { + result = a*c; + tmp = obj.la; + result = result + tmp; + } catch (Exception e) { + } + expectEquals(result, -10L); + } + + public static void main(String[] args) { + test_int(); + test_long(); + System.out.println("Done!"); + } + +} + +class CA { + public int ia = 12; + public long la = 123L; +} -- cgit v1.1
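
Note (not part of the patch): a minimal Java sketch of the source shapes the new MultiplyAddOpt pass is intended to match, mirroring the patterns exercised by test/704-multiply-accumulate. The pass rewrites a MUL_INT/MUL_LONG whose product has a single use, followed in the same basic block by an ADD/SUB, into kMirOpMaddInt/kMirOpMsubInt (mla/mls on Thumb2, madd/msub on AArch64) or kMirOpMaddLong/kMirOpMsubLong (AArch64 only). Class and method names below are illustrative only.

```java
// Illustrative sketch only; not code from the patch itself.
public class MacPatterns {
  // a*b + c and c + a*b: candidates for kMirOpMaddInt.
  static int maddInt(int a, int b, int c) {
    return a * b + c;
  }

  // c - a*b: candidate for kMirOpMsubInt (the product must be the subtrahend;
  // a*b - c is not combined).
  static int msubInt(int a, int b, int c) {
    return c - a * b;
  }

  // Long forms are only combined when cu_->target64 is set, i.e. on AArch64.
  static long maddLong(long a, long b, long c) {
    return a * b + c;
  }

  static long msubLong(long a, long b, long c) {
    return c - a * b;
  }

  // Not combined: the product has more than one use, so the
  // GetRawUseCount(defs[0]) == 1 check in MultiplyAddOpt fails.
  static int multiUse(int a, int b, int c) {
    int p = a * b;
    return p + c + p;
  }

  public static void main(String[] args) {
    System.out.println(maddInt(3, 4, 5));     // 17
    System.out.println(msubInt(3, 4, 5));     // -7
    System.out.println(maddLong(3L, 4L, 5L)); // 17
    System.out.println(msubLong(3L, 4L, 5L)); // -7
    System.out.println(multiUse(3, 4, 5));    // 29
  }
}
```

As the pass code above shows, candidates are also dropped when a potentially throwing MIR or a write-after-read dependency (HasAntiDependency) intervenes between the multiply and the add/sub, so only straight-line pairs within a basic block are fused.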