diff options
author | Matteo Franchin <matteo.franchin@arm.com> | 2014-10-27 13:29:30 +0000 |
---|---|---|
committer | Matteo Franchin <matteo.franchin@arm.com> | 2014-11-18 10:17:14 +0000 |
commit | 65420b244f18a3492a342ee3edaefeb26aed4230 (patch) | |
tree | ad3734113baefbfcc0d58787ad82e6000823e6ef | |
parent | 27e49ba4b67b6006284edf4d52e7c498ddb37022 (diff) | |
download | art-65420b244f18a3492a342ee3edaefeb26aed4230.zip art-65420b244f18a3492a342ee3edaefeb26aed4230.tar.gz art-65420b244f18a3492a342ee3edaefeb26aed4230.tar.bz2 |
AArch64: Addressing Cortex-A53 erratum 835769.
Some early revisions of the Cortex-A53 have an erratum (835769) whereby
it is possible for a 64-bit multiply-accumulate instruction in AArch64
state to generate an incorrect result. The conditions that a portion
of code must satisfy for the issue to be observed are somewhat
complex, but all cases end with a memory (load, store, or prefetch)
instruction followed immediately by the multiply-accumulate operation.
This commit makes sure to insert a nop instruction before a 64-bit msub
instruction whenever the latter is preceded by a memory instruction.
This behaviour should make it impossible for the Arm64 backend to
generate a sequence of instructions that matches the erratum
conditions.
Change-Id: I0022eccd41180183c20231dab6e2671d001a204c
-rw-r--r-- | compiler/dex/compiler_enums.h | 31 | ||||
-rw-r--r-- | compiler/dex/quick/arm64/arm64_lir.h | 3 | ||||
-rw-r--r-- | compiler/dex/quick/arm64/assemble_arm64.cc | 44 | ||||
-rw-r--r-- | compiler/dex/quick/arm64/int_arm64.cc | 5 |
4 files changed, 57 insertions, 26 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 5d877fd..b56fd6f 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -610,21 +610,22 @@ std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind); // LIR fixup kinds for Arm enum FixupKind { kFixupNone, - kFixupLabel, // For labels we just adjust the offset. - kFixupLoad, // Mostly for immediates. - kFixupVLoad, // FP load which *may* be pc-relative. - kFixupCBxZ, // Cbz, Cbnz. - kFixupTBxZ, // Tbz, Tbnz. - kFixupPushPop, // Not really pc relative, but changes size based on args. - kFixupCondBranch, // Conditional branch - kFixupT1Branch, // Thumb1 Unconditional branch - kFixupT2Branch, // Thumb2 Unconditional branch - kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair). - kFixupBl1, // Bl1 (start of Bl1/Bl2 pair). - kFixupAdr, // Adr. - kFixupMovImmLST, // kThumb2MovImm16LST. - kFixupMovImmHST, // kThumb2MovImm16HST. - kFixupAlign4, // Align to 4-byte boundary. + kFixupLabel, // For labels we just adjust the offset. + kFixupLoad, // Mostly for immediates. + kFixupVLoad, // FP load which *may* be pc-relative. + kFixupCBxZ, // Cbz, Cbnz. + kFixupTBxZ, // Tbz, Tbnz. + kFixupPushPop, // Not really pc relative, but changes size based on args. + kFixupCondBranch, // Conditional branch + kFixupT1Branch, // Thumb1 Unconditional branch + kFixupT2Branch, // Thumb2 Unconditional branch + kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair). + kFixupBl1, // Bl1 (start of Bl1/Bl2 pair). + kFixupAdr, // Adr. + kFixupMovImmLST, // kThumb2MovImm16LST. + kFixupMovImmHST, // kThumb2MovImm16HST. + kFixupAlign4, // Align to 4-byte boundary. + kFixupA53Erratum835769, // Cortex A53 Erratum 835769. 
}; std::ostream& operator<<(std::ostream& os, const FixupKind& kind); diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 973279e..f8a7310 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -320,6 +320,7 @@ enum A64Opcode { kA64Mul3rrr, // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0]. kA64Msub4rrrr, // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0]. kA64Neg3rro, // neg alias of "sub arg0, rzr, arg1, arg2". + kA64Nop0, // nop alias of "hint #0" [11010101000000110010000000011111]. kA64Orr3Rrl, // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. kA64Orr4rrro, // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. kA64Ret, // ret [11010110010111110000001111000000]. @@ -332,7 +333,7 @@ enum A64Opcode { kA64Scvtf2fw, // scvtf [000111100s100010000000] rn[9-5] rd[4-0]. kA64Scvtf2fx, // scvtf [100111100s100010000000] rn[9-5] rd[4-0]. kA64Sdiv3rrr, // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0]. - kA64Smaddl4xwwx, // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0]. + kA64Smull3xww, // smull [10011011001] rm[20-16] [011111] rn[9-5] rd[4-0]. kA64Smulh3xxx, // smulh [10011011010] rm[20-16] [011111] rn[9-5] rd[4-0]. kA64Stp4ffXD, // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. kA64Stp4rrXD, // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 9cdabf1..cab11cc 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -14,8 +14,10 @@ * limitations under the License. 
*/ -#include "arm64_lir.h" #include "codegen_arm64.h" + +#include "arch/arm64/instruction_set_features_arm64.h" +#include "arm64_lir.h" #include "dex/quick/mir_to_lir-inl.h" namespace art { @@ -468,13 +470,17 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "mul", "!0r, !1r, !2r", kFixupNone), ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000), - kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 14, 10, - kFmtRegR, 20, 16, IS_QUAD_OP | REG_DEF0_USE123, - "msub", "!0r, !1r, !3r, !2r", kFixupNone), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP, + "msub", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769), ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0), kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, "neg", "!0r, !1r!2o", kFixupNone), + ENCODING_MAP(kA64Nop0, NO_VARIANTS(0xd503201f), + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "nop", "", kFixupNone), ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000), kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, @@ -523,10 +529,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "sdiv", "!0r, !1r, !2r", kFixupNone), - ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000), + ENCODING_MAP(kA64Smull3xww, NO_VARIANTS(0x9b207c00), kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16, - kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123, - "smaddl", "!0x, !1w, !2w, !3x", kFixupNone), + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "smull", "!0x, !1w, !2w", kFixupNone), ENCODING_MAP(kA64Smulh3xxx, NO_VARIANTS(0x9b407c00), kFmtRegX, 4, 0, kFmtRegX, 9, 5, kFmtRegX, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP 
| REG_DEF0_USE12, @@ -988,6 +994,30 @@ void Arm64Mir2Lir::AssembleLIR() { lir->operands[1] = delta; break; } + case kFixupA53Erratum835769: + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: madd, msub, + // smaddl, smsubl, umaddl and umsubl. + if (cu_->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures() + ->NeedFixCortexA53_835769()) { + // Check that this is a 64-bit multiply-accumulate. + if (IS_WIDE(lir->opcode)) { + uint64_t prev_insn_flags = EncodingMap[UNWIDE(lir->prev->opcode)].flags; + // Check that the instruction preceding the multiply-accumulate is a load or store. + if ((prev_insn_flags & IS_LOAD) != 0 || (prev_insn_flags & IS_STORE) != 0) { + // insert a NOP between the load/store and the multiply-accumulate. + LIR* new_lir = RawLIR(lir->dalvik_offset, kA64Nop0, 0, 0, 0, 0, 0, NULL); + new_lir->offset = lir->offset; + new_lir->flags.fixup = kFixupNone; + new_lir->flags.size = EncodingMap[kA64Nop0].size; + InsertLIRBefore(lir, new_lir); + lir->offset += new_lir->flags.size; + offset_adjustment += new_lir->flags.size; + res = kRetryAll; + } + } + } + break; default: LOG(FATAL) << "Unexpected case " << lir->flags.fixup; } diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 8a5a58c..dfdb76b 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -427,8 +427,7 @@ bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_d rl_src = LoadValue(rl_src, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage r_long_mul = AllocTemp(); - NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(), - r_magic.GetReg(), rl_src.reg.GetReg(), rxzr); + NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg()); switch (pattern) { case Divide3: OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), 
As64BitReg(r_long_mul), 32); @@ -648,7 +647,7 @@ RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegS } OpRegRegReg(kOpDiv, temp, r_src1, r_src2); NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(), - r_src1.GetReg(), r_src2.GetReg()); + r_src2.GetReg(), r_src1.GetReg()); FreeTemp(temp); } return rl_result; |