diff options
author | Ian Rogers <irogers@google.com> | 2014-01-28 00:29:31 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2014-01-28 00:29:31 +0000 |
commit | 7ea5dafc81b2bba7cabad26130bb75dc8f709803 (patch) | |
tree | dfd021549d31697d4c142699e38fb8fa00e64c58 | |
parent | 6e65720d99bd3387b72d528a46291f1ed8184ede (diff) | |
parent | 4708dcd68eebf1173aef1097dad8ab13466059aa (diff) | |
download | art-7ea5dafc81b2bba7cabad26130bb75dc8f709803.zip art-7ea5dafc81b2bba7cabad26130bb75dc8f709803.tar.gz art-7ea5dafc81b2bba7cabad26130bb75dc8f709803.tar.bz2 |
Merge "Improve x86 long multiply and shifts"
-rw-r--r-- | compiler/dex/quick/arm/codegen_arm.h | 1 | ||||
-rw-r--r-- | compiler/dex/quick/arm/int_arm.cc | 12 | ||||
-rw-r--r-- | compiler/dex/quick/codegen_util.cc | 24 | ||||
-rw-r--r-- | compiler/dex/quick/gen_common.cc | 20 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 23 | ||||
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 27 | ||||
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 28 | ||||
-rw-r--r-- | compiler/dex/quick/x86/int_x86.cc | 266 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 3 | ||||
-rw-r--r-- | disassembler/disassembler_x86.cc | 12 |
10 files changed, 379 insertions, 37 deletions
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 3668dc0..32673db 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -192,7 +192,6 @@ class ArmMir2Lir : public Mir2Lir { MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object); MIR* SpecialIdentity(MIR* mir); LIR* LoadFPConstantValue(int r_dest, int value); - bool BadOverlap(RegLocation rl_src, RegLocation rl_dest); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 71c3492..150794e 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -794,18 +794,6 @@ void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } - - /* - * Check to see if a result pair has a misaligned overlap with an operand pair. This - * is not usual for dx to generate, but it is legal (for now). In a future rev of - * dex, we'll want to make this case illegal. - */ -bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) { - DCHECK(rl_src.wide); - DCHECK(rl_dest.wide); - return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1); -} - void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { /* diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 12ecfff..1eb79c9 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1137,4 +1137,28 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) { new_lir->next->prev = new_lir; } +bool Mir2Lir::IsPowerOfTwo(uint64_t x) { + return (x & (x - 1)) == 0; +} + +// Returns the index of the lowest set bit in 'x'. +int32_t Mir2Lir::LowestSetBit(uint64_t x) { + int bit_posn = 0; + while ((x & 0xf) == 0) { + bit_posn += 4; + x >>= 4; + } + while ((x & 1) == 0) { + bit_posn++; + x >>= 1; + } + return bit_posn; +} + +bool Mir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) { + DCHECK(rl_src.wide); + DCHECK(rl_dest.wide); + return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1); +} + } // namespace art diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 1f00b2a..d8b9869 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1426,30 +1426,12 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, * or produce corresponding Thumb instructions directly. */ -static bool IsPowerOfTwo(int x) { - return (x & (x - 1)) == 0; -} - // Returns true if no more than two bits are set in 'x'. static bool IsPopCountLE2(unsigned int x) { x &= x - 1; return (x & (x - 1)) == 0; } -// Returns the index of the lowest set bit in 'x'. -static int32_t LowestSetBit(uint32_t x) { - int bit_posn = 0; - while ((x & 0xf) == 0) { - bit_posn += 4; - x >>= 4; - } - while ((x & 1) == 0) { - bit_posn++; - x >>= 1; - } - return bit_posn; -} - // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit' // and store the result in 'rl_dest'. bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, @@ -1741,7 +1723,7 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, break; case Instruction::MUL_LONG: case Instruction::MUL_LONG_2ADDR: - if (cu_->instruction_set == kThumb2) { + if (cu_->instruction_set != kMips) { GenMulLong(opcode, rl_dest, rl_src1, rl_src2); return; } else { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 60b783d..10136b6 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -880,6 +880,29 @@ class Mir2Lir : public Backend { CompilationUnit* GetCompilationUnit() { return cu_; } + /* + * @brief Returns the index of the lowest set bit in 'x'. + * @param x Value to be examined. + * @returns The bit number of the lowest bit set in the value. + */ + int32_t LowestSetBit(uint64_t x); + /* + * @brief Is this value a power of two? + * @param x Value to be examined. + * @returns 'true' if only 1 bit is set in the value. + */ + bool IsPowerOfTwo(uint64_t x); + /* + * @brief Do these SRs overlap? + * @param rl_op1 One RegLocation + * @param rl_op2 The other RegLocation + * @return 'true' if the VR pairs overlap + * + * Check to see if a result pair has a misaligned overlap with an operand pair. This + * is not usual for dx to generate, but it is legal (for now). In a future rev of + * dex, we'll want to make this case illegal. + */ + bool BadOverlap(RegLocation rl_op1, RegLocation rl_op2); /* * @brief Force a location (in a register) into a temporary register diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 35bdb0f..c29d6c4 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -211,6 +211,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, #undef SHIFT_ENCODING_MAP { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" }, + { kX86Shld32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" }, + { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" }, { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" }, { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" }, @@ -423,6 +425,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kThreadImm: // lir operands - 0: disp, 1: imm return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm + case kRegRegImmRev: return ComputeSize(entry, 0, 0, false); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm return ComputeSize(entry, lir->operands[1], lir->operands[2], false); @@ -643,7 +646,6 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - DCHECK_NE(rX86_SP, base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -756,6 +758,22 @@ void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, EmitImm(entry, imm); } +void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry, + uint8_t reg1, uint8_t reg2, int32_t imm) { + EmitRegRegImm(entry, reg2, reg1, imm); +} + +void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, + uint8_t reg, uint8_t base, int disp, int32_t imm) { + EmitPrefixAndOpcode(entry); + DCHECK(!X86_FPREG(reg)); + DCHECK_LT(reg, 8); + EmitModrmDisp(reg, base, disp); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + EmitImm(entry, imm); +} + void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); @@ -1187,9 +1205,16 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kRegRegStore: // lir operands - 0: reg2, 1: reg1 EmitRegReg(entry, lir->operands[1], lir->operands[0]); break; + case kRegRegImmRev: + EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]); + break; case kRegRegImm: EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; + case kRegMemImm: + EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3]); + break; case kRegImm: // lir operands - 0: reg, 1: immediate EmitRegImm(entry, lir->operands[0], lir->operands[1]); break; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 9cc4efd..6280b64 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -245,6 +245,8 @@ class X86Mir2Lir : public Mir2Lir { void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp); void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2); void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm); + void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm); + void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, int32_t imm); void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm); void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); @@ -337,6 +339,32 @@ class X86Mir2Lir : public Mir2Lir { * @param is_div 'true' if this is a division, 'false' for a remainder. */ RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div); + + /* + * Generate code to implement long shift operations. + * @param opcode The DEX opcode to specify the shift type. + * @param rl_dest The destination. + * @param rl_src The value to be shifted. + * @param shift_amount How much to shift. + * @returns the RegLocation of the result. + */ + RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src, int shift_amount); + /* + * Generate an imul of a register by a constant or a better sequence. + * @param dest Destination Register. + * @param src Source Register. + * @param val Constant multiplier. + */ + void GenImulRegImm(int dest, int src, int val); + /* + * Generate an imul of a memory location by a constant or a better sequence. + * @param dest Destination Register. + * @param sreg Symbolic register. + * @param displacement Displacement on stack of Symbolic Register. + * @param val Constant multiplier. + */ + void GenImulMemImm(int dest, int sreg, int displacement, int val); }; } // namespace art diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 3f5f33c..e665f70 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -845,9 +845,188 @@ LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { return NULL; } +void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) { + switch (val) { + case 0: + NewLIR2(kX86Xor32RR, dest, dest); + break; + case 1: + OpRegCopy(dest, src); + break; + default: + OpRegRegImm(kOpMul, dest, src, val); + break; + } +} + +void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) { + LIR *m; + switch (val) { + case 0: + NewLIR2(kX86Xor32RR, dest, dest); + break; + case 1: + LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg); + break; + default: + m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP, + displacement, val); + AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); + break; + } +} + void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenX86Long for x86"; + if (rl_src1.is_const) { + std::swap(rl_src1, rl_src2); + } + // Are we multiplying by a constant? + if (rl_src2.is_const) { + // Do special compare/branch against simple const operand + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + if (val == 0) { + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); + OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg); + StoreValueWide(rl_dest, rl_result); + return; + } else if (val == 1) { + rl_src1 = EvalLocWide(rl_src1, kCoreReg, true); + StoreValueWide(rl_dest, rl_src1); + return; + } else if (val == 2) { + GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); + return; + } else if (IsPowerOfTwo(val)) { + int shift_amount = LowestSetBit(val); + if (!BadOverlap(rl_src1, rl_dest)) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, + rl_src1, shift_amount); + StoreValueWide(rl_dest, rl_result); + return; + } + } + + // Okay, just bite the bullet and do it. + int32_t val_lo = Low32Bits(val); + int32_t val_hi = High32Bits(val); + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage. + rl_src1 = UpdateLocWide(rl_src1); + bool src1_in_reg = rl_src1.location == kLocPhysReg; + int displacement = SRegOffset(rl_src1.s_reg_low); + + // ECX <- 1H * 2L + // EAX <- 1L * 2H + if (src1_in_reg) { + GenImulRegImm(r1, rl_src1.high_reg, val_lo); + GenImulRegImm(r0, rl_src1.low_reg, val_hi); + } else { + GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); + GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); + } + + // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) + NewLIR2(kX86Add32RR, r1, r0); + + // EAX <- 2L + LoadConstantNoClobber(r0, val_lo); + + // EDX:EAX <- 2L * 1L (double precision) + if (src1_in_reg) { + NewLIR1(kX86Mul32DaR, rl_src1.low_reg); + } else { + LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + + // EDX <- EDX + ECX (add high words) + NewLIR2(kX86Add32RR, r2, r1); + + // Result is EDX:EAX + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); + return; + } + + // Nope. Do it the hard way + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage. + rl_src1 = UpdateLocWide(rl_src1); + rl_src2 = UpdateLocWide(rl_src2); + + // At this point, the VRs are in their home locations. + bool src1_in_reg = rl_src1.location == kLocPhysReg; + bool src2_in_reg = rl_src2.location == kLocPhysReg; + + // ECX <- 1H + if (src1_in_reg) { + NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg); + } else { + LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1, + kWord, GetSRegHi(rl_src1.s_reg_low)); + } + + // EAX <- 2H + if (src2_in_reg) { + NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg); + } else { + LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0, + kWord, GetSRegHi(rl_src2.s_reg_low)); + } + + // EAX <- EAX * 1L (2H * 1L) + if (src1_in_reg) { + NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg); + } else { + int displacement = SRegOffset(rl_src1.s_reg_low); + LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + + // ECX <- ECX * 2L (1H * 2L) + if (src2_in_reg) { + NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg); + } else { + int displacement = SRegOffset(rl_src2.s_reg_low); + LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + + // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) + NewLIR2(kX86Add32RR, r1, r0); + + // EAX <- 2L + if (src2_in_reg) { + NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg); + } else { + LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0, + kWord, rl_src2.s_reg_low); + } + + // EDX:EAX <- 2L * 1L (double precision) + if (src1_in_reg) { + NewLIR1(kX86Mul32DaR, rl_src1.low_reg); + } else { + int displacement = SRegOffset(rl_src1.s_reg_low); + LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + + // EDX <- EDX + ECX (add high words) + NewLIR2(kX86Add32RR, r2, r1); + + // Result is EDX:EAX + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, @@ -1147,10 +1326,89 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } } +RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src, int shift_amount) { + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. + if (shift_amount == 32) { + OpRegCopy(rl_result.high_reg, rl_src.low_reg); + LoadConstant(rl_result.low_reg, 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.high_reg, rl_src.low_reg); + FreeTemp(rl_src.high_reg); + NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32); + LoadConstant(rl_result.low_reg, 0); + } else { + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + OpRegCopy(rl_result.high_reg, rl_src.high_reg); + NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount); + NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + OpRegCopy(rl_result.high_reg, rl_src.high_reg); + NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + OpRegCopy(rl_result.high_reg, rl_src.high_reg); + NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32); + NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); + } else { + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + OpRegCopy(rl_result.high_reg, rl_src.high_reg); + NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount); + NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + LoadConstant(rl_result.high_reg, 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32); + LoadConstant(rl_result.high_reg, 0); + } else { + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + OpRegCopy(rl_result.high_reg, rl_src.high_reg); + NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount); + NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } + return rl_result; +} + void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_shift) { - // Default implementation is just to ignore the constant case. - GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); + RegLocation rl_src, RegLocation rl_shift) { + // Per spec, we only care about low 6 bits of shift amount. + int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; + if (shift_amount == 0) { + rl_src = LoadValueWide(rl_src, kCoreReg); + StoreValueWide(rl_dest, rl_src); + return; + } else if (shift_amount == 1 && + (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { + // Need to handle this here to avoid calling StoreValueWide twice. + GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); + return; + } + if (BadOverlap(rl_src, rl_dest)) { + GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); + return; + } + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount); + StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index e091a84..7f35d06 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -304,6 +304,8 @@ enum X86OpCode { BinaryShiftOpCode(kX86Sar), #undef BinaryShiftOpcode kX86Cmc, + kX86Shld32RRI, + kX86Shrd32RRI, #define UnaryOpcode(opcode, reg, mem, array) \ opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \ opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \ @@ -399,6 +401,7 @@ enum X86EncodingKind { kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. kMovRegImm, // Shorter form move RI. + kRegRegImmRev, // RRI with first reg in r/m kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate. kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL. kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index ef83498..6c25e0a 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -496,6 +496,18 @@ DISASSEMBLER_ENTRY(cmp, has_modrm = true; store = true; break; + case 0xA4: + opcode << "shld"; + has_modrm = true; + load = true; + immediate_bytes = 1; + break; + case 0xAC: + opcode << "shrd"; + has_modrm = true; + load = true; + immediate_bytes = 1; + break; case 0xAE: if (prefix[0] == 0xF3) { prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode |