summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Rogers <irogers@google.com>2014-01-28 00:29:31 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2014-01-28 00:29:31 +0000
commit7ea5dafc81b2bba7cabad26130bb75dc8f709803 (patch)
treedfd021549d31697d4c142699e38fb8fa00e64c58
parent6e65720d99bd3387b72d528a46291f1ed8184ede (diff)
parent4708dcd68eebf1173aef1097dad8ab13466059aa (diff)
downloadart-7ea5dafc81b2bba7cabad26130bb75dc8f709803.zip
art-7ea5dafc81b2bba7cabad26130bb75dc8f709803.tar.gz
art-7ea5dafc81b2bba7cabad26130bb75dc8f709803.tar.bz2
Merge "Improve x86 long multiply and shifts"
-rw-r--r--compiler/dex/quick/arm/codegen_arm.h1
-rw-r--r--compiler/dex/quick/arm/int_arm.cc12
-rw-r--r--compiler/dex/quick/codegen_util.cc24
-rw-r--r--compiler/dex/quick/gen_common.cc20
-rw-r--r--compiler/dex/quick/mir_to_lir.h23
-rw-r--r--compiler/dex/quick/x86/assemble_x86.cc27
-rw-r--r--compiler/dex/quick/x86/codegen_x86.h28
-rw-r--r--compiler/dex/quick/x86/int_x86.cc266
-rw-r--r--compiler/dex/quick/x86/x86_lir.h3
-rw-r--r--disassembler/disassembler_x86.cc12
10 files changed, 379 insertions, 37 deletions
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 3668dc0..32673db 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -192,7 +192,6 @@ class ArmMir2Lir : public Mir2Lir {
MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
MIR* SpecialIdentity(MIR* mir);
LIR* LoadFPConstantValue(int r_dest, int value);
- bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void AssignDataOffsets();
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 71c3492..150794e 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -794,18 +794,6 @@ void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
StoreValueWide(rl_dest, rl_result);
}
-
- /*
- * Check to see if a result pair has a misaligned overlap with an operand pair. This
- * is not usual for dx to generate, but it is legal (for now). In a future rev of
- * dex, we'll want to make this case illegal.
- */
-bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
- DCHECK(rl_src.wide);
- DCHECK(rl_dest.wide);
- return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
-}
-
void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2) {
/*
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 12ecfff..1eb79c9 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1137,4 +1137,28 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) {
new_lir->next->prev = new_lir;
}
+// Returns true iff exactly one bit is set in 'x', i.e. 'x' is a power of two.
+// Zero must be rejected explicitly: 0 & (0 - 1) == 0, so the bit trick alone
+// would report zero as a power of two.
+bool Mir2Lir::IsPowerOfTwo(uint64_t x) {
+  return x != 0 && (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x' (bit 0 is the least
+// significant bit).
+//
+// 'x' must be non-zero: zero has no set bit and the scan loops below would
+// never terminate on it. Debug builds assert on a zero argument; other builds
+// return -1 instead of hanging.
+int32_t Mir2Lir::LowestSetBit(uint64_t x) {
+  DCHECK(x != 0);
+  if (x == 0) {
+    return -1;
+  }
+  int bit_posn = 0;
+  // Skip whole zero nibbles first...
+  while ((x & 0xf) == 0) {
+    bit_posn += 4;
+    x >>= 4;
+  }
+  // ...then finish one bit at a time.
+  while ((x & 1) == 0) {
+    bit_posn++;
+    x >>= 1;
+  }
+  return bit_posn;
+}
+
+// Reports whether two wide locations start on virtual registers that are
+// exactly one apart. Both locations must be wide.
+bool Mir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
+  DCHECK(rl_src.wide);
+  DCHECK(rl_dest.wide);
+  const int src_vreg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
+  const int dest_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
+  const int distance = abs(src_vreg - dest_vreg);
+  return distance == 1;
+}
+
} // namespace art
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 1f00b2a..d8b9869 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1426,30 +1426,12 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
* or produce corresponding Thumb instructions directly.
*/
-static bool IsPowerOfTwo(int x) {
- return (x & (x - 1)) == 0;
-}
-
// Returns true if no more than two bits are set in 'x'.
static bool IsPopCountLE2(unsigned int x) {
x &= x - 1;
return (x & (x - 1)) == 0;
}
-// Returns the index of the lowest set bit in 'x'.
-static int32_t LowestSetBit(uint32_t x) {
- int bit_posn = 0;
- while ((x & 0xf) == 0) {
- bit_posn += 4;
- x >>= 4;
- }
- while ((x & 1) == 0) {
- bit_posn++;
- x >>= 1;
- }
- return bit_posn;
-}
-
// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1741,7 +1723,7 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
break;
case Instruction::MUL_LONG:
case Instruction::MUL_LONG_2ADDR:
- if (cu_->instruction_set == kThumb2) {
+ if (cu_->instruction_set != kMips) {
GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
return;
} else {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 60b783d..10136b6 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -880,6 +880,29 @@ class Mir2Lir : public Backend {
CompilationUnit* GetCompilationUnit() {
return cu_;
}
+ /*
+ * @brief Returns the index of the lowest set bit in 'x'.
+ * @param x Value to be examined. Must be non-zero, since zero has no set bit.
+ * @returns The bit number of the lowest bit set in the value (bit 0 is the
+ * least significant bit).
+ */
+ int32_t LowestSetBit(uint64_t x);
+ /*
+ * @brief Is this value a power of two?
+ * @param x Value to be examined.
+ * @returns 'true' if only 1 bit is set in the value.
+ */
+ bool IsPowerOfTwo(uint64_t x);
+ /*
+ * @brief Do these wide VR pairs have a misaligned overlap?
+ * @param rl_op1 One RegLocation (must be wide)
+ * @param rl_op2 The other RegLocation (must be wide)
+ * @return 'true' if the low VRs of the two pairs are exactly one apart, so
+ * that the pairs share a single VR. Identical pairs return 'false'.
+ *
+ * Check to see if a result pair has a misaligned overlap with an operand pair. This
+ * is not usual for dx to generate, but it is legal (for now). In a future rev of
+ * dex, we'll want to make this case illegal.
+ */
+ bool BadOverlap(RegLocation rl_op1, RegLocation rl_op2);
/*
* @brief Force a location (in a register) into a temporary register
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 35bdb0f..c29d6c4 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -211,6 +211,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
#undef SHIFT_ENCODING_MAP
{ kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
+ { kX86Shld32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
+ { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
{ kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
{ kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -423,6 +425,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) {
case kThreadImm: // lir operands - 0: disp, 1: imm
return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm
+ case kRegRegImmRev:
return ComputeSize(entry, 0, 0, false);
case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm
return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
@@ -643,7 +646,6 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp)
DCHECK_NE(0x0F, entry->skeleton.opcode);
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
- DCHECK_NE(rX86_SP, base);
EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -756,6 +758,22 @@ void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry,
EmitImm(entry, imm);
}
+// Emits a register/register/immediate instruction for the kRegRegImmRev
+// encoding kind: the two register operands are swapped and the work is
+// delegated to EmitRegRegImm.
+void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
+ uint8_t reg1, uint8_t reg2, int32_t imm) {
+ EmitRegRegImm(entry, reg2, reg1, imm);
+}
+
+// Emits a register/memory/immediate instruction: prefix and opcode bytes,
+// then a ModRM (plus displacement) naming 'reg' and [base + disp], then the
+// immediate. 'reg' must be a core register numbered below 8, and the encoding
+// entry must not carry a modrm_opcode or ax_opcode.
+void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
+                               uint8_t reg, uint8_t base, int disp, int32_t imm) {
+  // Validate the operands and the encoding entry up front.
+  DCHECK(!X86_FPREG(reg));
+  DCHECK_LT(reg, 8);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+
+  // Byte emission order is significant: opcode, ModRM/displacement, immediate.
+  EmitPrefixAndOpcode(entry);
+  EmitModrmDisp(reg, base, disp);
+  EmitImm(entry, imm);
+}
+
void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
if (entry->skeleton.prefix1 != 0) {
code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1187,9 +1205,16 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
case kRegRegStore: // lir operands - 0: reg2, 1: reg1
EmitRegReg(entry, lir->operands[1], lir->operands[0]);
break;
+ case kRegRegImmRev:
+ EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+ break;
case kRegRegImm:
EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
+ case kRegMemImm:
+ EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+ lir->operands[3]);
+ break;
case kRegImm: // lir operands - 0: reg, 1: immediate
EmitRegImm(entry, lir->operands[0], lir->operands[1]);
break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 9cc4efd..6280b64 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -245,6 +245,8 @@ class X86Mir2Lir : public Mir2Lir {
void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+ void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+ void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, int32_t imm);
void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
@@ -337,6 +339,32 @@ class X86Mir2Lir : public Mir2Lir {
* @param is_div 'true' if this is a division, 'false' for a remainder.
*/
RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+
+ /*
+ * Generate code to implement long shift operations by a constant amount.
+ * The result is left in the returned location; this routine does not store
+ * it to rl_dest, so the caller is expected to do that.
+ * @param opcode The DEX opcode to specify the shift type (SHL_LONG, SHR_LONG,
+ * USHR_LONG or their 2ADDR forms; anything else is a fatal error).
+ * @param rl_dest The destination.
+ * @param rl_src The value to be shifted; its low_reg/high_reg are read directly.
+ * @param shift_amount How much to shift.
+ * @returns the RegLocation of the result.
+ */
+ RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src, int shift_amount);
+ /*
+ * Generate an imul of a register by a constant or a better sequence.
+ * A multiplier of 0 clears the destination with an xor and a multiplier of 1
+ * becomes a register copy; every other value uses a multiply.
+ * @param dest Destination Register.
+ * @param src Source Register.
+ * @param val Constant multiplier.
+ */
+ void GenImulRegImm(int dest, int src, int val);
+ /*
+ * Generate an imul of a memory location by a constant or a better sequence.
+ * The memory operand is addressed relative to the stack pointer. A multiplier
+ * of 0 clears the destination with an xor and a multiplier of 1 becomes a
+ * plain load; every other value uses a multiply with a memory operand.
+ * @param dest Destination Register.
+ * @param sreg Symbolic register.
+ * @param displacement Displacement on stack of Symbolic Register.
+ * @param val Constant multiplier.
+ */
+ void GenImulMemImm(int dest, int sreg, int displacement, int val);
};
} // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 3f5f33c..e665f70 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -845,9 +845,188 @@ LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
return NULL;
}
+// Emits dest = src * val, using a cheaper sequence for the trivial
+// multipliers 0 and 1.
+void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) {
+  if (val == 0) {
+    // Anything times zero: clear the destination.
+    NewLIR2(kX86Xor32RR, dest, dest);
+  } else if (val == 1) {
+    // Identity: a register copy is enough.
+    OpRegCopy(dest, src);
+  } else {
+    OpRegRegImm(kOpMul, dest, src, val);
+  }
+}
+
+// Emits dest = [SP + displacement] * val, using a cheaper sequence for the
+// trivial multipliers 0 and 1.
+void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) {
+  if (val == 0) {
+    // Anything times zero: clear the destination.
+    NewLIR2(kX86Xor32RR, dest, dest);
+    return;
+  }
+  if (val == 1) {
+    // Identity: just load the stack slot.
+    LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg);
+    return;
+  }
+  // Use the short-immediate form when the multiplier fits in a signed byte.
+  const X86OpCode imul_op = IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI;
+  LIR* imul = NewLIR4(imul_op, dest, rX86_SP, displacement, val);
+  AnnotateDalvikRegAccess(imul, displacement >> 2, true /* is_load */, true /* is_64bit */);
+}
+
+// Generates a 64-bit multiply of rl_src1 by rl_src2 into rl_dest.
+//
+// With the operands split into 32-bit halves (1H:1L and 2H:2L), the low 64
+// bits of the product are
+//   (1L * 2L, as a full 64-bit product) + ((1H * 2L + 1L * 2H) << 32).
+// The general sequence computes the cross terms in ECX/EAX, the widening
+// product in EDX:EAX, and adds the cross terms into EDX.
+//
+// Constant multipliers get cheaper sequences where one exists: 0 (clear),
+// 1 (copy), 2 (add to itself) and other powers of two (shift, unless source
+// and destination pairs overlap badly).
void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenX86Long for x86";
+  // Canonicalize so that a constant operand, if any, ends up in rl_src2.
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+  }
+  // Are we multiplying by a constant?
+  if (rl_src2.is_const) {
+    // Peel off the multipliers that have a cheaper sequence than a multiply.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if (val == 0) {
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+      OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    } else if (val == 1) {
+      // Identity: load the operand and store it, exactly as the shift-by-zero
+      // path in GenShiftImmOpLong does. The operand has to be loaded here
+      // rather than merely assigned a register location.
+      rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+      StoreValueWide(rl_dest, rl_src1);
+      return;
+    } else if (val == 2) {
+      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+      return;
+    } else if (IsPowerOfTwo(val)) {
+      int shift_amount = LowestSetBit(val);
+      // With a misaligned overlap fall through to the generic constant path.
+      if (!BadOverlap(rl_src1, rl_dest)) {
+        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
+                                                  rl_src1, shift_amount);
+        StoreValueWide(rl_dest, rl_result);
+        return;
+      }
+    }
+
+    // Okay, just bite the bullet and do it.
+    int32_t val_lo = Low32Bits(val);
+    int32_t val_hi = High32Bits(val);
+    FlushAllRegs();
+    LockCallTemps();  // Prepare for explicit register usage.
+    rl_src1 = UpdateLocWide(rl_src1);
+    bool src1_in_reg = rl_src1.location == kLocPhysReg;
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+
+    // ECX <- 1H * 2L
+    // EAX <- 1L * 2H
+    if (src1_in_reg) {
+      GenImulRegImm(r1, rl_src1.high_reg, val_lo);
+      GenImulRegImm(r0, rl_src1.low_reg, val_hi);
+    } else {
+      GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
+      GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
+    }
+
+    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+    NewLIR2(kX86Add32RR, r1, r0);
+
+    // EAX <- 2L
+    LoadConstantNoClobber(r0, val_lo);
+
+    // EDX:EAX <- 2L * 1L (double precision)
+    if (src1_in_reg) {
+      NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+    } else {
+      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, true /* is_64bit */);
+    }
+
+    // EDX <- EDX + ECX (add high words)
+    NewLIR2(kX86Add32RR, r2, r1);
+
+    // Result is EDX:EAX
+    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                             INVALID_SREG, INVALID_SREG};
+    StoreValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // Nope.  Do it the hard way
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // At this point, the VRs are in their home locations.
+  bool src1_in_reg = rl_src1.location == kLocPhysReg;
+  bool src2_in_reg = rl_src2.location == kLocPhysReg;
+
+  // ECX <- 1H
+  if (src1_in_reg) {
+    NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1,
+                 kWord, GetSRegHi(rl_src1.s_reg_low));
+  }
+
+  // EAX <- 2H
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0,
+                 kWord, GetSRegHi(rl_src2.s_reg_low));
+  }
+
+  // EAX <- EAX * 1L  (2H * 1L)
+  if (src1_in_reg) {
+    NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX * 2L  (1H * 2L)
+  if (src2_in_reg) {
+    NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src2.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+  NewLIR2(kX86Add32RR, r1, r0);
+
+  // EAX <- 2L
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0,
+                 kWord, rl_src2.s_reg_low);
+  }
+
+  // EDX:EAX <- 2L * 1L (double precision)
+  if (src1_in_reg) {
+    NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // EDX <- EDX + ECX (add high words)
+  NewLIR2(kX86Add32RR, r2, r1);
+
+  // Result is EDX:EAX
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                           INVALID_SREG, INVALID_SREG};
+  StoreValueWide(rl_dest, rl_result);
}
void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
@@ -1147,10 +1326,89 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
}
}
+// Emits code for a 64-bit shift of rl_src by the constant shift_amount and
+// returns the location holding the result. Nothing is stored to rl_dest here;
+// the result location is only evaluated for it. Each shift kind has three
+// cases: exactly one word (32), more than one word (> 31), and within a word.
+// An opcode other than the SHL/SHR/USHR long forms is a fatal error.
+RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src, int shift_amount) {
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ switch (opcode) {
+ case Instruction::SHL_LONG:
+ case Instruction::SHL_LONG_2ADDR:
+ DCHECK_NE(shift_amount, 1); // Prevent a double store from happening.
+ if (shift_amount == 32) {
+ // Shift by exactly one word: high <- low, low <- 0.
+ OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+ LoadConstant(rl_result.low_reg, 0);
+ } else if (shift_amount > 31) {
+ // Shift by more than one word: high <- low << (amount - 32), low <- 0.
+ OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+ FreeTemp(rl_src.high_reg);
+ NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32);
+ LoadConstant(rl_result.low_reg, 0);
+ } else {
+ // Shift within a word: copy the pair, let shld shift the high word while
+ // pulling bits in from the low word, then shift the low word itself.
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount);
+ NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount);
+ }
+ break;
+ case Instruction::SHR_LONG:
+ case Instruction::SHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ // Shift by exactly one word: low <- high, high <- high >> 31 (arithmetic).
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+ } else if (shift_amount > 31) {
+ // Shift by more than one word: low <- high >> (amount - 32) (arithmetic),
+ // high <- high >> 31 (arithmetic).
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+ } else {
+ // Shift within a word: copy the pair, let shrd shift the low word while
+ // pulling bits in from the high word, then arithmetic-shift the high word.
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+ NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount);
+ }
+ break;
+ case Instruction::USHR_LONG:
+ case Instruction::USHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ // Shift by exactly one word: low <- high, high <- 0.
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ LoadConstant(rl_result.high_reg, 0);
+ } else if (shift_amount > 31) {
+ // Shift by more than one word: low <- high >> (amount - 32) (logical),
+ // high <- 0.
+ OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+ NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32);
+ LoadConstant(rl_result.high_reg, 0);
+ } else {
+ // Shift within a word: same as SHR_LONG, but the high word is shifted
+ // logically instead of arithmetically.
+ OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+ OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+ NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+ NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected case";
+ }
+ return rl_result;
+}
+
void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
- RegLocation rl_src1, RegLocation rl_shift) {
- // Default implementation is just to ignore the constant case.
- GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+                                   RegLocation rl_src, RegLocation rl_shift) {
+  // Only the low 6 bits of the shift amount matter, per spec.
+  const int amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  const bool is_shift_left =
+      (opcode == Instruction::SHL_LONG) || (opcode == Instruction::SHL_LONG_2ADDR);
+
+  // Shift by zero: a plain copy of the source.
+  if (amount == 0) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  }
+
+  // Left shift by one: x + x. Handled here so that StoreValueWide is not
+  // called twice.
+  if (amount == 1 && is_shift_left) {
+    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
+    return;
+  }
+
+  // A misaligned overlap of source and destination takes the generic path.
+  if (BadOverlap(rl_src, rl_dest)) {
+    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+    return;
+  }
+
+  // Common case: shift in registers by the constant, then store the result.
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation shifted = GenShiftImmOpLong(opcode, rl_dest, rl_src, amount);
+  StoreValueWide(rl_dest, shifted);
}
void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index e091a84..7f35d06 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -304,6 +304,8 @@ enum X86OpCode {
BinaryShiftOpCode(kX86Sar),
#undef BinaryShiftOpcode
kX86Cmc,
+ kX86Shld32RRI,
+ kX86Shrd32RRI,
#define UnaryOpcode(opcode, reg, mem, array) \
opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -399,6 +401,7 @@ enum X86EncodingKind {
kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds.
kMovRegImm, // Shorter form move RI.
+ kRegRegImmRev, // RRI with first reg in r/m
kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate.
kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL.
kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds.
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index ef83498..6c25e0a 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -496,6 +496,18 @@ DISASSEMBLER_ENTRY(cmp,
has_modrm = true;
store = true;
break;
+ case 0xA4:
+ opcode << "shld";
+ has_modrm = true;
+ load = true;
+ immediate_bytes = 1;
+ break;
+ case 0xAC:
+ opcode << "shrd";
+ has_modrm = true;
+ load = true;
+ immediate_bytes = 1;
+ break;
case 0xAE:
if (prefix[0] == 0xF3) {
prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode