author     Mark Mendell <mark.p.mendell@intel.com>    2014-02-19 20:06:20 -0800
committer  Ian Rogers <irogers@google.com>            2014-02-20 15:46:42 -0800
commit     4028a6c83a339036864999fdfd2855b012a9f1a7 (patch)
tree       c86f355cb39adc7a14469f0a4e5727623fbda443 /compiler
parent     0b2b3dbaa3db62c0af0d2f23f6aa1c539afe7443 (diff)
Inline x86 String.indexOf
Take advantage of the presence of a constant search char or start index
to tune the generated code.

Change-Id: I0adcf184fb91b899a95aa4d8ef044a14deb51d88
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
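For orientation, here is a minimal C++ sketch (editorial, not part of the
patch; names are illustrative) of the search the inlined fast path performs.
It mirrors the repne scasw register contract used in target_x86.cc below:
EAX holds the 16-bit character, ECX the word count, EDI the scan pointer,
and a hit leaves EDI one word past the match.

    #include <cstdint>

    // Returns the index of 'ch' in 'chars' starting at 'start', or -1.
    int IndexOfFastPath(const uint16_t* chars, int count, uint16_t ch, int start) {
      if (start < 0) start = 0;           // A constant negative start clamps to 0.
      if (start >= count) return -1;      // Start past the end: not found.
      const uint16_t* p = chars + start;  // EDI: where the scan begins.
      int remaining = count - start;      // ECX: words left to scan.
      while (remaining != 0) {            // repne scasw: decrement ECX,
        --remaining;                      //   advance EDI by one word,
        if (*p++ == ch) {                 //   stop when a word equals AX.
          return static_cast<int>(p - chars) - 1;  // EDI is one past the match.
        }
      }
      return -1;                          // ECX reached zero without a match.
    }

When the search char or start index is a compile-time constant, the generated
code can skip the char <= 0xFFFF check and fold the start adjustment; that is
the tuning the commit message describes.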
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/quick/gen_invoke.cc        |   8
-rw-r--r--  compiler/dex/quick/mir_to_lir.h         |   2
-rw-r--r--  compiler/dex/quick/x86/assemble_x86.cc  |  31
-rw-r--r--  compiler/dex/quick/x86/codegen_x86.h    |   9
-rw-r--r--  compiler/dex/quick/x86/target_x86.cc    | 178
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h        |   4
6 files changed, 224 insertions(+), 8 deletions(-)
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 5fa4596..35d193c 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1257,17 +1257,13 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
} else {
LoadValueDirectFixed(rl_start, reg_start);
}
- int r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)) : 0;
+ int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf));
GenNullCheck(rl_obj.s_reg_low, reg_ptr, info->opt_flags);
LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info));
intrinsic_launchpads_.Insert(launch_pad);
OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, launch_pad);
// NOTE: not a safepoint
- if (cu_->instruction_set != kX86) {
- OpReg(kOpBlx, r_tgt);
- } else {
- OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pIndexOf));
- }
+ OpReg(kOpBlx, r_tgt);
LIR* resume_tgt = NewLIR0(kPseudoTargetLabel);
launch_pad->operands[2] = WrapPointer(resume_tgt);
// Record that we've already inlined & null checked
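Aside (editorial sketch, not part of the patch): the hunk above can drop the
x86 special case because GenInlinedIndexOf becomes a virtual hook in the next
hunk, so the x86 backend overrides it instead. A minimal C++ sketch of the
dispatch pattern, with class names from the patch and placeholder bodies:

    struct CallInfo;

    class Mir2Lir {
     public:
      virtual ~Mir2Lir() {}
      // Portable path: loads the pIndexOf helper and branches to it.
      virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based) {
        return false;  // Placeholder: the real code emits the helper call.
      }
    };

    class X86Mir2Lir : public Mir2Lir {
     public:
      // x86 override: emits the repne scasw fast path, or returns false so
      // the caller falls back to a regular call.
      bool GenInlinedIndexOf(CallInfo* info, bool zero_based) override {
        return false;  // Placeholder body.
      }
    };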
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c60c394..6e97c53 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -676,7 +676,7 @@ class Mir2Lir : public Backend {
bool GenInlinedAbsDouble(CallInfo* info);
bool GenInlinedFloatCvt(CallInfo* info);
bool GenInlinedDoubleCvt(CallInfo* info);
- bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+ virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
bool GenInlinedStringCompareTo(CallInfo* info);
bool GenInlinedCurrentThread(CallInfo* info);
bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 6481589..538ce0d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -175,6 +175,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
{ kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+ { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
+
{ kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
{ kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" },
@@ -354,6 +356,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" },
{ kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" },
{ kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" },
+ { kX86Jecxz8, kJmp, NO_OPERAND | IS_BRANCH | NEEDS_FIXUP | REG_USEC, { 0, 0, 0xE3, 0, 0, 0, 0, 0 }, "Jecxz", "!0t" },
{ kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" },
{ kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
{ kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
@@ -364,6 +367,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
{ kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" },
{ kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr", "!0r,!1d" },
+ { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
};
static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
@@ -407,6 +411,8 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) {
return lir->operands[0]; // length of nop is sole operand
case kNullary:
return 1; // 1 byte of opcode
+ case kPrefix2Nullary:
+ return 3; // 1 byte of opcode + 2 prefixes
case kRegOpcode: // lir operands - 0: reg
return ComputeSize(entry, 0, 0, false) - 1; // subtract 1 for modrm
case kReg: // lir operands - 0: reg
@@ -489,7 +495,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) {
return 6; // 2 byte opcode + rel32
}
case kJmp:
- if (lir->opcode == kX86Jmp8) {
+ if (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jecxz8) {
return 2; // opcode + rel8
} else if (lir->opcode == kX86Jmp32) {
return 5; // opcode + rel32
@@ -957,6 +963,10 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) {
code_buffer_.push_back((rel >> 8) & 0xFF);
code_buffer_.push_back((rel >> 16) & 0xFF);
code_buffer_.push_back((rel >> 24) & 0xFF);
+ } else if (entry->opcode == kX86Jecxz8) {
+ DCHECK(IS_SIMM8(rel));
+ code_buffer_.push_back(0xE3);
+ code_buffer_.push_back(rel & 0xFF);
} else {
DCHECK(entry->opcode == kX86JmpR);
code_buffer_.push_back(entry->skeleton.opcode);
@@ -1148,6 +1158,17 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
lir->operands[0] = delta;
break;
}
+ case kX86Jecxz8: {
+ LIR *target_lir = lir->target;
+ DCHECK(target_lir != NULL);
+ CodeOffset pc;
+ pc = lir->offset + 2; // opcode + rel8
+ CodeOffset target = target_lir->offset;
+ int delta = target - pc;
+ lir->operands[0] = delta;
+ DCHECK(IS_SIMM8(delta));
+ break;
+ }
case kX86Jmp8: {
LIR *target_lir = lir->target;
DCHECK(target_lir != NULL);
@@ -1226,6 +1247,14 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
break;
+ case kPrefix2Nullary: // 1 byte of opcode + 2 prefixes.
+ DCHECK_NE(0, entry->skeleton.prefix1);
+ DCHECK_NE(0, entry->skeleton.prefix2);
+ EmitPrefixAndOpcode(entry);
+ DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+ DCHECK_EQ(0, entry->skeleton.ax_opcode);
+ DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+ break;
case kRegOpcode: // lir operands - 0: reg
EmitOpRegOpcode(entry, lir->operands[0]);
break;
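Editorial sketch of the new kPrefix2Nullary emission path: for kX86RepneScasw
the skeleton above is { 0x66, 0xF2, 0xAF, ... }, so EmitPrefixAndOpcode ends
up appending three bytes, matching the size of 3 returned by GetInsnSize. The
buffer type here is illustrative:

    #include <cstdint>
    #include <vector>

    void EmitRepneScaswSketch(std::vector<uint8_t>& code_buffer) {
      code_buffer.push_back(0x66);  // Operand-size prefix: scan 16-bit words.
      code_buffer.push_back(0xF2);  // REPNE: repeat while ECX != 0 and ZF == 0.
      code_buffer.push_back(0xAF);  // SCAS opcode; 0x66 selects the word form.
    }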
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6100a1d..421d51e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -359,6 +359,15 @@ class X86Mir2Lir : public Mir2Lir {
void GenConstWide(RegLocation rl_dest, int64_t value);
/*
+ * @brief Generate inline code for the fast case of String.indexOf.
+ * @param info Call parameters
+ * @param zero_based 'true' if the search starts at index 0.
+ * @returns 'true' if the call was inlined, 'false' if a regular call needs to be
+ * generated.
+ */
+ bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+
+ /*
* @brief Return the correct x86 opcode for the Dex operation
* @param op Dex opcode for the operation
* @param loc Register location of the operand
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index fa9a944..ad5b154 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -185,6 +185,14 @@ void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
if (flags & REG_USEB) {
SetupRegMask(&lir->u.m.use_mask, rBX);
}
+
+ // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
+ if (lir->opcode == kX86RepneScasw) {
+ SetupRegMask(&lir->u.m.use_mask, rAX);
+ SetupRegMask(&lir->u.m.use_mask, rCX);
+ SetupRegMask(&lir->u.m.use_mask, rDI);
+ SetupRegMask(&lir->u.m.def_mask, rDI);
+ }
}
/* For dumping instructions */
@@ -936,4 +944,174 @@ void X86Mir2Lir::InstallLiteralPools() {
Mir2Lir::InstallLiteralPools();
}
+// Offsets within java.lang.String.
+#define STRING_VALUE_OFFSET 8
+#define STRING_COUNT_OFFSET 12
+#define STRING_OFFSET_OFFSET 20
+#define STRING_DATA_OFFSET 12
+
+/*
+ * Fast String.indexOf(I) & (II). Inlines the check for the simple case of char <= 0xffff;
+ * otherwise bails out to the standard library code.
+ */
+bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
+ ClobberCallerSave();
+ LockCallTemps(); // Using fixed registers
+
+ // EAX: 16 bit character being searched.
+ // ECX: count: number of words to be searched.
+ // EDI: String being searched.
+ // EDX: temporary during execution.
+ // EBX: temporary during execution.
+
+ RegLocation rl_obj = info->args[0];
+ RegLocation rl_char = info->args[1];
+ RegLocation rl_start = info->args[2];
+
+ uint32_t char_value =
+ rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
+
+ if (char_value > 0xFFFF) {
+ // We have to punt to the real String.indexOf.
+ return false;
+ }
+
+ // Okay, we are committed to inlining this.
+ RegLocation rl_return = GetReturn(false);
+ RegLocation rl_dest = InlineTarget(info);
+
+ // Is the string non-NULL?
+ LoadValueDirectFixed(rl_obj, rDX);
+ GenNullCheck(rl_obj.s_reg_low, rDX, info->opt_flags);
+
+ // Record that we have inlined & null checked the object.
+ info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK);
+
+ // Does the character fit in 16 bits?
+ LIR* launch_pad = nullptr;
+ if (rl_char.is_const) {
+ // We need the value in EAX.
+ LoadConstantNoClobber(rAX, char_value);
+ } else {
+ // Character is not a constant; compare at runtime.
+ LoadValueDirectFixed(rl_char, rAX);
+ launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info));
+ intrinsic_launchpads_.Insert(launch_pad);
+ OpCmpImmBranch(kCondGt, rAX, 0xFFFF, launch_pad);
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+
+ // Character is in EAX.
+ // Object pointer is in EDX.
+
+ // We need to preserve EDI, but have no spare registers, so push it on the stack.
+ // We have to remember that all stack addresses after this are offset by sizeof(EDI).
+ NewLIR1(kX86Push32R, rDI);
+
+ // Compute into rCX the number of words to search.
+ LoadWordDisp(rDX, STRING_COUNT_OFFSET, rCX);
+ LIR *length_compare = nullptr;
+ int start_value = 0;
+ if (zero_based) {
+ // We have to handle an empty string. Use special instruction JECXZ.
+ length_compare = NewLIR0(kX86Jecxz8);
+ } else {
+ // We have to offset by the start index.
+ if (rl_start.is_const) {
+ start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
+ start_value = std::max(start_value, 0);
+
+ // Is the start > count?
+ length_compare = OpCmpImmBranch(kCondLe, rCX, start_value, nullptr);
+
+ if (start_value != 0) {
+ OpRegImm(kOpSub, rCX, start_value);
+ }
+ } else {
+ // Runtime start index.
+ rl_start = UpdateLoc(rl_start);
+ if (rl_start.location == kLocPhysReg) {
+ length_compare = OpCmpBranch(kCondLe, rCX, rl_start.low_reg, nullptr);
+ OpRegReg(kOpSub, rCX, rl_start.low_reg);
+ } else {
+ // Compare to memory to avoid a register load. Handle pushed EDI.
+ int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
+ OpRegMem(kOpCmp, rDX, rX86_SP, displacement);
+ length_compare = NewLIR2(kX86Jcc8, 0, kX86CondLe);
+ OpRegMem(kOpSub, rCX, rX86_SP, displacement);
+ }
+ }
+ }
+ DCHECK(length_compare != nullptr);
+
+ // ECX now contains the count in words to be searched.
+
+ // Load the address of the string into EBX.
+ // The string starts at VALUE(String) + 2 * OFFSET(String) + STRING_DATA_OFFSET.
+ LoadWordDisp(rDX, STRING_VALUE_OFFSET, rDI);
+ LoadWordDisp(rDX, STRING_OFFSET_OFFSET, rBX);
+ OpLea(rBX, rDI, rBX, 1, STRING_DATA_OFFSET);
+
+ // Now compute into EDI where the search will start.
+ if (zero_based || rl_start.is_const) {
+ if (start_value == 0) {
+ OpRegCopy(rDI, rBX);
+ } else {
+ NewLIR3(kX86Lea32RM, rDI, rBX, 2 * start_value);
+ }
+ } else {
+ if (rl_start.location == kLocPhysReg) {
+ if (rl_start.low_reg == rDI) {
+ // We have a slight problem here. We are already using EDI!
+ // Grab the value from the stack.
+ LoadWordDisp(rX86_SP, 0, rDX);
+ OpLea(rDI, rBX, rDX, 1, 0);
+ } else {
+ OpLea(rDI, rBX, rl_start.low_reg, 1, 0);
+ }
+ } else {
+ OpRegCopy(rDI, rBX);
+ // Load the start index from stack, remembering that we pushed EDI.
+ int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
+ LoadWordDisp(rX86_SP, displacement, rDX);
+ OpLea(rDI, rBX, rDX, 1, 0);
+ }
+ }
+
+ // EDI now contains the start of the string to be searched.
+ // We are all prepared to do the search for the character.
+ NewLIR0(kX86RepneScasw);
+
+ // Did we find a match?
+ LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
+
+ // Yes, we matched. Compute the index of the result.
+ // index = ((curr_ptr - orig_ptr) / 2) - 1.
+ OpRegReg(kOpSub, rDI, rBX);
+ OpRegImm(kOpAsr, rDI, 1);
+ NewLIR3(kX86Lea32RM, rl_return.low_reg, rDI, -1);
+ LIR *all_done = NewLIR1(kX86Jmp8, 0);
+
+ // Failed to match; return -1.
+ LIR *not_found = NewLIR0(kPseudoTargetLabel);
+ length_compare->target = not_found;
+ failed_branch->target = not_found;
+ LoadConstantNoClobber(rl_return.low_reg, -1);
+
+ // And join up at the end.
+ all_done->target = NewLIR0(kPseudoTargetLabel);
+ // Restore EDI from the stack.
+ NewLIR1(kX86Pop32R, rDI);
+
+ // Out of line code returns here.
+ if (launch_pad != nullptr) {
+ LIR *return_point = NewLIR0(kPseudoTargetLabel);
+ launch_pad->operands[2] = WrapPointer(return_point);
+ }
+
+ StoreValue(rl_dest, rl_return);
+ return true;
+}
+
} // namespace art
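Editorial sketch of the two pieces of arithmetic in the function above
(offsets from the STRING_* #defines; types illustrative). The char data of a
string lives at value + 2 * offset + STRING_DATA_OFFSET, and after repne
scasw stops on a match, EDI points one word past the matching character:

    #include <cstdint>

    // Base address of the char data, per the OpLea above (2 bytes per char).
    uintptr_t CharDataBase(uintptr_t value_array, uint32_t string_offset) {
      return value_array + 2 * string_offset + 12;  // STRING_DATA_OFFSET == 12
    }

    // index = ((curr_ptr - orig_ptr) / 2) - 1, as computed with sub/sar/lea.
    int MatchIndex(uintptr_t curr_ptr, uintptr_t orig_ptr) {
      return static_cast<int>((curr_ptr - orig_ptr) / 2) - 1;
    }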
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 480d5f5..4064bd6 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -277,6 +277,7 @@ enum X86OpCode {
kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
+ kX86Lea32RM,
kX86Lea32RA,
// RRC - Register Register ConditionCode - cond_opcode reg1, reg2
// - lir operands - 0: reg1, 1: reg2, 2: CC
@@ -384,6 +385,7 @@ enum X86OpCode {
kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned
kX86JmpR, // jmp reg; lir operands - 0: reg
+ kX86Jecxz8, // jecxz rel8; jump relative if ECX is zero.
kX86CallR, // call reg; lir operands - 0: reg
kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp
kX86CallA, // call [base + index * scale + disp]
@@ -396,6 +398,7 @@ enum X86OpCode {
kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement]
// lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
+ kX86RepneScasw, // repne scasw
kX86Last
};
@@ -404,6 +407,7 @@ enum X86EncodingKind {
kData, // Special case for raw data.
kNop, // Special case for variable length nop.
kNullary, // Opcode that takes no arguments.
+ kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes.
kRegOpcode, // Shorter form of R instruction kind (opcode+rd)
kReg, kMem, kArray, // R, M and A instruction kinds.
kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds.