diff options
author | Mark Mendell <mark.p.mendell@intel.com> | 2014-02-19 20:06:20 -0800 |
---|---|---|
committer | Ian Rogers <irogers@google.com> | 2014-02-20 15:46:42 -0800 |
commit | 4028a6c83a339036864999fdfd2855b012a9f1a7 (patch) | |
tree | c86f355cb39adc7a14469f0a4e5727623fbda443 /compiler | |
parent | 0b2b3dbaa3db62c0af0d2f23f6aa1c539afe7443 (diff) | |
download | art-4028a6c83a339036864999fdfd2855b012a9f1a7.zip art-4028a6c83a339036864999fdfd2855b012a9f1a7.tar.gz art-4028a6c83a339036864999fdfd2855b012a9f1a7.tar.bz2 |
Inline x86 String.indexOf
Take advantage of the presence of a constant search char or start index
to tune the generated code.
Change-Id: I0adcf184fb91b899a95aa4d8ef044a14deb51d88
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/dex/quick/gen_invoke.cc | 8 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 2 | ||||
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 31 | ||||
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 9 | ||||
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 178 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 4 |
6 files changed, 224 insertions, 8 deletions
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 5fa4596..35d193c 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1257,17 +1257,13 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } else { LoadValueDirectFixed(rl_start, reg_start); } - int r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)) : 0; + int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)); GenNullCheck(rl_obj.s_reg_low, reg_ptr, info->opt_flags); LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, launch_pad); // NOTE: not a safepoint - if (cu_->instruction_set != kX86) { - OpReg(kOpBlx, r_tgt); - } else { - OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pIndexOf)); - } + OpReg(kOpBlx, r_tgt); LIR* resume_tgt = NewLIR0(kPseudoTargetLabel); launch_pad->operands[2] = WrapPointer(resume_tgt); // Record that we've already inlined & null checked diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index c60c394..6e97c53 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -676,7 +676,7 @@ class Mir2Lir : public Backend { bool GenInlinedAbsDouble(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); bool GenInlinedDoubleCvt(CallInfo* info); - bool GenInlinedIndexOf(CallInfo* info, bool zero_based); + virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based); bool GenInlinedStringCompareTo(CallInfo* info); bool GenInlinedCurrentThread(CallInfo* info); bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile); diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 6481589..538ce0d 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -175,6 +175,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" }, + { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, + { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" }, @@ -354,6 +356,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" }, { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" }, { kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" }, + { kX86Jecxz8, kJmp, NO_OPERAND | IS_BRANCH | NEEDS_FIXUP | REG_USEC, { 0, 0, 0xE3, 0, 0, 0, 0, 0 }, "Jecxz", "!0t" }, { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" }, { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" }, { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" }, @@ -364,6 +367,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" }, { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr", "!0r,!1d" }, + { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" }, }; static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) { @@ -407,6 +411,8 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return lir->operands[0]; // length of nop is sole operand case kNullary: return 1; // 1 byte of opcode + case kPrefix2Nullary: + return 3; // 1 byte of opcode + 2 prefixes case kRegOpcode: // lir operands - 0: reg return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm case kReg: // lir operands - 0: reg @@ -489,7 +495,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return 6; // 2 byte opcode + rel32 } case kJmp: - if (lir->opcode == kX86Jmp8) { + if (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jecxz8) { return 2; // opcode + rel8 } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 @@ -957,6 +963,10 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { code_buffer_.push_back((rel >> 8) & 0xFF); code_buffer_.push_back((rel >> 16) & 0xFF); code_buffer_.push_back((rel >> 24) & 0xFF); + } else if (entry->opcode == kX86Jecxz8) { + DCHECK(IS_SIMM8(rel)); + code_buffer_.push_back(0xE3); + code_buffer_.push_back(rel & 0xFF); } else { DCHECK(entry->opcode == kX86JmpR); code_buffer_.push_back(entry->skeleton.opcode); @@ -1148,6 +1158,17 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { lir->operands[0] = delta; break; } + case kX86Jecxz8: { + LIR *target_lir = lir->target; + DCHECK(target_lir != NULL); + CodeOffset pc; + pc = lir->offset + 2; // opcode + rel8 + CodeOffset target = target_lir->offset; + int delta = target - pc; + lir->operands[0] = delta; + DCHECK(IS_SIMM8(delta)); + break; + } case kX86Jmp8: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); @@ -1226,6 +1247,14 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); break; + case kPrefix2Nullary: // 1 byte of opcode + 2 prefixes. + DCHECK_NE(0, entry->skeleton.prefix1); + DCHECK_NE(0, entry->skeleton.prefix2); + EmitPrefixAndOpcode(entry); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); + break; case kRegOpcode: // lir operands - 0: reg EmitOpRegOpcode(entry, lir->operands[0]); break; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 6100a1d..421d51e 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -359,6 +359,15 @@ class X86Mir2Lir : public Mir2Lir { void GenConstWide(RegLocation rl_dest, int64_t value); /* + * @brief generate inline code for fast case of Strng.indexOf. + * @param info Call parameters + * @param zero_based 'true' if the index into the string is 0. + * @returns 'true' if the call was inlined, 'false' if a regular call needs to be + * generated. + */ + bool GenInlinedIndexOf(CallInfo* info, bool zero_based); + + /* * @brief Return the correct x86 opcode for the Dex operation * @param op Dex opcode for the operation * @param loc Register location of the operand diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index fa9a944..ad5b154 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -185,6 +185,14 @@ void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { if (flags & REG_USEB) { SetupRegMask(&lir->u.m.use_mask, rBX); } + + // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI. + if (lir->opcode == kX86RepneScasw) { + SetupRegMask(&lir->u.m.use_mask, rAX); + SetupRegMask(&lir->u.m.use_mask, rCX); + SetupRegMask(&lir->u.m.use_mask, rDI); + SetupRegMask(&lir->u.m.def_mask, rDI); + } } /* For dumping instructions */ @@ -936,4 +944,174 @@ void X86Mir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } +// Offsets within java.lang.String. +#define STRING_VALUE_OFFSET 8 +#define STRING_COUNT_OFFSET 12 +#define STRING_OFFSET_OFFSET 20 +#define STRING_DATA_OFFSET 12 + +/* + * Fast string.index_of(I) & (II). Inline check for simple case of char <= 0xffff, + * otherwise bails to standard library code. + */ +bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { + ClobberCallerSave(); + LockCallTemps(); // Using fixed registers + + // EAX: 16 bit character being searched. + // ECX: count: number of words to be searched. + // EDI: String being searched. + // EDX: temporary during execution. + // EBX: temporary during execution. + + RegLocation rl_obj = info->args[0]; + RegLocation rl_char = info->args[1]; + RegLocation rl_start = info->args[2]; + + uint32_t char_value = + rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0; + + if (char_value > 0xFFFF) { + // We have to punt to the real String.indexOf. + return false; + } + + // Okay, we are commited to inlining this. + RegLocation rl_return = GetReturn(false); + RegLocation rl_dest = InlineTarget(info); + + // Is the string non-NULL? + LoadValueDirectFixed(rl_obj, rDX); + GenNullCheck(rl_obj.s_reg_low, rDX, info->opt_flags); + + // Record that we have inlined & null checked the object. + info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); + + // Does the character fit in 16 bits? + LIR* launch_pad = nullptr; + if (rl_char.is_const) { + // We need the value in EAX. + LoadConstantNoClobber(rAX, char_value); + } else { + // Character is not a constant; compare at runtime. + LoadValueDirectFixed(rl_char, rAX); + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); + intrinsic_launchpads_.Insert(launch_pad); + OpCmpImmBranch(kCondGt, rAX, 0xFFFF, launch_pad); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + + // Character is in EAX. + // Object pointer is in EDX. + + // We need to preserve EDI, but have no spare registers, so push it on the stack. + // We have to remember that all stack addresses after this are offset by sizeof(EDI). + NewLIR1(kX86Push32R, rDI); + + // Compute the number of words to search in to rCX. + LoadWordDisp(rDX, STRING_COUNT_OFFSET, rCX); + LIR *length_compare = nullptr; + int start_value = 0; + if (zero_based) { + // We have to handle an empty string. Use special instruction JECXZ. + length_compare = NewLIR0(kX86Jecxz8); + } else { + // We have to offset by the start index. + if (rl_start.is_const) { + start_value = mir_graph_->ConstantValue(rl_start.orig_sreg); + start_value = std::max(start_value, 0); + + // Is the start > count? + length_compare = OpCmpImmBranch(kCondLe, rCX, start_value, nullptr); + + if (start_value != 0) { + OpRegImm(kOpSub, rCX, start_value); + } + } else { + // Runtime start index. + rl_start = UpdateLoc(rl_start); + if (rl_start.location == kLocPhysReg) { + length_compare = OpCmpBranch(kCondLe, rCX, rl_start.low_reg, nullptr); + OpRegReg(kOpSub, rCX, rl_start.low_reg); + } else { + // Compare to memory to avoid a register load. Handle pushed EDI. + int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t); + OpRegMem(kOpCmp, rDX, rX86_SP, displacement); + length_compare = NewLIR2(kX86Jcc8, 0, kX86CondLe); + OpRegMem(kOpSub, rCX, rX86_SP, displacement); + } + } + } + DCHECK(length_compare != nullptr); + + // ECX now contains the count in words to be searched. + + // Load the address of the string into EBX. + // The string starts at VALUE(String) + 2 * OFFSET(String) + STRING_DATA_OFFSET. + LoadWordDisp(rDX, STRING_VALUE_OFFSET, rDI); + LoadWordDisp(rDX, STRING_OFFSET_OFFSET, rBX); + OpLea(rBX, rDI, rBX, 1, STRING_DATA_OFFSET); + + // Now compute into EDI where the search will start. + if (zero_based || rl_start.is_const) { + if (start_value == 0) { + OpRegCopy(rDI, rBX); + } else { + NewLIR3(kX86Lea32RM, rDI, rBX, 2 * start_value); + } + } else { + if (rl_start.location == kLocPhysReg) { + if (rl_start.low_reg == rDI) { + // We have a slight problem here. We are already using RDI! + // Grab the value from the stack. + LoadWordDisp(rX86_SP, 0, rDX); + OpLea(rDI, rBX, rDX, 1, 0); + } else { + OpLea(rDI, rBX, rl_start.low_reg, 1, 0); + } + } else { + OpRegCopy(rDI, rBX); + // Load the start index from stack, remembering that we pushed EDI. + int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t); + LoadWordDisp(rX86_SP, displacement, rDX); + OpLea(rDI, rBX, rDX, 1, 0); + } + } + + // EDI now contains the start of the string to be searched. + // We are all prepared to do the search for the character. + NewLIR0(kX86RepneScasw); + + // Did we find a match? + LIR* failed_branch = OpCondBranch(kCondNe, nullptr); + + // yes, we matched. Compute the index of the result. + // index = ((curr_ptr - orig_ptr) / 2) - 1. + OpRegReg(kOpSub, rDI, rBX); + OpRegImm(kOpAsr, rDI, 1); + NewLIR3(kX86Lea32RM, rl_return.low_reg, rDI, -1); + LIR *all_done = NewLIR1(kX86Jmp8, 0); + + // Failed to match; return -1. + LIR *not_found = NewLIR0(kPseudoTargetLabel); + length_compare->target = not_found; + failed_branch->target = not_found; + LoadConstantNoClobber(rl_return.low_reg, -1); + + // And join up at the end. + all_done->target = NewLIR0(kPseudoTargetLabel); + // Restore EDI from the stack. + NewLIR1(kX86Pop32R, rDI); + + // Out of line code returns here. + if (launch_pad != nullptr) { + LIR *return_point = NewLIR0(kPseudoTargetLabel); + launch_pad->operands[2] = WrapPointer(return_point); + } + + StoreValue(rl_dest, rl_return); + return true; +} + } // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 480d5f5..4064bd6 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -277,6 +277,7 @@ enum X86OpCode { kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, + kX86Lea32RM, kX86Lea32RA, // RRC - Register Register ConditionCode - cond_opcode reg1, reg2 // - lir operands - 0: reg1, 1: reg2, 2: CC @@ -384,6 +385,7 @@ enum X86OpCode { kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned kX86JmpR, // jmp reg; lir operands - 0: reg + kX86Jecxz8, // jcexz rel8; jump relative if ECX is zero. kX86CallR, // call reg; lir operands - 0: reg kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp kX86CallA, // call [base + index * scale + disp] @@ -396,6 +398,7 @@ enum X86OpCode { kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table + kX86RepneScasw, // repne scasw kX86Last }; @@ -404,6 +407,7 @@ enum X86EncodingKind { kData, // Special case for raw data. kNop, // Special case for variable length nop. kNullary, // Opcode that takes no arguments. + kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. kRegOpcode, // Shorter form of R instruction kind (opcode+rd) kReg, kMem, kArray, // R, M and A instruction kinds. kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds. |