78 files changed, 4993 insertions, 2343 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b07e4f8..407269b 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -25,6 +25,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/base/hex_dump_test.cc \
   runtime/base/histogram_test.cc \
   runtime/base/mutex_test.cc \
+  runtime/base/scoped_flock_test.cc \
   runtime/base/timing_logger_test.cc \
   runtime/base/unix_file/fd_file_test.cc \
   runtime/base/unix_file/mapped_file_test.cc \
@@ -36,6 +37,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/dex_instruction_visitor_test.cc \
   runtime/dex_method_iterator_test.cc \
   runtime/entrypoints/math_entrypoints_test.cc \
+  runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
   runtime/entrypoints_order_test.cc \
   runtime/exception_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
@@ -80,6 +82,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \
   compiler/optimizing/codegen_test.cc \
   compiler/optimizing/dominator_test.cc \
   compiler/optimizing/find_loops_test.cc \
+  compiler/optimizing/graph_test.cc \
   compiler/optimizing/linearize_test.cc \
   compiler/optimizing/liveness_test.cc \
   compiler/optimizing/live_interval_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index cfce9f7..3cf7368 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -91,6 +91,7 @@ LIBART_COMPILER_SRC_FILES := \
   optimizing/register_allocator.cc \
   optimizing/ssa_builder.cc \
   optimizing/ssa_liveness_analysis.cc \
+  optimizing/ssa_type_propagation.cc \
   trampolines/trampoline_compiler.cc \
   utils/arena_allocator.cc \
   utils/arena_bit_vector.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb48cc3..f0b4787 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -48,10 +48,16 @@ enum SpecialTargetRegister {
   kArg1,
   kArg2,
   kArg3,
+  kArg4,
+  kArg5,
   kFArg0,
   kFArg1,
   kFArg2,
   kFArg3,
+  kFArg4,
+  kFArg5,
+  kFArg6,
+  kFArg7,
   kRet0,
   kRet1,
   kInvokeTgt,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5b9c763..d544397 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -157,6 +157,8 @@ int arm64_support_list[] = {
   Instruction::GOTO,
   Instruction::GOTO_16,
   Instruction::GOTO_32,
+  Instruction::PACKED_SWITCH,
+  Instruction::SPARSE_SWITCH,
   Instruction::IF_EQ,
   Instruction::IF_NE,
   Instruction::IF_LT,
@@ -248,8 +250,6 @@ int arm64_support_list[] = {
   Instruction::MOVE_OBJECT,
   Instruction::MOVE_OBJECT_FROM16,
   Instruction::MOVE_OBJECT_16,
-  // Instruction::PACKED_SWITCH,
-  // Instruction::SPARSE_SWITCH,
   // Instruction::MOVE_RESULT,
   // Instruction::MOVE_RESULT_WIDE,
   // Instruction::MOVE_RESULT_OBJECT,
@@ -707,7 +707,7 @@ int x86_64_support_list[] = {
 // which has problems with long, float, double
 constexpr char arm64_supported_types[] = "ZBSCILVJFD";
 // (x86_64) We still have troubles with compiling longs/doubles/floats
-constexpr char x86_64_supported_types[] = "ZBSCILV";
+constexpr char x86_64_supported_types[] = "ZBSCILVJFD";

 // TODO: Remove this when we are able to compile everything.
 static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
@@ -718,7 +718,7 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set)
   // 1 is for the return type. Currently, we only accept 2 parameters at the most.
   // (x86_64): For now we have the same limitation. But we might want to split this
   // check in future into two separate cases for arm64 and x86_64.
-  if (shorty_size > (1 + 2)) {
+  if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
     return false;
   }
@@ -889,7 +889,9 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver,
     // TODO(Arm64): enable optimizations once backend is mature enough.
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    cu.enable_debug |= (1 << kDebugCodegenDump);
+    if (cu.instruction_set == kArm64) {
+      cu.enable_debug |= (1 << kDebugCodegenDump);
+    }
   }

   cu.StartTimingSplit("BuildMIRGraph");
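The two frontend.cc hunks above work together: x86_64's supported-type string grows from "ZBSCILV" to "ZBSCILVJFD" (adding J/F/D, i.e. long, float, double), and the two-parameter cap on the shorty no longer applies to x86_64. A shorty lists the return type first, then one character per parameter. A minimal sketch of how such a filter behaves, with a hypothetical helper name rather than ART's actual code:

// Sketch: gate compilation on a method's shorty, assuming the type
// letters used above (Z=bool, B=byte, S=short, C=char, I=int,
// L=reference, V=void, J=long, F=float, D=double).
#include <cstddef>
#include <cstring>

static bool CanCompileShortySketch(const char* shorty,
                                   const char* supported_types,
                                   bool lift_param_cap) {
  size_t shorty_size = strlen(shorty);
  // shorty[0] is the return type; the (1 + 2) cap is kept unless the
  // backend (x86_64 after this change) opts out of it.
  if (shorty_size > (1 + 2) && !lift_param_cap) {
    return false;
  }
  for (size_t i = 0; i < shorty_size; i++) {
    if (strchr(supported_types, shorty[i]) == nullptr) {
      return false;
    }
  }
  return true;
}

For example, CanCompileShortySketch("DJFI", "ZBSCILVJFD", true) accepts a double(long, float, int) method that both the old type string and the old parameter cap would have rejected.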
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index bd9c8b4..3b30cde 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -113,6 +113,7 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) {
     case kHiddenArg: res_reg = rs_r12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index b80938a..b85f569 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -68,7 +68,7 @@ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage r_base = AllocTemp();
+  RegStorage r_base = AllocTempWide();
   // Allocate key and disp temps.
   RegStorage r_key = AllocTemp();
   RegStorage r_disp = AllocTemp();
@@ -95,7 +95,8 @@ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
   tab_rec->anchor = switch_label;

   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x1, x1, w3, sxtw" rather than "add x1, x1, x3"?
+  OpRegRegRegShift(kOpAdd, r_base, r_base, As64BitReg(r_disp), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, r_base.GetReg());

   // Loop exit label.
@@ -105,7 +106,7 @@
 void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
-                                  RegLocation rl_src) {
+                                    RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
   }
@@ -122,7 +123,7 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  RegStorage table_base = AllocTemp();
+  RegStorage table_base = AllocTempWide();
   // Materialize a pointer to the switch table
   NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
   int low_key = s4FromSwitchData(&table[2]);
@@ -140,15 +141,17 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
+  // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
+  LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32);

   // Get base branch address.
-  RegStorage branch_reg = AllocTemp();
+  RegStorage branch_reg = AllocTempWide();
   LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
   tab_rec->anchor = switch_label;

   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT);
+  // TODO(Arm64): generate "add x4, x4, w3, sxtw" rather than "add x4, x4, x3"?
+  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, branch_reg.GetReg());

   // branch_over target here
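The packed-switch lowering above keeps the displacement table 32-bit while addresses are 64-bit: adr materializes the anchor, the table is indexed by (key - low_key), and the loaded displacement is widened (the As64BitReg() changes) before being added to the anchor for the indirect branch. A plain C++ sketch of that address arithmetic, with invented names (anchor, disps, fallthrough) standing in for the emitted code's registers and labels:

// Sketch: resolve a packed-switch target the way the emitted code does.
#include <cstddef>
#include <cstdint>

uintptr_t ResolvePackedSwitch(uintptr_t anchor, const int32_t* disps,
                              int32_t low_key, int32_t key, size_t count,
                              uintptr_t fallthrough) {
  // Unsigned wraparound folds the "key < low_key" and "key too large"
  // range checks into one comparison.
  uint32_t index = static_cast<uint32_t>(key - low_key);
  if (index >= count) {
    return fallthrough;  // out of range: fall through past the switch
  }
  // The 32-bit displacement is sign-extended to 64 bits before the add,
  // which is what the As64BitReg(disp_reg) change makes explicit.
  return anchor + static_cast<intptr_t>(disps[index]);
}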
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 6251f4f..21db771 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -223,6 +223,40 @@ class Arm64Mir2Lir : public Mir2Lir {
                               bool skip_this);

  private:
+  /**
+   * @brief Given register xNN (dNN), returns register wNN (sNN).
+   * @param reg #RegStorage containing a Solo64 input register (e.g. @c x1 or @c d2).
+   * @return A Solo32 with the same register number as the @p reg (e.g. @c w1 or @c s2).
+   * @see As64BitReg
+   */
+  RegStorage As32BitReg(RegStorage reg) {
+    DCHECK(reg.Is64Bit());
+    DCHECK(!reg.IsPair());
+    RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
+                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
+    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
+                              ->GetReg().GetReg(),
+              ret_val.GetReg());
+    return ret_val;
+  }
+
+  /**
+   * @brief Given register wNN (sNN), returns register xNN (dNN).
+   * @param reg #RegStorage containing a Solo32 input register (e.g. @c w1 or @c s2).
+   * @return A Solo64 with the same register number as the @p reg (e.g. @c x1 or @c d2).
+   * @see As32BitReg
+   */
+  RegStorage As64BitReg(RegStorage reg) {
+    DCHECK(reg.Is32Bit());
+    DCHECK(!reg.IsPair());
+    RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
+                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
+    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
+                              ->GetReg().GetReg(),
+              ret_val.GetReg());
+    return ret_val;
+  }
+
   LIR* LoadFPConstantValue(int r_dest, int32_t value);
   LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
   void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
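These helpers move no data: on arm64, wNN/xNN (and sNN/dNN) are two views of one physical register, and RegStorage keeps the register number and kind in its low bits, so converting a view only re-tags the storage descriptor (w3 to x3, s2 to d2). The DCHECK_EQ cross-checks the cheap bit manipulation against the allocator's register-info table. A toy model of the same re-tagging idea, with an invented 16-bit layout and masks:

// Sketch of the view switch with an invented field layout: the low byte
// holds the register number and kind, one bit selects the 64-bit view.
#include <cassert>
#include <cstdint>

constexpr uint16_t kKindAndNumMask = 0x00ff;  // hypothetical
constexpr uint16_t k64BitShape     = 0x0100;  // hypothetical

uint16_t As64BitRegSketch(uint16_t reg) {
  assert((reg & k64BitShape) == 0);  // must start as a 32-bit view
  return static_cast<uint16_t>((reg & kKindAndNumMask) | k64BitShape);
}

uint16_t As32BitRegSketch(uint16_t reg) {
  assert((reg & k64BitShape) != 0);  // must start as a 64-bit view
  return static_cast<uint16_t>(reg & kKindAndNumMask);
}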
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index acc7d17..265e8d2 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -45,6 +45,7 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
+      // TODO: Fix xSELF.
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf),
                                               rl_src1, rl_src2, false);
       rl_result = GetReturn(kFPReg);
@@ -88,8 +89,15 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
-                                              false);
+      // TODO: Fix xSELF.
+      {
+        ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod);
+        RegStorage r_tgt = CallHelperSetup(helper_offset);
+        LoadValueDirectWideFixed(rl_src1, rs_d0);
+        LoadValueDirectWideFixed(rl_src2, rs_d1);
+        ClobberCallerSave();
+        CallHelper(r_tgt, helper_offset, false);
+      }
       rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index b287399..ce95286 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -127,6 +127,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
     case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index d0ab4f6..4f0d7bc 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -532,11 +532,11 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1,
   switch (op) {
     case kOpLsl: {
       // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
-      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
+      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
       // For now, we just use ubfm directly.
-      int max_value = (is_wide) ? 64 : 32;
+      int max_value = (is_wide) ? 63 : 31;
       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
-                     (-value) & (max_value - 1), max_value - value);
+                     (-value) & max_value, max_value - value);
     }
     case kOpLsr:
       return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
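The utility_arm64.cc hunk is an off-by-one fix in the lsl-to-ubfm aliasing: for a 64-bit shift the immediates are immr = (-imm mod 64) and imms = 63 - imm, so the correct mask and subtrahend is 63 (31 in the 32-bit case). The old code's (-value) & (max_value - 1) happened to yield the right immr, but max_value - value gave imms = 64 - imm instead of 63 - imm. A small standalone check of the corrected arithmetic:

// Sketch: verify that the ubfm immediates produced for "lsl x, x, #imm"
// really implement a left shift (pure arithmetic, no LIR involved).
#include <cassert>
#include <cstdint>

void CheckLslAsUbfm(uint64_t x, int imm) {
  assert(imm > 0 && imm < 64);
  int immr = (-imm) & 63;  // the fixed "(-value) & max_value", max_value == 63
  int imms = 63 - imm;     // the fixed "max_value - value"
  // When imms < immr, UBFM takes the low (imms + 1) bits of the source
  // and places them at bit (64 - immr), which is exactly a shift by imm.
  uint64_t field = x & ((uint64_t{1} << (imms + 1)) - 1);
  uint64_t ubfm_result = field << (64 - immr);
  assert(ubfm_result == x << imm);
}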
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 62c81d0..69ca715 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1959,7 +1959,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc
   switch (opcode) {
     case Instruction::NOT_LONG:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenNotLong(rl_dest, rl_src2);
         return;
       }
@@ -2009,7 +2009,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
         return;
       }
@@ -2020,7 +2020,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
        mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
         return;
       }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 842533b..b7ea362 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -277,6 +277,8 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size>
     if (arg1.wide == 0) {
       if (cu_->instruction_set == kMips) {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1));
+      } else if (cu_->instruction_set == kArm64) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
       } else {
         LoadValueDirectFixed(arg1, TargetReg(kArg1));
       }
@@ -290,26 +292,51 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size>
       }
       LoadValueDirectWideFixed(arg1, r_tmp);
     } else {
-      RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+      RegStorage r_tmp;
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+      }
       LoadValueDirectWideFixed(arg1, r_tmp);
     }
   } else {
     RegStorage r_tmp;
     if (arg0.fp) {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      }
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      }
     }
     LoadValueDirectWideFixed(arg0, r_tmp);
     if (arg1.wide == 0) {
-      LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      if (cu_->instruction_set == kX86_64) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      }
     } else {
       RegStorage r_tmp;
       if (arg1.fp) {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        }
       } else {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        }
       }
       LoadValueDirectWideFixed(arg1, r_tmp);
     }
@@ -662,7 +689,7 @@ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state,
     case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
       CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
       cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index);
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86) {
        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg));
       }
       break;
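One pattern repeats through the gen_invoke.cc hunk: on 32-bit targets a wide (64-bit) helper argument is split across a register pair (MakeRegPair), while on x86_64 it rides in a single Solo64/FloatSolo64 register; and because a wide value then consumes one argument slot instead of two, the following argument moves down from kArg2/kFArg2 to kArg1/kFArg1. A compact model of that slot accounting, using made-up types rather than ART's TargetReg machinery:

// Sketch: count argument-register slots for a helper call under a
// made-up ABI description (not ART's actual interface).
#include <cstdio>

struct AbiModel {
  bool wide_values_use_pairs;  // true on 32-bit ARM/x86, false on x86_64
};

// Returns the core-register slot index where argument 'n' begins, given
// the width of every earlier argument.
int FirstSlotOfArg(const AbiModel& abi, const bool* earlier_is_wide, int n) {
  int slot = 0;
  for (int i = 0; i < n; i++) {
    slot += (earlier_is_wide[i] && abi.wide_values_use_pairs) ? 2 : 1;
  }
  return slot;
}

int main() {
  bool widths[] = {true};  // one wide argument already placed
  printf("pair ABI: arg1 starts at kArg%d\n",
         FirstSlotOfArg({true}, widths, 1));   // kArg2: pair took kArg0/kArg1
  printf("x86_64:   arg1 starts at kArg%d\n",
         FirstSlotOfArg({false}, widths, 1));  // kArg1: wide arg0 took one slot
}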
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 2c8b9b9..6ef7934 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -391,24 +391,34 @@ RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
   return loc;
 }

-// FIXME: will need an update for 64-bit core regs.
 RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
   DCHECK(loc.wide);
   DCHECK(loc.location == kLocPhysReg);
   DCHECK(!loc.reg.IsFloat());
-  if (IsTemp(loc.reg.GetLow())) {
-    Clobber(loc.reg.GetLow());
-  } else {
-    RegStorage temp_low = AllocTemp();
-    OpRegCopy(temp_low, loc.reg.GetLow());
-    loc.reg.SetLowReg(temp_low.GetReg());
-  }
-  if (IsTemp(loc.reg.GetHigh())) {
-    Clobber(loc.reg.GetHigh());
+
+  if (!loc.reg.IsPair()) {
+    if (IsTemp(loc.reg)) {
+      Clobber(loc.reg);
+    } else {
+      RegStorage temp = AllocTempWide();
+      OpRegCopy(temp, loc.reg);
+      loc.reg = temp;
+    }
   } else {
-    RegStorage temp_high = AllocTemp();
-    OpRegCopy(temp_high, loc.reg.GetHigh());
-    loc.reg.SetHighReg(temp_high.GetReg());
+    if (IsTemp(loc.reg.GetLow())) {
+      Clobber(loc.reg.GetLow());
+    } else {
+      RegStorage temp_low = AllocTemp();
+      OpRegCopy(temp_low, loc.reg.GetLow());
+      loc.reg.SetLowReg(temp_low.GetReg());
+    }
+    if (IsTemp(loc.reg.GetHigh())) {
+      Clobber(loc.reg.GetHigh());
+    } else {
+      RegStorage temp_high = AllocTemp();
+      OpRegCopy(temp_high, loc.reg.GetHigh());
+      loc.reg.SetHighReg(temp_high.GetReg());
+    }
   }

   // Ensure that this doesn't represent the original SR any more.
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c1a7c99..381c7ce 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -98,6 +98,7 @@ RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
     case kHiddenArg: res_reg = rs_rT0; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = rs_rMIPS_COUNT; break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f12b6f..a85be5e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -68,20 +68,51 @@ void Mir2Lir::LockArg(int in_position, bool wide) {

 // TODO: needs revisit for 64-bit.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }

+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, it moves the stack pointer in order to push return.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
+  if (cu_->instruction_set == kX86_64) {
+    RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+    if (!reg_arg.Valid()) {
+      RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+      LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
+      return new_reg;
+    } else {
+      // Check if we need to copy the arg to a different reg_class.
+      if (!RegClassMatches(reg_class, reg_arg)) {
+        if (wide) {
+          RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+          OpRegCopyWide(new_reg, reg_arg);
+          reg_arg = new_reg;
+        } else {
+          RegStorage new_reg = AllocTypedTemp(false, reg_class);
+          OpRegCopy(new_reg, reg_arg);
+          reg_arg = new_reg;
+        }
+      }
+    }
+    return reg_arg;
+  }
+
+  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
+      RegStorage::InvalidReg();
+
   // If the VR is wide and there is no register for high part, we need to load it.
   if (wide && !reg_arg_high.Valid()) {
     // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
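LoadArg above (and LoadArgDirect in the next hunk) adjust the in-argument offset the same way: the call instruction pushes a 4-byte return address on x86 and an 8-byte one on x86-64, so the callee must skip past it to reach the caller's out area, while link-register targets need no adjustment. A one-function sketch of the computation, with a hypothetical base_out_offset standing in for StackVisitor::GetOutVROffset():

// Sketch: offset of incoming argument data from the callee's SP, given
// the base offset of the caller's out area.
#include <cstdint>

enum class Isa { kArm, kArm64, kMips, kX86, kX86_64 };

int InArgOffset(int base_out_offset, Isa isa) {
  int offset = base_out_offset;
  if (isa == Isa::kX86) {
    offset += sizeof(uint32_t);  // 32-bit return address pushed by call
  } else if (isa == Isa::kX86_64) {
    offset += sizeof(uint64_t);  // 64-bit return address pushed by call
  }
  return offset;  // ARM/MIPS keep the return address in a link register
}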
@@ -129,15 +160,22 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide)

 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }

+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, it moves the stack pointer in order to push return.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
   if (!rl_dest.wide) {
     RegStorage reg = GetArgMappingToPhysicalReg(in_position);
     if (reg.Valid()) {
@@ -146,6 +184,16 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
     }
   } else {
+    if (cu_->instruction_set == kX86_64) {
+      RegStorage reg = GetArgMappingToPhysicalReg(in_position);
+      if (reg.Valid()) {
+        OpRegCopy(rl_dest.reg, reg);
+      } else {
+        LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
+      }
+      return;
+    }
+
     RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
     RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ed94a8d..9718acd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -910,13 +910,13 @@ class Mir2Lir : public Backend {
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
     virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
                              uint32_t vtable_idx,
                              uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
                              bool skip_this);
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
                            NextCallInsn next_call_insn,
                            const MethodReference& target_method,
                            uint32_t vtable_idx,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 39a0365..0a8193a 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -23,9 +23,9 @@ namespace art {
 #define MAX_ASSEMBLER_RETRIES 50

 const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = {
-  { kX8632BitData, kData, IS_UNARY_OP, { 0, 0, 0x00, 0, 0, 0, 0, 4
}, "data", "0x!0d" }, - { kX86Bkpt, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" }, - { kX86Nop, kNop, NO_OPERAND, { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop", "" }, + { kX8632BitData, kData, IS_UNARY_OP, { 0, 0, 0x00, 0, 0, 0, 0, 4, false }, "data", "0x!0d" }, + { kX86Bkpt, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0, false }, "int 3", "" }, + { kX86Nop, kNop, NO_OPERAND, { 0, 0, 0x90, 0, 0, 0, 0, 0, false }, "nop", "" }, #define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \ rm8_r8, rm32_r32, \ @@ -34,65 +34,65 @@ const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = { rm8_i8, rm8_i8_modrm, \ rm32_i32, rm32_i32_modrm, \ rm32_i8, rm32_i8_modrm) \ -{ kX86 ## opname ## 8MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 8AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8TR", "fs:[!0d],!1r" }, \ -{ kX86 ## opname ## 8RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RR", "!0r,!1r" }, \ -{ kX86 ## opname ## 8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 8RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 8RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1 }, #opname "8RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 8MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 8AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 8MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0, true }, #opname "8MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 8AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0, true}, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0, 0, 0, true }, #opname "8TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 8RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, 
#opname "8RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 8RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0, 0, 0, true }, #opname "8RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 8RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1, true }, #opname "8RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 8MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 8AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1, true }, #opname "8TI", "fs:[!0d],!1d" }, \ \ -{ kX86 ## opname ## 16MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 16AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 16TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16TR", "fs:[!0d],!1r" }, \ -{ kX86 ## opname ## 16RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RR", "!0r,!1r" }, \ -{ kX86 ## opname ## 16RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 16RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 16RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 16RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2 }, #opname "16RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 16MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 16AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 16TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16TI", "fs:[!0d],!1d" }, \ -{ kX86 ## opname ## 16RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16RI8", "!0r,!1d" }, \ -{ kX86 ## opname ## 16MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, 
rm32_i8_modrm, 0, 1 }, #opname "16MI8", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 16AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16TI8", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 16MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "16MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 16AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 16TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "16TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 16RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "16RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 16RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "16RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 16RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 16RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "16RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 16RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2, false }, #opname "16RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2, false }, #opname "16MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2, false }, #opname "16TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 16RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "16RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "16MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "16TI8", "fs:[!0d],!1d" }, \ \ -{ kX86 ## opname ## 32MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32MR", "[!0r+!1d],!2r" }, \ -{ 
kX86 ## opname ## 32AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 32TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32TR", "fs:[!0d],!1r" }, \ -{ kX86 ## opname ## 32RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RR", "!0r,!1r" }, \ -{ kX86 ## opname ## 32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 32RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 32RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 32RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 32MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 32AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32TI", "fs:[!0d],!1d" }, \ -{ kX86 ## opname ## 32RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32RI8", "!0r,!1d" }, \ -{ kX86 ## opname ## 32MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 32AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 32MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "32MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 32AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 32TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "32TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 32RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "32RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "32RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 32RA, 
kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 32RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "32RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 32RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "32RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "32MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "32TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 32RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "32RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "32MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "32TI8", "fs:[!0d],!1d" }, \ \ -{ kX86 ## opname ## 64MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 64AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 64TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64TR", "fs:[!0d],!1r" }, \ -{ kX86 ## opname ## 64RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RR", "!0r,!1r" }, \ -{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 64RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 64RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 64MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, 
rm32_i32_modrm, 0, 4 }, #opname "64MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 64AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 64TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64TI", "fs:[!0d],!1d" }, \ -{ kX86 ## opname ## 64RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64RI8", "!0r,!1d" }, \ -{ kX86 ## opname ## 64MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64MI8", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 64AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64TI8", "fs:[!0d],!1d" } +{ kX86 ## opname ## 64MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "64MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 64AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 64TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0, 0, 0, false }, #opname "64TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 64RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "64RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "64RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 64RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0, false }, #opname "64RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 64RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4, false }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4, false }, #opname "64TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 64RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "64RI8", "!0r,!1d" }, \ +{ 
kX86 ## opname ## 64MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "64MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1, false }, #opname "64TI8", "fs:[!0d],!1d" } ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0, 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, @@ -144,114 +144,112 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, 0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */), #undef ENCODING_MAP - { kX86Imul16RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RRI", "!0r,!1r,!2d" }, - { kX86Imul16RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RMI", "!0r,[!1r+!2d],!3d" }, - { kX86Imul16RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, - - { kX86Imul32RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RRI", "!0r,!1r,!2d" }, - { kX86Imul32RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RMI", "!0r,[!1r+!2d],!3d" }, - { kX86Imul32RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, - { kX86Imul32RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RRI8", "!0r,!1r,!2d" }, - { kX86Imul32RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" }, - { kX86Imul32RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, - - { kX86Imul64RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RRI", "!0r,!1r,!2d" }, - { kX86Imul64RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RMI", "!0r,[!1r+!2d],!3d" }, - { kX86Imul64RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, - { kX86Imul64RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RRI8", "!0r,!1r,!2d" }, - { kX86Imul64RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" }, - { kX86Imul64RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, - - { kX86Mov8MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" }, - { kX86Mov8AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" }, - { 
kX86Mov8RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RR", "!0r,!1r" }, - { kX86Mov8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RM", "!0r,[!1r+!2d]" }, - { kX86Mov8RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov8RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RT", "!0r,fs:[!1d]" }, - { kX86Mov8RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB0, 0, 0, 0, 0, 1 }, "Mov8RI", "!0r,!1d" }, - { kX86Mov8MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8MI", "[!0r+!1d],!2d" }, - { kX86Mov8AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8TI", "fs:[!0d],!1d" }, - - { kX86Mov16MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov16MR", "[!0r+!1d],!2r" }, - { kX86Mov16AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0 }, "Mov16TR", "fs:[!0d],!1r" }, - { kX86Mov16RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RR", "!0r,!1r" }, - { kX86Mov16RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RM", "!0r,[!1r+!2d]" }, - { kX86Mov16RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov16RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RT", "!0r,fs:[!1d]" }, - { kX86Mov16RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0x66, 0, 0xB8, 0, 0, 0, 0, 2 }, "Mov16RI", "!0r,!1d" }, - { kX86Mov16MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2 }, "Mov16MI", "[!0r+!1d],!2d" }, - { kX86Mov16AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2 }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" }, - - { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" }, - { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" }, - { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" }, - { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" }, - { kX86Mov32RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov32RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" }, - { kX86Mov32RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" }, - { kX86Mov32MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", 
"[!0r+!1d],!2d" }, - { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" }, - - { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, - - { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - - { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" }, - { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0 }, "Mov64TR", "fs:[!0d],!1r" }, - { kX86Mov64RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RR", "!0r,!1r" }, - { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" }, - { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" }, - { kX86Mov64RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { REX_W, 0, 0xB8, 0, 0, 0, 0, 8 }, "Mov64RI", "!0r,!1d" }, - { kX86Mov64MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64MI", "[!0r+!1d],!2d" }, - { kX86Mov64AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 8 }, "Mov64TI", "fs:[!0d],!1d" }, - - { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RM", "!0r,[!1r+!2d]" }, - - { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - - { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" }, - { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RR", "!2c !0r,!1r" }, - - { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, - { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" }, + { kX86Imul16RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RRI", "!0r,!1r,!2d" }, + { kX86Imul16RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul16RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2, false }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + + { kX86Imul32RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RRI", "!0r,!1r,!2d" }, + { kX86Imul32RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | 
REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul32RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul32RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RRI8", "!0r,!1r,!2d" }, + { kX86Imul32RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" }, + { kX86Imul32RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + + { kX86Imul64RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RRI", "!0r,!1r,!2d" }, + { kX86Imul64RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 4, false }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul64RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RRI8", "!0r,!1r,!2d" }, + { kX86Imul64RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1, false }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + + { kX86Mov8MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8MR", "[!0r+!1d],!2r" }, + { kX86Mov8AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0, true }, "Mov8TR", "fs:[!0d],!1r" }, + { kX86Mov8RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RR", "!0r,!1r" }, + { kX86Mov8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RM", "!0r,[!1r+!2d]" }, + { kX86Mov8RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov8RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0, true }, "Mov8RT", "!0r,fs:[!1d]" }, + { kX86Mov8RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB0, 0, 0, 0, 0, 1, true }, "Mov8RI", "!0r,!1d" }, + { kX86Mov8MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8MI", "[!0r+!1d],!2d" }, + { kX86Mov8AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1, true }, "Mov8TI", "fs:[!0d],!1d" }, + + { kX86Mov16MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov16MR", "[!0r+!1d],!2r" }, + { kX86Mov16AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 
0x66, 0x89, 0, 0, 0, 0, 0, false }, "Mov16TR", "fs:[!0d],!1r" }, + { kX86Mov16RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RR", "!0r,!1r" }, + { kX86Mov16RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RM", "!0r,[!1r+!2d]" }, + { kX86Mov16RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov16RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0, false }, "Mov16RT", "!0r,fs:[!1d]" }, + { kX86Mov16RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0x66, 0, 0xB8, 0, 0, 0, 0, 2, false }, "Mov16RI", "!0r,!1d" }, + { kX86Mov16MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16MI", "[!0r+!1d],!2d" }, + { kX86Mov16AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2, false }, "Mov16TI", "fs:[!0d],!1d" }, + + { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" }, + { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" }, + { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" }, + { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" }, + { kX86Mov32RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov32RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RT", "!0r,fs:[!1d]" }, + { kX86Mov32RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "Mov32RI", "!0r,!1d" }, + { kX86Mov32MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32MI", "[!0r+!1d],!2d" }, + { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov32TI", "fs:[!0d],!1d" }, + + { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RM", "!0r,[!1r+!2d]" }, + { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" }, + { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" }, + { kX86Mov64RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" }, + { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 
0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" }, + { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" }, + { kX86Mov64RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { REX_W, 0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI", "!0r,!1d" }, + { kX86Mov64MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { REX_W, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64MI", "[!0r+!1d],!2d" }, + { kX86Mov64AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { REX_W, 0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" }, + + { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RM", "!0r,[!1r+!2d]" }, + { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0, false }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RR", "!2c !0r,!1r" }, + { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RR", "!2c !0r,!1r" }, + + { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, + { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, { REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0, false }, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" }, #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \ -{ kX86 ## opname ## 8RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 8MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 8RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8RC", "!0r,cl" }, \ -{ kX86 ## opname ## 8MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 8AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \ +{ kX86 ## opname ## 8RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 8MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1, true }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 8RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | 
REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1, true }, #opname "8RC", "!0r,cl" }, \ +{ kX86 ## opname ## 8MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1, true }, #opname "8MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 8AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1, true }, #opname "8AC", "[!0r+!1r<<!2d+!3d],cl" }, \ \ -{ kX86 ## opname ## 16RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 16MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 16RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16RC", "!0r,cl" }, \ -{ kX86 ## opname ## 16MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 16AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \ +{ kX86 ## opname ## 16RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1, false }, #opname "16RC", "!0r,cl" }, \ +{ kX86 ## opname ## 16MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1, false }, #opname "16MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 16AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1, false }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \ \ -{ kX86 ## opname ## 32RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 32MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32RC", "!0r,cl" }, \ -{ kX86 ## opname ## 32MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP 
| REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \ +{ kX86 ## opname ## 32RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "32RC", "!0r,cl" }, \ +{ kX86 ## opname ## 32MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "32MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \ \ -{ kX86 ## opname ## 64RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 64MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64MI", "[!0r+!1d],!2d" }, \ -{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 64RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64RC", "!0r,cl" }, \ -{ kX86 ## opname ## 64MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 64AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" } +{ kX86 ## opname ## 64RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1, false }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "64RC", "!0r,cl" }, \ +{ kX86 ## opname ## 64MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "64MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 64AC, 
kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0, false }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" } SHIFT_ENCODING_MAP(Rol, 0x0), SHIFT_ENCODING_MAP(Ror, 0x1), @@ -262,31 +260,31 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, SHIFT_ENCODING_MAP(Sar, 0x7), #undef SHIFT_ENCODING_MAP - { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" }, - { kX86Shld32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32RRI", "!0r,!1r,!2d" }, - { kX86Shld32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" }, - { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" }, - { kX86Shrd32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" }, - { kX86Shld64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64RRI", "!0r,!1r,!2d" }, - { kX86Shld64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64MRI", "[!0r+!1d],!2r,!3d" }, - { kX86Shrd64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64RRI", "!0r,!1r,!2d" }, - { kX86Shrd64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64MRI", "[!0r+!1d],!2r,!3d" }, - - { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" }, - { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" }, - { kX86Test8AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Test16RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16RI", "!0r,!1d" }, - { kX86Test16MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16MI", "[!0r+!1d],!2d" }, - { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Test32RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" }, - { kX86Test32MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" }, - { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" }, - { kX86Test64RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64RI", "!0r,!1d" }, - { kX86Test64MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64MI", "[!0r+!1d],!2d" }, - { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" }, - - { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" }, - { kX86Test64RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0}, "Test64RR", "!0r,!1r" }, + 
{ kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0, false }, "Cmc", "" }, + { kX86Shld32RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32RRI", "!0r,!1r,!2d" }, + { kX86Shld32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shrd32RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32RRI", "!0r,!1r,!2d" }, + { kX86Shrd32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shld64RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64RRI", "!0r,!1r,!2d" }, + { kX86Shld64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shrd64RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64RRI", "!0r,!1r,!2d" }, + { kX86Shrd64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd64MRI", "[!0r+!1d],!2r,!3d" }, + + { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8RI", "!0r,!1d" }, + { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8MI", "[!0r+!1d],!2d" }, + { kX86Test8AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1, true }, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test16RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16RI", "!0r,!1d" }, + { kX86Test16MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16MI", "[!0r+!1d],!2d" }, + { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2, false }, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test32RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32RI", "!0r,!1d" }, + { kX86Test32MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32MI", "[!0r+!1d],!2d" }, + { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test64RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64RI", "!0r,!1d" }, + { kX86Test64MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64MI", "[!0r+!1d],!2d" }, + { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 4, false }, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + + { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test32RR", "!0r,!1r" }, + { kX86Test64RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0, false }, "Test64RR", "!0r,!1r" }, #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \ reg, reg_kind, reg_flags, \ @@ -294,18 +292,18 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, arr, arr_kind, 
arr_flags, imm, \ b_flags, hw_flags, w_flags, \ b_format, hw_format, w_format) \ -{ kX86 ## opname ## 8 ## reg, reg_kind, reg_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, b_format "!0r" }, \ -{ kX86 ## opname ## 8 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, b_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 8 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \ -{ kX86 ## opname ## 16 ## reg, reg_kind, reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, hw_format "!0r" }, \ -{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, hw_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \ -{ kX86 ## opname ## 32 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \ -{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \ -{ kX86 ## opname ## 64 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #reg, w_format "!0r" }, \ -{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #mem, w_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" } +{ kX86 ## opname ## 8 ## reg, reg_kind, reg_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #reg, b_format "!0r" }, \ +{ kX86 ## opname ## 8 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #mem, b_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 8 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0, true }, #opname "8" #arr, b_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 16 ## reg, reg_kind, reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #reg, hw_format "!0r" }, \ +{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #mem, hw_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1, false }, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 32 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #reg, w_format "!0r" }, \ +{ kX86 ## opname ## 32 ## mem, 
mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #mem, w_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 64 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #reg, w_format "!0r" }, \ +{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #mem, w_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2, false }, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" } UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), @@ -316,33 +314,43 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"), #undef UNARY_ENCODING_MAP - { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" }, - { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" }, - { kX86Push32R, kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0, 0, 0, 0, 0 }, "Push32R", "!0r" }, - { kX86Pop32R, kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD, { 0, 0, 0x58, 0, 0, 0, 0, 0 }, "Pop32R", "!0r" }, + { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { 0, 0, 0x99, 0, 0, 0, 0, 0, false }, "Cdq", "" }, + { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { REX_W, 0, 0x99, 0, 0, 0, 0, 0, false }, "Cqo", "" }, + { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap32R", "!0r" }, + { kX86Push32R, kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0, 0, 0, 0, 0, false }, "Push32R", "!0r" }, + { kX86Pop32R, kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD, { 0, 0, 0x58, 0, 0, 0, 0, 0, false }, "Pop32R", "!0r" }, #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \ -{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ -{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, 
#opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + +#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \ -{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ -{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0), - { kX86MovsdMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" }, - { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovsdMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdMR", "[!0r+!1d],!2r" }, + { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10, REG_DEF0), - { kX86MovssMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssMR", "[!0r+!1d],!2r" }, - { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovssMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssMR", "[!0r+!1d],!2r" }, + { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Cvtsi2sd, 0xF2, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsi2ss, 0xF3, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd, 0xF2, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss, 0xF3, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsd2si, 0xF2, 0x2D, REG_DEF0), 
EXT_0F_ENCODING_MAP(Cvtss2si, 0xF3, 0x2D, REG_DEF0), EXT_0F_ENCODING_MAP(Ucomisd, 0x66, 0x2E, SETS_CCODES|REG_USE0), @@ -383,74 +391,84 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING2_MAP(Phaddw, 0x66, 0x38, 0x01, REG_DEF0_USE0), EXT_0F_ENCODING2_MAP(Phaddd, 0x66, 0x38, 0x02, REG_DEF0_USE0), - { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1 }, "PextbRRI", "!0r,!1r,!2d" }, - { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1 }, "PextwRRI", "!0r,!1r,!2d" }, - { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1 }, "PextdRRI", "!0r,!1r,!2d" }, + { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" }, + { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" }, + { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" }, - { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuflwRRI", "!0r,!1r,!2d" }, - { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1 }, "PshuffRRI", "!0r,!1r,!2d" }, + { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" }, + { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" }, - { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1 }, "PsrawRI", "!0r,!1d" }, - { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1 }, "PsradRI", "!0r,!1d" }, - { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1 }, "PsrlwRI", "!0r,!1d" }, - { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1 }, "PsrldRI", "!0r,!1d" }, - { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" }, - { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1 }, "PsllwRI", "!0r,!1d" }, - { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1 }, "PslldRI", "!0r,!1d" }, - { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" }, + { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" }, + { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" }, + { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" }, + { kX86PsrldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 2, 0, 1, false }, "PsrldRI", "!0r,!1d" }, + { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1, false }, "PsrlqRI", "!0r,!1d" }, + { kX86PsllwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 6, 0, 1, false }, "PsllwRI", "!0r,!1d" }, + { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" }, + { kX86PsllqRI, kRegImm, IS_BINARY_OP | 
REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" }, - { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" }, - { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" }, - { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" }, - { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" }, + { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, + { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, + { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" }, + { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" }, EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), - { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" }, - { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" }, + { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movups, 0x0, 0x10, REG_DEF0), - { kX86MovupsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" }, - { kX86MovupsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovupsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsMR", "[!0r+!1d],!2r" }, + { kX86MovupsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0, false }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movaps, 0x0, 0x28, REG_DEF0), - { kX86MovapsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" }, - { kX86MovapsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovapsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsMR", "[!0r+!1d],!2r" }, + { kX86MovapsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0, false }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86MovlpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" }, - { kX86MovlpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86MovlpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" }, - { kX86MovlpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 
0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovlpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRM", "!0r,[!1r+!2d]" }, + { kX86MovlpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0, false }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86MovlpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsMR", "[!0r+!1d],!2r" }, + { kX86MovlpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0, false }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86MovhpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" }, - { kX86MovhpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86MovhpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" }, - { kX86MovhpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovhpsRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRM", "!0r,[!1r+!2d]" }, + { kX86MovhpsRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0, false }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86MovhpsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsMR", "[!0r+!1d],!2r" }, + { kX86MovhpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0, false }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0), - { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" }, - { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" }, - { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0), + { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxRR", "!0r,!1r" }, + { kX86MovqrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxMR", "[!0r+!1d],!2r" }, + { kX86MovqrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + + { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxRR", "!0r,!1r" }, + { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxMR", "[!0r+!1d],!2r" }, + { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0, false }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + + { kX86MovsxdRR, kRegReg, IS_BINARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRR", "!0r,!1r" }, + { kX86MovsxdRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRM", "!0r,[!1r+!2d]" }, + { kX86MovsxdRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE12, { REX_W, 
0, 0x63, 0, 0, 0, 0, 0, false }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" }, - { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" }, - { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, + { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, true }, "Set8R", "!1c !0r" }, + { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8M", "!2c [!0r+!1d]" }, + { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0, false }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, // TODO: load/store? // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly. - { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" }, + { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" }, EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), + EXT_0F_ENCODING_MAP(Imul64, REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), - { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" }, - { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" }, - { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86LockCmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" }, - { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86LockCmpxchg8bM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" }, - { kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" }, - { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" }, + { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "!0r,!1r" }, + { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1d],!2r" }, + { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86LockCmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1d],!2r" }, + { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 
| REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86LockCmpxchg64M, kMem, IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1d]" }, + { kX86LockCmpxchg64A, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" }, + { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" }, EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0), EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0), @@ -458,28 +476,39 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0), #undef EXT_0F_ENCODING_MAP - { kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc8", "!1c !0t" }, - { kX86Jcc32, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" }, - { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" }, - { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" }, - { kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" }, - { kX86Jecxz8, kJmp, NO_OPERAND | IS_BRANCH | NEEDS_FIXUP | REG_USEC, { 0, 0, 0xE3, 0, 0, 0, 0, 0 }, "Jecxz", "!0t" }, - { kX86JmpT, kJmp, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpT", "fs:[!0d]" }, - { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" }, - { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" }, - { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" }, - { kX86CallT, kCall, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallT", "fs:[!0d]" }, - { kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4 }, "CallI", "!0d" }, - { kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" }, - - { kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" }, - { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, - { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr", "!0r,!1d" }, - { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" }, + { kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x70, 0, 0, 0, 0, 0, false }, "Jcc8", "!1c !0t" }, + { kX86Jcc32, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x0F, 0x80, 0, 0, 0, 0, false }, "Jcc32", "!1c !0t" }, + { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0, false }, "Jmp8", "!0t" }, + { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0, false }, "Jmp32", "!0t" }, + { kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0, false }, "JmpR", "!0r" }, + { kX86Jecxz8, kJmp, 
NO_OPERAND | IS_BRANCH | NEEDS_FIXUP | REG_USEC, { 0, 0, 0xE3, 0, 0, 0, 0, 0, false }, "Jecxz", "!0t" }, + { kX86JmpT, kJmp, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 4, 0, 0, false }, "JmpT", "fs:[!0d]" }, + { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0, false }, "CallR", "!0r" }, + { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0, false }, "CallM", "[!0r+!1d]" }, + { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0, false }, "CallA", "[!0r+!1r<<!2d+!3d]" }, + { kX86CallT, kCall, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0, false }, "CallT", "fs:[!0d]" }, + { kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4, false }, "CallI", "!0d" }, + { kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Ret", "" }, + + { kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" }, + { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, + { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr", "!0r,!1d" }, + { kX86RepneScasw, kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" }, }; -size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, - int reg_r, int reg_x, bool has_sib) { +static bool NeedsRex(int32_t raw_reg) { + return RegStorage::RegNum(raw_reg) > 7; +} + +static uint8_t LowRegisterBits(int32_t raw_reg) { + uint8_t low_reg = RegStorage::RegNum(raw_reg) & kRegNumMask32; // 3 bits + DCHECK_LT(low_reg, 8); + return low_reg; +} + +size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, + int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, + int32_t displacement) { size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -487,9 +516,17 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa ++size; } } - if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) && - entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { - ++size; // REX_R + if (Gen64Bit() || kIsDebugBuild) { + bool registers_need_rex_prefix = + NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base) || + (r8_form && RegStorage::RegNum(raw_reg) > 4) || + (r8_reg_reg_form && RegStorage::RegNum(raw_base) > 4); + if (registers_need_rex_prefix && + entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + DCHECK(Gen64Bit()) << "Attempt to use " << entry->name << " on a non-byte register " + << RegStorage::RegNum(raw_reg); + ++size; // rex + } } ++size; // opcode if (entry->skeleton.opcode == 0x0F) { @@ -499,16 +536,16 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } } ++size; // modrm - if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum() + if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum() || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { // SP requires a SIB byte. // GS access also needs a SIB byte for absolute addressing in 64-bit mode. 
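// Reading aid, not part of the patch: the REX byte counted a few lines up is
// laid out 0100WRXB. W selects 64-bit operand size; R, X and B contribute the
// fourth register-number bit for the ModRM reg field, the SIB index and the
// base respectively, which is what makes r8-r15 (RegNum() > 7) reachable.
// A hypothetical helper showing the layout:
//   static uint8_t RexByte(bool w, int reg, int index, int base) {
//     return 0x40 | (w ? 8 : 0) | (((reg >> 3) & 1) << 2) |
//            (((index >> 3) & 1) << 1) | ((base >> 3) & 1);
//   }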
++size; } - if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { + if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. - if (entry->opcode != kX86Lea32RA) { - DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; + if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { + DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; } size += IS_SIMM8(displacement) ? 1 : 4; } @@ -519,112 +556,153 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa int X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; + DCHECK_EQ(entry->opcode, lir->opcode) << entry->name; switch (entry->kind) { case kData: - return 4; // 4 bytes of data + return 4; // 4 bytes of data. case kNop: - return lir->operands[0]; // length of nop is sole operand + return lir->operands[0]; // Length of nop is sole operand. case kNullary: - return 1; // 1 byte of opcode - case kPrefix2Nullary: - return 3; // 1 byte of opcode + 2 prefixes + // Subtract 1 for modrm which isn't used. + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0) - 1; case kRegOpcode: // lir operands - 0: reg // substract 1 for modrm - return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1; + // Subtract 1 for modrm which isn't used. + DCHECK_EQ(false, entry->skeleton.r8_form); + // Note: RegOpcode form passes reg as REX_R but encodes it as REX_B. + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - 1; case kReg: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); + // Note: Reg form passes reg as REX_R but encodes it as REX_B. 
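// Reading aid, not part of the patch: single-register forms put their operand
// in ModRM's r/m field, whose fourth bit comes from REX.B, not REX.R.
// ComputeSize only needs to know that some REX bit is required, so routing the
// register through the raw_reg (REX_R) slot still sizes correctly; the actual
// emitter must set REX.B. For example, "not r10d" assembles to 41 F7 D2,
// where 41 is a REX prefix carrying only the B bit.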
+ return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, + false, entry->skeleton.r8_form, false, 0); case kMem: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, + lir->operands[1]); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], - NO_REG, lir->operands[1], true); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, + lir->operands[3]); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[0], lir->operands[1], - lir->operands[2], NO_REG, false); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], + false, entry->skeleton.r8_form, false, lir->operands[1]); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], - lir->operands[2], NO_REG, false); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], + false, entry->skeleton.r8_form, false, lir->operands[1]); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], - lir->operands[4], lir->operands[1], true); + return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], + true, entry->skeleton.r8_form, false, lir->operands[3]); case kThreadReg: // lir operands - 0: disp, 1: reg - return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false); + DCHECK_EQ(false, entry->skeleton.r8_form); + // Thread displacement size is always 32bit. + return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, false, false, false, + 0x12345678); case kRegReg: // lir operands - 0: reg1, 1: reg2 - return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); + // Note: RegReg form passes reg2 as index but encodes it using base. + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, + false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); case kRegRegStore: // lir operands - 0: reg2, 1: reg1 - return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false); + // Note: RegRegStore form passes reg1 as index but encodes it using base. + return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, + false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[1], lir->operands[2], - lir->operands[0], NO_REG, false); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], + false, entry->skeleton.r8_form, false, lir->operands[2]); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp - return ComputeSize(entry, lir->operands[1], lir->operands[4], - lir->operands[0], lir->operands[2], true); + return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], + true, entry->skeleton.r8_form, false, lir->operands[4]); case kRegThread: // lir operands - 0: reg, 1: disp - // displacement size is always 32bit - return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false); + // Thread displacement size is always 32bit. 
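// Reading aid, not part of the patch: thread-local accesses are plain
// fs:[disp] / gs:[disp] with no base register, so the displacement is always
// emitted as a full disp32. The dummy 0x12345678 passed just below simply
// fails the IS_SIMM8 test inside ComputeSize and forces the 4-byte
// displacement path.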
+ DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, + 0x12345678); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); + size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, + false, entry->skeleton.r8_form, false, 0); + // AX opcodes don't require the modrm byte. if (entry->skeleton.ax_opcode == 0) { return size; } else { - // AX opcodes don't require the modrm byte. - int reg = lir->operands[0]; - return size - (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() ? 1 : 0); + return size - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); } } case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], - NO_REG, lir->operands[0], false); + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], + false, false, false, lir->operands[1]); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[3], - NO_REG, lir->operands[1], true); + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], + true, false, false, lir->operands[3]); case kThreadImm: // lir operands - 0: disp, 1: imm - // displacement size is always 32bit - return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); - case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm - case kRegRegImmRev: - return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); + // Thread displacement size is always 32bit. + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + case kRegRegImm: // lir operands - 0: reg1, 1: reg2, 2: imm + // Note: RegRegImm form passes reg2 as index but encodes it using base. + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, + false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + case kRegRegImmStore: // lir operands - 0: reg2, 1: reg1, 2: imm + // Note: RegRegImmStore form passes reg1 as index but encodes it using base. + return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, + false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[1], lir->operands[2], - lir->operands[0], NO_REG, false); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], + false, entry->skeleton.r8_form, false, lir->operands[2]); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm - return ComputeSize(entry, lir->operands[1], lir->operands[4], - lir->operands[0], lir->operands[2], true); + return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], + true, entry->skeleton.r8_form, false, lir->operands[4]); case kMovRegImm: // lir operands - 0: reg, 1: immediate - return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) + - 1 + entry->skeleton.immediate_bytes; + return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 + + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. 
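
Illustration (not part of the patch): the one-byte saving applied below matches the dedicated x86 shift-by-one encodings, which drop the imm8 byte of the general form:

C1 E0 05    shl eax, 5   (0xC1 /4 ib: opcode, modrm, imm8; 3 bytes)
D1 E0       shl eax, 1   (0xD1 /4:    opcode, modrm;       2 bytes)
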
- return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - - (lir->operands[1] == 1 ? 1 : 0); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, + false, entry->skeleton.r8_form, false, 0) - + (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) - - (lir->operands[2] == 1 ? 1 : 0); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], + false, entry->skeleton.r8_form, false, lir->operands[1]) - + (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[3], - NO_REG, lir->operands[1], true) - - (lir->operands[4] == 1 ? 1 : 0); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], + true, entry->skeleton.r8_form, false, lir->operands[3]) - + (lir->operands[4] == 1 ? 1 : 0); case kShiftRegCl: // lir operands - 0: reg, 1: cl - return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); + DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1])); + // Note: ShiftRegCl form passes reg as reg but encodes it using base. + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, + false, entry->skeleton.r8_form, false, 0); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); - case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], - lir->operands[4], lir->operands[1], true); + DCHECK_EQ(false, entry->skeleton.r8_form); + DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2])); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], + false, false, false, lir->operands[1]); + case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl + DCHECK_EQ(false, entry->skeleton.r8_form); + DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4])); + return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], + true, false, false, lir->operands[3]); case kRegCond: // lir operands - 0: reg, 1: cond - return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, + false, entry->skeleton.r8_form, false, 0); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, + lir->operands[1]); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond - return ComputeSize(entry, lir->operands[0], lir->operands[3], - NO_REG, lir->operands[1], true); - case kRegRegCond: // lir operands - 0: reg, 1: reg, 2: cond - return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); - case kRegMemCond: // lir operands - 0: reg, 1: reg, 2: disp, 3:cond - return ComputeSize(entry, lir->operands[1], lir->operands[2], - lir->operands[0], lir->operands[1], false); + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, + lir->operands[3]); + case kRegRegCond: // lir operands - 
0: reg1, 1: reg2, 2: cond + // Note: RegRegCond form passes reg2 as index but encodes it using base. + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, false, false, false, 0); + case kRegMemCond: // lir operands - 0: reg, 1: base, 2: disp, 3:cond + DCHECK_EQ(false, entry->skeleton.r8_form); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], false, false, false, + lir->operands[2]); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 @@ -638,8 +716,8 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { - // displacement size is always 32bit - return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); + // Thread displacement size is always 32bit. + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); } else { DCHECK(lir->opcode == kX86JmpR); if (NeedsRex(lir->operands[0])) { @@ -653,13 +731,14 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, + lir->operands[1]); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], - NO_REG, lir->operands[1], true); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, + lir->operands[3]); case kX86CallT: // lir operands - 0: disp - // displacement size is always 32bit - return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); + // Thread displacement size is always 32bit. + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); default: break; } @@ -667,43 +746,76 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPcRel: if (entry->opcode == kX86PcRelLoadRA) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table - return ComputeSize(entry, lir->operands[1], 0x12345678, - lir->operands[0], lir->operands[2], true); + // Force the displacement size to 32bit, it will hold a computed offset later. + return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], + true, false, false, 0x12345678); } else { - DCHECK(entry->opcode == kX86PcRelAdr); + DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, - lir->operands[0], NO_REG, false) - - // shorter ax encoding - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); - default: + ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], + lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - + // Shorter ax encoding. + (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 
1 : 0); + case kUnimplemented: break; } UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name; return 0; } -void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) { - EmitPrefix(entry, NO_REG, NO_REG, NO_REG); +static uint8_t ModrmForDisp(int base, int disp) { + // BP requires an explicit disp, so do not omit it in the 0 case + if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) { + return 0; + } else if (IS_SIMM8(disp)) { + return 1; + } else { + return 2; + } +} + +void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg) { + if (kIsDebugBuild) { + // Sanity check r8_form is correctly specified. + if (entry->skeleton.r8_form) { + CHECK(strchr(entry->name, '8') != nullptr) << entry->name; + } else { + if (entry->skeleton.immediate_bytes != 1) { // Ignore ...I8 instructions. + if (!StartsWith(entry->name, "Movzx8") && !StartsWith(entry->name, "Movsx8")) { + CHECK(strchr(entry->name, '8') == nullptr) << entry->name; + } + } + } + if (RegStorage::RegNum(raw_reg) >= 4) { + // In 32-bit mode, byte-form reg nums 4-7 would encode ah, bh, ch and dh, which this + // encoder does not use; r8_form with such a reg num therefore requires 64-bit mode, + // where a REX prefix remaps those nums to spl, bpl, sil and dil. + CHECK(Gen64Bit() || !entry->skeleton.r8_form) + << "Invalid register " << static_cast<int>(RegStorage::RegNum(raw_reg)) + << " for instruction " << entry->name << " in " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + } } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, - uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, + bool r8_form) { // REX.WRXB // W - 64-bit operand // R - MODRM.reg // X - SIB.index // B - MODRM.rm/SIB.base - bool force = false; bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W); - bool r = NeedsRex(reg_r); - bool x = NeedsRex(reg_x); - bool b = NeedsRex(reg_b); - uint8_t rex = force ? 0x40 : 0; + bool r = NeedsRex(raw_reg_r); + bool x = NeedsRex(raw_reg_x); + bool b = NeedsRex(raw_reg_b); + uint8_t rex = 0; + if (r8_form && RegStorage::RegNum(raw_reg_r) > 4) { + rex |= 0x40; // REX.0000 + } if (w) { rex |= 0x48; // REX.W000 } @@ -718,7 +830,7 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, } if (entry->skeleton.prefix1 != 0) { if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) { - // 64 bit adresses by GS, not FS + // 64 bit addresses by GS, not FS.
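
Illustration (not part of the patch): a standalone sketch of the REX byte assembled above. The helper name is hypothetical; REX is 0100WRXB, which is why the patch ORs in 0x48 for W, and presumably 0x44, 0x42 and 0x41 for R, X and B:

#include <cstdint>

static uint8_t MakeRex(bool w, bool r, bool x, bool b) {
  uint8_t rex = 0x40;  // fixed 0100 high nibble
  if (w) rex |= 0x08;  // W: 64-bit operand size
  if (r) rex |= 0x04;  // R: extends ModRM.reg
  if (x) rex |= 0x02;  // X: extends SIB.index
  if (b) rex |= 0x01;  // B: extends ModRM.rm / SIB.base
  return rex;
}
// MakeRex(true, false, false, true) == 0x49, as in "add r9, rax" (49 01 C1).
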
code_buffer_.push_back(THREAD_PREFIX_GS); } else { if (entry->skeleton.prefix1 == REX_W) { @@ -742,6 +854,7 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, DCHECK_EQ(0, entry->skeleton.prefix2); } if (rex != 0) { + DCHECK(Gen64Bit()); code_buffer_.push_back(rex); } } @@ -761,28 +874,14 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } } -void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) { - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); -} - void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, - uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { - EmitPrefix(entry, reg_r, reg_x, reg_b); + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, + bool r8_form) { + EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form); EmitOpcode(entry); } -static uint8_t ModrmForDisp(int base, int disp) { - // BP requires an explicit disp, so do not omit it in the 0 case - if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) { - return 0; - } else if (IS_SIMM8(disp)) { - return 1; - } else { - return 2; - } -} - -void X86Mir2Lir::EmitDisp(uint8_t base, int disp) { +void X86Mir2Lir::EmitDisp(uint8_t base, int32_t disp) { // BP requires an explicit disp, so do not omit it in the 0 case if (disp == 0 && RegStorage::RegNum(base) != rs_rBP.GetRegNum()) { return; @@ -809,13 +908,12 @@ void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) { } } -void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) { - DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8); - DCHECK_LT(RegStorage::RegNum(base), 8); - uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (RegStorage::RegNum(reg_or_opcode) << 3) | - RegStorage::RegNum(base); +void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) { + DCHECK_LT(reg_or_opcode, 8); + DCHECK_LT(base, 8); + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base; code_buffer_.push_back(modrm); - if (RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) { + if (base == rs_rX86_SP.GetRegNum()) { // Special SIB for SP base code_buffer_.push_back(0 << 6 | rs_rX86_SP.GetRegNum() << 3 | rs_rX86_SP.GetRegNum()); } @@ -823,7 +921,7 @@ void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) { } void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, - int scale, int disp) { + int scale, int32_t disp) { DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8); uint8_t modrm = (ModrmForDisp(base, disp) << 6) | RegStorage::RegNum(reg_or_opcode) << 3 | rs_rX86_SP.GetRegNum(); @@ -848,11 +946,7 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { code_buffer_.push_back((imm >> 8) & 0xFF); break; case 4: - if (imm <0) { - CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm); - } else { - CHECK_EQ(imm & 0x0FFFFFFFFl, imm); - } + DCHECK(IS_SIMM32(imm)); code_buffer_.push_back(imm & 0xFF); code_buffer_.push_back((imm >> 8) & 0xFF); code_buffer_.push_back((imm >> 16) & 0xFF); @@ -875,128 +969,126 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { } } -void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); +void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); 
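
Illustration (not part of the patch): the ModRM byte built by EmitModrmDisp above packs mod (the displacement width), reg (a register number or the /N opcode extension) and rm (the base). A minimal sketch with a hypothetical helper name:

#include <cstdint>

static uint8_t MakeModrm(uint8_t mod, uint8_t reg, uint8_t rm) {
  return static_cast<uint8_t>((mod << 6) | (reg << 3) | rm);  // mm rrr bbb
}
// MakeModrm(1, 2, 5) == 0x55: "8B 55 xx" is mov edx, [ebp + disp8], the rBP
// base that ModrmForDisp never allows to use mod == 0.
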
+} + +void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); - DCHECK(!RegStorage::IsFloat(reg)); - DCHECK_LT(RegStorage::RegNum(reg), 8); - code_buffer_.back() += RegStorage::RegNum(reg); + DCHECK(!RegStorage::IsFloat(raw_reg)); + uint8_t low_reg = LowRegisterBits(raw_reg); + code_buffer_.back() += low_reg; DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); - if (RegStorage::RegNum(reg) >= 4) { - DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " - << static_cast<int>(RegStorage::RegNum(reg)) - << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); - } - DCHECK_LT(RegStorage::RegNum(reg), 8); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); +void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefix(entry, NO_REG, NO_REG, base); - base = LowRegisterBits(base); +void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, - int scale, int disp) { - EmitPrefixAndOpcode(entry, NO_REG, index, base); - index = LowRegisterBits(index); - base = LowRegisterBits(base); - EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); +void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, + int scale, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false); + uint8_t low_index = LowRegisterBits(raw_index); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) { - uint8_t res = reg; - res = reg & kRegNumMask32; // 3 bits - return res; -} - -bool X86Mir2Lir::NeedsRex(uint8_t reg) { - return RegStorage::RegNum(reg) > 7; -} - -void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, - uint8_t base, int disp, uint8_t 
reg) { - EmitPrefixAndOpcode(entry, reg, NO_REG, base); - reg = LowRegisterBits(reg); - base = LowRegisterBits(base); - if (RegStorage::RegNum(reg) >= 4) { - DCHECK(strchr(entry->name, '8') == NULL || - entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) - << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) - << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); - } - EmitModrmDisp(reg, base, disp); +void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, + int32_t raw_reg) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(low_reg, low_base, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, - uint8_t reg, uint8_t base, int disp) { +void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, + int32_t disp) { // Opcode will flip operands. - EmitMemReg(entry, base, disp, reg); + EmitMemReg(entry, raw_base, disp, raw_reg); } -void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, - uint8_t index, int scale, int disp) { - EmitPrefixAndOpcode(entry, reg, index, base); - reg = LowRegisterBits(reg); - index = LowRegisterBits(index); - base = LowRegisterBits(base); - EmitModrmSibDisp(reg, base, index, scale, disp); +void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, + int32_t raw_index, int scale, int32_t disp) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t low_index = LowRegisterBits(raw_index); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmSibDisp(low_reg, low_base, low_index, scale, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, - int disp, uint8_t reg) { +void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, + int scale, int32_t disp, int32_t raw_reg) { // Opcode will flip operands. 
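
Illustration (not part of the patch): the flip works because x86 pairs most load and store forms as two opcodes around one ModRM layout, so a single emitter body can serve both table entries:

89 /r    mov r/m32, r32   (store form: ModRM.reg is the source)
8B /r    mov r32, r/m32   (load form:  ModRM.reg is the destination)
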
- EmitRegArray(entry, reg, base, index, scale, disp); + EmitRegArray(entry, raw_reg, raw_base, raw_index, scale, disp); +} + +void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, + int32_t imm) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + EmitImm(entry, imm); } -void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, - int disp, int32_t imm) { - EmitPrefixAndOpcode(entry, NO_REG, index, base); - index = LowRegisterBits(index); - base = LowRegisterBits(base); - EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); +void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, + int32_t raw_base, int32_t raw_index, int scale, int32_t disp, + int32_t imm) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false); + uint8_t low_index = LowRegisterBits(raw_index); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); } -void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) { +void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); - if (RegStorage::RegNum(reg) >= 4) { - DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " - << static_cast<int>(RegStorage::RegNum(reg)) - << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); - } - DCHECK_LT(RegStorage::RegNum(reg), 8); - EmitModrmThread(RegStorage::RegNum(reg)); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false); + uint8_t low_reg = LowRegisterBits(raw_reg); + EmitModrmThread(low_reg); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); code_buffer_.push_back((disp >> 16) & 0xFF); @@ -1006,79 +1098,67 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) { - EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); - reg1 = LowRegisterBits(reg1); - reg2 = LowRegisterBits(reg2); - DCHECK_LT(RegStorage::RegNum(reg1), 8); - DCHECK_LT(RegStorage::RegNum(reg2), 8); - uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); +void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) { + CheckValidByteRegister(entry, raw_reg1); + CheckValidByteRegister(entry, raw_reg2); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form); + uint8_t low_reg1 = LowRegisterBits(raw_reg1); + uint8_t low_reg2 = LowRegisterBits(raw_reg2); + uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; code_buffer_.push_back(modrm); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, - uint8_t reg1, uint8_t reg2, int32_t imm) { - EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); - reg1 = LowRegisterBits(reg1); - reg2 = 
LowRegisterBits(reg2); - DCHECK_LT(RegStorage::RegNum(reg1), 8); - DCHECK_LT(RegStorage::RegNum(reg2), 8); - uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); +void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, + int32_t imm) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false); + uint8_t low_reg1 = LowRegisterBits(raw_reg1); + uint8_t low_reg2 = LowRegisterBits(raw_reg2); + uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; code_buffer_.push_back(modrm); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); } -void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry, - uint8_t reg1, uint8_t reg2, int32_t imm) { - EmitRegRegImm(entry, reg2, reg1, imm); -} - void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, - uint8_t reg, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry, reg, NO_REG, base); - reg = LowRegisterBits(reg); - base = LowRegisterBits(base); - DCHECK(!RegStorage::IsFloat(reg)); - DCHECK_LT(RegStorage::RegNum(reg), 8); - EmitModrmDisp(reg, base, disp); + int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) { + DCHECK(!RegStorage::IsFloat(raw_reg)); + CheckValidByteRegister(entry, raw_reg); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(low_reg, low_base, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); } void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, - uint8_t base, int disp, uint8_t reg, int32_t imm) { - EmitRegMemImm(entry, reg, base, disp, imm); + int32_t raw_base, int32_t disp, int32_t raw_reg, int32_t imm) { + // Opcode will flip operands. 
+ EmitRegMemImm(entry, raw_reg, raw_base, disp, imm); } -void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry, NO_REG, NO_REG, reg); - if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { +void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { - reg = LowRegisterBits(reg); + uint8_t low_reg = LowRegisterBits(raw_reg); EmitOpcode(entry); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); } EmitImm(entry, imm); } -void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); - base = LowRegisterBits(base); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); - DCHECK_EQ(0, entry->skeleton.ax_opcode); - EmitImm(entry, imm); -} - -void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { - EmitPrefixAndOpcode(entry); +void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) { + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1088,11 +1168,11 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { DCHECK_EQ(entry->skeleton.ax_opcode, 0); } -void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) { - EmitPrefix(entry, NO_REG, NO_REG, reg); - reg = LowRegisterBits(reg); - DCHECK_LT(RegStorage::RegNum(reg), 8); - code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg)); +void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false); + uint8_t low_reg = LowRegisterBits(raw_reg); + code_buffer_.push_back(0xB8 + low_reg); switch (entry->skeleton.immediate_bytes) { case 4: code_buffer_.push_back(imm & 0xFF); @@ -1116,9 +1196,9 @@ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t } } -void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry, NO_REG, NO_REG, reg); - reg = LowRegisterBits(reg); +void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1128,13 +1208,8 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - if (RegStorage::RegNum(reg) >= 4) { - DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " - << static_cast<int>(RegStorage::RegNum(reg)) - << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); - } - DCHECK_LT(RegStorage::RegNum(reg), 8); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); + 
uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); if (imm != 1) { DCHECK_EQ(entry->skeleton.immediate_bytes, 1); @@ -1143,40 +1218,40 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i } } -void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) { - DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); +void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) { + CheckValidByteRegister(entry, raw_reg); + DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - DCHECK_LT(RegStorage::RegNum(reg), 8); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, - int displacement, uint8_t cl) { - DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry, NO_REG, NO_REG, base); - base = LowRegisterBits(base); +void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, + int32_t displacement, int32_t raw_cl) { + DCHECK_EQ(false, entry->skeleton.r8_form); + DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); + EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - DCHECK_LT(RegStorage::RegNum(base), 8); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, displacement); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, - int displacement, int imm) { - EmitPrefix(entry, NO_REG, NO_REG, base); - base = LowRegisterBits(base); +void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, + int32_t imm) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1186,7 +1261,8 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); if (imm != 1) { DCHECK_EQ(entry->skeleton.immediate_bytes, 1); DCHECK(IS_SIMM8(imm)); @@ -1194,23 +1270,26 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, } } -void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) { - 
EmitPrefix(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); +void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) { + CheckValidByteRegister(entry, raw_reg); + EmitPrefix(entry, raw_reg, NO_REG, NO_REG, entry->skeleton.r8_form); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); DCHECK_EQ(0x90, entry->skeleton.extra_opcode1); - code_buffer_.push_back(0x90 | condition); + DCHECK_GE(cc, 0); + DCHECK_LT(cc, 16); + code_buffer_.push_back(0x90 | cc); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - DCHECK_LT(RegStorage::RegNum(reg), 8); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); DCHECK_EQ(entry->skeleton.immediate_bytes, 0); } -void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, - uint8_t condition) { +void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, + int32_t cc) { + DCHECK_EQ(false, entry->skeleton.r8_form); if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); if (entry->skeleton.prefix2 != 0) { @@ -1223,61 +1302,63 @@ void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int disp DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); DCHECK_EQ(0x90, entry->skeleton.extra_opcode1); - code_buffer_.push_back(0x90 | condition); + DCHECK_GE(cc, 0); + DCHECK_LT(cc, 16); + code_buffer_.push_back(0x90 | cc); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(entry->skeleton.immediate_bytes, 0); } -void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, - uint8_t condition) { - // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); - reg1 = LowRegisterBits(reg1); - reg2 = LowRegisterBits(reg2); +void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, + int32_t cc) { + // Generate prefix and opcode without the condition. + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false); // Now add the condition. The last byte of opcode is the one that receives it. - DCHECK_LE(condition, 0xF); - code_buffer_.back() += condition; + DCHECK_GE(cc, 0); + DCHECK_LT(cc, 16); + code_buffer_.back() += cc; - // Not expecting to have to encode immediate or do anything special for ModR/M since there are two registers. + // Not expecting to have to encode immediate or do anything special for ModR/M since there are + // two registers. DCHECK_EQ(0, entry->skeleton.immediate_bytes); DCHECK_EQ(0, entry->skeleton.modrm_opcode); - // Check that registers requested for encoding are sane. - DCHECK_LT(RegStorage::RegNum(reg1), 8); - DCHECK_LT(RegStorage::RegNum(reg2), 8); - // For register to register encoding, the mod is 3. const uint8_t mod = (3 << 6); // Encode the ModR/M byte now. 
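
Illustration (not part of the patch): register-to-register forms involve no addressing mode, so mod is hard-wired to 3 and rm carries the second register. For the CMOVcc family this emitter appears to serve, the condition added to the opcode's last byte works out to, for example:

0F 44 C1    cmove eax, ecx   (0x44 = base 0x40 + cc, modrm 0xC1 = mod 3, eax, ecx)
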
- const uint8_t modrm = mod | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); + uint8_t low_reg1 = LowRegisterBits(raw_reg1); + uint8_t low_reg2 = LowRegisterBits(raw_reg2); + const uint8_t modrm = mod | (low_reg1 << 3) | low_reg2; code_buffer_.push_back(modrm); } -void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, - int displacement, uint8_t condition) { - // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry, reg1, NO_REG, base); - reg1 = LowRegisterBits(reg1); - base = LowRegisterBits(base); +void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, + int32_t disp, int32_t cc) { + // Generate prefix and opcode without the condition. + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false); // Now add the condition. The last byte of opcode is the one that receives it. - DCHECK_LE(condition, 0xF); - code_buffer_.back() += condition; + DCHECK_GE(cc, 0); + DCHECK_LT(cc, 16); + code_buffer_.back() += cc; + // Not expecting to have to encode immediate or do anything special for ModR/M since there are + // two registers. DCHECK_EQ(0, entry->skeleton.immediate_bytes); DCHECK_EQ(0, entry->skeleton.modrm_opcode); - // Check that registers requested for encoding are sane. - DCHECK_LT(reg1, 8); - DCHECK_LT(base, 8); - - EmitModrmDisp(reg1, base, displacement); + uint8_t low_reg1 = LowRegisterBits(raw_reg1); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(low_reg1, low_base, disp); } -void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { +void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) { if (entry->opcode == kX86Jmp8) { DCHECK(IS_SIMM8(rel)); code_buffer_.push_back(0xEB); @@ -1294,17 +1375,17 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { code_buffer_.push_back(rel & 0xFF); } else { DCHECK(entry->opcode == kX86JmpR); - uint8_t reg = static_cast<uint8_t>(rel); - EmitPrefix(entry, NO_REG, NO_REG, reg); + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, rel, false); code_buffer_.push_back(entry->skeleton.opcode); - reg = LowRegisterBits(reg); - DCHECK_LT(RegStorage::RegNum(reg), 8); - uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); + uint8_t low_reg = LowRegisterBits(rel); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); } } -void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { +void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) { + DCHECK_GE(cc, 0); DCHECK_LT(cc, 16); if (entry->opcode == kX86Jcc8) { DCHECK(IS_SIMM8(rel)); @@ -1321,16 +1402,18 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { } } -void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); - base = LowRegisterBits(base); - EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); +void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false); + uint8_t low_base = LowRegisterBits(raw_base); + EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitCallImmediate(const 
X86EncodingMap* entry, int disp) { - EmitPrefixAndOpcode(entry); +void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); DCHECK_EQ(4, entry->skeleton.immediate_bytes); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1339,9 +1422,10 @@ void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int disp) { DCHECK_EQ(0, entry->skeleton.ax_opcode); } -void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) { +void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) { + DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1351,8 +1435,8 @@ void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) { DCHECK_EQ(0, entry->skeleton.immediate_bytes); } -void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, - int base_or_table, uint8_t index, int scale, int table_or_disp) { +void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table, + int32_t raw_index, int scale, int32_t table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { Mir2Lir::EmbeddedData *tab_rec = @@ -1361,31 +1445,28 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, } else { DCHECK(entry->opcode == kX86PcRelAdr); Mir2Lir::EmbeddedData *tab_rec = - reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(raw_base_or_table)); disp = tab_rec->offset; } if (entry->opcode == kX86PcRelLoadRA) { - EmitPrefix(entry, reg, index, base_or_table); - reg = LowRegisterBits(reg); - base_or_table = LowRegisterBits(base_or_table); - index = LowRegisterBits(index); - DCHECK_LT(RegStorage::RegNum(reg), 8); + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); DCHECK_EQ(0, entry->skeleton.extra_opcode2); - uint8_t modrm = (2 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rX86_SP.GetRegNum(); + uint8_t low_reg = LowRegisterBits(raw_reg); + uint8_t modrm = (2 << 6) | (low_reg << 3) | rs_rX86_SP.GetRegNum(); code_buffer_.push_back(modrm); DCHECK_LT(scale, 4); - DCHECK_LT(RegStorage::RegNum(index), 8); - DCHECK_LT(RegStorage::RegNum(base_or_table), 8); - uint8_t base = static_cast<uint8_t>(base_or_table); - uint8_t sib = (scale << 6) | (RegStorage::RegNum(index) << 3) | RegStorage::RegNum(base); + uint8_t low_base_or_table = LowRegisterBits(raw_base_or_table); + uint8_t low_index = LowRegisterBits(raw_index); + uint8_t sib = (scale << 6) | (low_index << 3) | low_base_or_table; code_buffer_.push_back(sib); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } else { - DCHECK_LT(RegStorage::RegNum(reg), 8); - code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg)); + uint8_t low_reg = LowRegisterBits(raw_reg); + code_buffer_.push_back(entry->skeleton.opcode + low_reg); } code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1395,21 +1476,21 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, 
DCHECK_EQ(0, entry->skeleton.ax_opcode); } -void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) { - DCHECK(entry->opcode == kX86StartOfMethod) << entry->name; - EmitPrefix(entry, reg, NO_REG, NO_REG); - reg = LowRegisterBits(reg); +void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { + DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); code_buffer_.push_back(0); code_buffer_.push_back(0); - DCHECK_LT(RegStorage::RegNum(reg), 8); - code_buffer_.push_back(0x58 + RegStorage::RegNum(reg)); // pop reg + uint8_t low_reg = LowRegisterBits(raw_reg); + code_buffer_.push_back(0x58 + low_reg); // pop reg - EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg), - offset + 5 /* size of call +0 */); + EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], + raw_reg, offset + 5 /* size of call +0 */); } void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { @@ -1570,21 +1651,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kData: // 4 bytes of data code_buffer_.push_back(lir->operands[0]); break; - case kNullary: // 1 byte of opcode - DCHECK_EQ(0, entry->skeleton.prefix1); - DCHECK_EQ(0, entry->skeleton.prefix2); - EmitOpcode(entry); - DCHECK_EQ(0, entry->skeleton.modrm_opcode); - DCHECK_EQ(0, entry->skeleton.ax_opcode); - DCHECK_EQ(0, entry->skeleton.immediate_bytes); - break; - case kPrefix2Nullary: // 1 byte of opcode + 2 prefixes. - DCHECK_NE(0, entry->skeleton.prefix1); - DCHECK_NE(0, entry->skeleton.prefix2); - EmitPrefixAndOpcode(entry); - DCHECK_EQ(0, entry->skeleton.modrm_opcode); - DCHECK_EQ(0, entry->skeleton.ax_opcode); - DCHECK_EQ(0, entry->skeleton.immediate_bytes); + case kNullary: // 1 byte of opcode and possible prefixes. + EmitNullary(entry); break; case kRegOpcode: // lir operands - 0: reg EmitOpRegOpcode(entry, lir->operands[0]); @@ -1628,17 +1696,17 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kRegRegStore: // lir operands - 0: reg2, 1: reg1 EmitRegReg(entry, lir->operands[1], lir->operands[0]); break; - case kRegRegImmRev: - EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]); - break; - case kMemRegImm: + case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate EmitMemRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; - case kRegRegImm: + case kRegRegImm: // lir operands - 0: reg1, 1: reg2, 2: imm EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; - case kRegMemImm: + case kRegRegImmStore: // lir operands - 0: reg2, 1: reg1, 2: imm + EmitRegRegImm(entry, lir->operands[1], lir->operands[0], lir->operands[2]); + break; + case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; @@ -1711,7 +1779,13 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kMacro: // lir operands - 0: reg EmitMacro(entry, lir->operands[0], lir->offset); break; - default: + case kNop: // TODO: these instruction kinds are missing implementations. 
+ case kThreadReg: + case kRegArrayImm: + case kShiftArrayImm: + case kShiftArrayCl: + case kArrayCond: + case kUnimplemented: EmitUnimplemented(entry, lir); break; } diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index fc0b305..f5fce34 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -86,11 +86,19 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, if (base_of_code_ != nullptr) { // We can use the saved value. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } start_of_method_reg = rl_method.reg; store_method_addr_used_ = true; } else { - start_of_method_reg = AllocTemp(); + if (Gen64Bit()) { + start_of_method_reg = AllocTempWide(); + } else { + start_of_method_reg = AllocTemp(); + } NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); } int low_key = s4FromSwitchData(&table[2]); @@ -108,9 +116,14 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); + NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + 2, WrapPointer(tab_rec)); // Add displacement to start of method - OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + if (Gen64Bit()) { + NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg()); + } else { + OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + } // ..and go! LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg()); tab_rec->anchor = switch_branch; @@ -150,13 +163,18 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { if (base_of_code_ != nullptr) { // We can use the saved value. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirect(rl_method, rs_rX86_ARG2); + if (rl_method.wide) { + LoadValueDirectWide(rl_method, rs_rX86_ARG2); + } else { + LoadValueDirect(rl_method, rs_rX86_ARG2); + } store_method_addr_used_ = true; } else { + // TODO(64) force to be 64-bit NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg()); } NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec)); - NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); + NewLIR2(Gen64Bit() ? 
kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); if (Is64BitInstructionSet(cu_->instruction_set)) { CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0, rs_rX86_ARG1, true); @@ -264,9 +282,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>()); } LIR* branch = OpCondBranch(kCondUlt, nullptr); - AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, - frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set))); + AddSlowPath( + new(arena_)StackOverflowSlowPath(this, branch, + frame_size_ - + GetInstructionSetPointerSize(cu_->instruction_set))); } FlushIns(ArgLocs, rl_method); @@ -276,7 +295,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg()); int displacement = SRegOffset(base_of_code_->s_reg_low); // Native pointer - must be natural word size. - setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0); + setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, Gen64Bit() ? k64 : k32); } FreeTemp(rs_rX86_ARG0); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 648c148..61c9f4f 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -20,780 +20,827 @@ #include "dex/compiler_internals.h" #include "x86_lir.h" +#include <map> + namespace art { class X86Mir2Lir : public Mir2Lir { - public: - X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit); - - // Required for target - codegen helpers. - bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, - RegLocation rl_dest, int lit); - bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; - LIR* CheckSuspendUsingLoad() OVERRIDE; - RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; - RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; - LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; - LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, - OpSize size) OVERRIDE; - LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, - OpSize size) OVERRIDE; - LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, - RegStorage r_dest, OpSize size) OVERRIDE; - LIR* LoadConstantNoClobber(RegStorage r_dest, int value); - LIR* LoadConstantWide(RegStorage r_dest, int64_t value); - LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) OVERRIDE; - LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, + protected: + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; + virtual ~InToRegStorageMapper() {} + }; + + class InToRegStorageX86_64Mapper : public InToRegStorageMapper { + public: + InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} + virtual ~InToRegStorageX86_64Mapper() {} + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide); + private: + int cur_core_reg_; + int cur_fp_reg_; + }; + + class InToRegStorageMapping { + public: + InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), + initialized_(false) {} + void 
Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); + int GetMaxMappedIn() { return max_mapped_in_; } + bool IsThereStackMapped() { return is_there_stack_mapped_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + std::map<int, RegStorage> mapping_; + int max_mapped_in_; + bool is_there_stack_mapped_; + bool initialized_; + }; + + public: + X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit); + + // Required for target - codegen helpers. + bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, + RegLocation rl_dest, int lit); + bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; + LIR* CheckSuspendUsingLoad() OVERRIDE; + RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE; + RegStorage LoadHelper(ThreadOffset<8> offset) OVERRIDE; + LIR* LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size) OVERRIDE; + LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size) OVERRIDE; + LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; - LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, - OpSize size) OVERRIDE; - LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, - RegStorage r_src, OpSize size) OVERRIDE; - void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg); - - // Required for target - register utilities. - RegStorage TargetReg(SpecialTargetRegister reg); - RegStorage GetArgMappingToPhysicalReg(int arg_num); - RegLocation GetReturnAlt(); - RegLocation GetReturnWideAlt(); - RegLocation LocCReturn(); - RegLocation LocCReturnRef(); - RegLocation LocCReturnDouble(); - RegLocation LocCReturnFloat(); - RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); - void AdjustSpillMask(); - void ClobberCallerSave(); - void FreeCallTemps(); - void LockCallTemps(); - void MarkPreservedSingle(int v_reg, RegStorage reg); - void MarkPreservedDouble(int v_reg, RegStorage reg); - void CompilerInitializeRegAlloc(); - - // Required for target - miscellaneous. - void AssembleLIR(); - int AssignInsnOffsets(); - void AssignOffsets(); - AssemblerStatus AssembleInstructions(CodeOffset start_addr); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); - const char* GetTargetInstFmt(int opcode); - const char* GetTargetInstName(int opcode); - std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); - uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); - bool IsUnconditionalBranch(LIR* lir); - - // Check support for volatile load/store of a given size. - bool SupportsVolatileLoadStore(OpSize size) OVERRIDE; - // Get the register class for load/store of a field. - RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; - - // Required for target - Dalvik-level generators. 
- void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_dest, int scale); - void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); - void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_shift); - void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, + RegStorage r_dest, OpSize size) OVERRIDE; + LIR* LoadConstantNoClobber(RegStorage r_dest, int value); + LIR* LoadConstantWide(RegStorage r_dest, int64_t value); + LIR* StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size) OVERRIDE; + LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size) OVERRIDE; + LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, + OpSize size) OVERRIDE; + LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, + RegStorage r_src, OpSize size) OVERRIDE; + void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg); + + // Required for target - register utilities. + RegStorage TargetReg(SpecialTargetRegister reg); + RegStorage GetArgMappingToPhysicalReg(int arg_num); + RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num); + RegLocation GetReturnAlt(); + RegLocation GetReturnWideAlt(); + RegLocation LocCReturn(); + RegLocation LocCReturnRef(); + RegLocation LocCReturnDouble(); + RegLocation LocCReturnFloat(); + RegLocation LocCReturnWide(); + uint64_t GetRegMaskCommon(RegStorage reg); + void AdjustSpillMask(); + void ClobberCallerSave(); + void FreeCallTemps(); + void LockCallTemps(); + void MarkPreservedSingle(int v_reg, RegStorage reg); + void MarkPreservedDouble(int v_reg, RegStorage reg); + void CompilerInitializeRegAlloc(); + + // Required for target - miscellaneous. + void AssembleLIR(); + int AssignInsnOffsets(); + void AssignOffsets(); + AssemblerStatus AssembleInstructions(CodeOffset start_addr); + void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + const char* GetTargetInstFmt(int opcode); + const char* GetTargetInstName(int opcode); + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); + uint64_t GetPCUseDefEncoding(); + uint64_t GetTargetInstFlags(int opcode); + int GetInsnSize(LIR* lir); + bool IsUnconditionalBranch(LIR* lir); + + // Check support for volatile load/store of a given size. + bool SupportsVolatileLoadStore(OpSize size) OVERRIDE; + // Get the register class for load/store of a field. + RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE; + + // Required for target - Dalvik-level generators. 
+ void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_dest, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); + bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); + bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); + bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenNotLong(RegLocation rl_dest, RegLocation rl_src); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); - bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); - bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); - bool GenInlinedSqrt(CallInfo* info); - bool GenInlinedPeek(CallInfo* info, OpSize size); - bool GenInlinedPoke(CallInfo* info, OpSize size); - void GenNotLong(RegLocation rl_dest, RegLocation rl_src); - void GenNegLong(RegLocation rl_dest, RegLocation rl_src); - void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2); - void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2, bool is_div); - // TODO: collapse reg_lo, reg_hi - RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); - RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); - void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenDivZeroCheckWide(RegStorage reg); - void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset); - void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset); - void GenEntrySequence(RegLocation* ArgLocs, 
RegLocation rl_method); - void GenExitSequence(); - void GenSpecialExitSequence(); - void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); - void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); - void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); - void GenSelect(BasicBlock* bb, MIR* mir); - bool GenMemBarrier(MemBarrierKind barrier_kind); - void GenMoveException(RegLocation rl_dest); - void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, - int first_bit, int second_bit); - void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); - void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); - - /* - * @brief Generate a two address long operation with a constant value - * @param rl_dest location of result - * @param rl_src constant source operand - * @param op Opcode to be generated - */ - void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); - /* - * @brief Generate a three address long operation with a constant value - * @param rl_dest location of result - * @param rl_src1 source operand - * @param rl_src2 constant source operand - * @param op Opcode to be generated - */ - void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, - Instruction::Code op); - - /** - * @brief Generate a long arithmetic operation. - * @param rl_dest The destination. - * @param rl_src1 First operand. - * @param rl_src2 Second operand. - * @param op The DEX opcode for the operation. - * @param is_commutative The sources can be swapped if needed. - */ - virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, - Instruction::Code op, bool is_commutative); - - /** - * @brief Generate a two operand long arithmetic operation. - * @param rl_dest The destination. - * @param rl_src Second operand. - * @param op The DEX opcode for the operation. - */ - void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); - - /** - * @brief Generate a long operation. - * @param rl_dest The destination. Must be in a register - * @param rl_src The other operand. May be in a register or in memory. - * @param op The DEX opcode for the operation. - */ - virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); - - /** - * @brief Implement instanceof a final class with x86 specific code. - * @param use_declaring_class 'true' if we can use the class itself. - * @param type_idx Type index to use if use_declaring_class is 'false'. - * @param rl_dest Result to be set to 0 or 1. - * @param rl_src Object to be tested. - */ - void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, - RegLocation rl_src); - /* - * - * @brief Implement Set up instanceof a class with x86 specific code. - * @param needs_access_check 'true' if we must check the access. - * @param type_known_final 'true' if the type is known to be a final class. - * @param type_known_abstract 'true' if the type is known to be an abstract class. - * @param use_declaring_class 'true' if the type can be loaded off the current Method*. - * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache. - * @param type_idx Type index to use if use_declaring_class is 'false'. - * @param rl_dest Result to be set to 0 or 1. - * @param rl_src Object to be tested. 
- */ - void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, - bool type_known_abstract, bool use_declaring_class, - bool can_assume_type_is_in_dex_cache, - uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - - // Single operation generators. - LIR* OpUnconditionalBranch(LIR* target); - LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); - LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target); - LIR* OpCondBranch(ConditionCode cc, LIR* target); - LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target); - LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src); - LIR* OpIT(ConditionCode cond, const char* guide); - void OpEndIT(LIR* it); - LIR* OpMem(OpKind op, RegStorage r_base, int disp); - LIR* OpPcRelLoad(RegStorage reg, LIR* target); - LIR* OpReg(OpKind op, RegStorage r_dest_src); - void OpRegCopy(RegStorage r_dest, RegStorage r_src); - LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); - LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); - LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset); - LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value); - LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value); - LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); - LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); - LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); - LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); - LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); - LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); - LIR* OpTestSuspend(LIR* target); - LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE; - LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE; - LIR* OpVldm(RegStorage r_base, int count); - LIR* OpVstm(RegStorage r_base, int count); - void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset); - void OpRegCopyWide(RegStorage dest, RegStorage src); - void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE; - void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE; - - void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset); - void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset); - void SpillCoreRegs(); - void UnSpillCoreRegs(); - static const X86EncodingMap EncodingMap[kX86Last]; - bool InexpensiveConstantInt(int32_t value); - bool InexpensiveConstantFloat(int32_t value); - bool InexpensiveConstantLong(int64_t value); - bool InexpensiveConstantDouble(int64_t value); - - /* - * @brief Should try to optimize for two address instructions? - * @return true if we try to avoid generating three operand instructions. - */ - virtual bool GenerateTwoOperandInstructions() const { return true; } - - /* - * @brief x86 specific codegen for int operations. - * @param opcode Operation to perform. - * @param rl_dest Destination for the result. - * @param rl_lhs Left hand operand. - * @param rl_rhs Right hand operand. 
- */ - void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs, - RegLocation rl_rhs); - - /* - * @brief Dump a RegLocation using printf - * @param loc Register location to dump - */ - static void DumpRegLocation(RegLocation loc); - - /* - * @brief Load the Method* of a dex method into the register. - * @param target_method The MethodReference of the method to be invoked. - * @param type How the method will be invoked. - * @param register that will contain the code address. - * @note register will be passed to TargetReg to get physical register. - */ - void LoadMethodAddress(const MethodReference& target_method, InvokeType type, - SpecialTargetRegister symbolic_reg); - - /* - * @brief Load the Class* of a Dex Class type into the register. - * @param type How the method will be invoked. - * @param register that will contain the code address. - * @note register will be passed to TargetReg to get physical register. - */ - void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg); - - /* - * @brief Generate a relative call to the method that will be patched at link time. - * @param target_method The MethodReference of the method to be invoked. - * @param type How the method will be invoked. - * @returns Call instruction - */ - virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type); - - /* - * @brief Handle x86 specific literals - */ - void InstallLiteralPools(); - - /* - * @brief Generate the debug_frame CFI information. - * @returns pointer to vector containing CFE information - */ - static std::vector<uint8_t>* ReturnCommonCallFrameInformation(); - - /* - * @brief Generate the debug_frame FDE information. - * @returns pointer to vector containing CFE information - */ - std::vector<uint8_t>* ReturnCallFrameInformation(); - - protected: - size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, - int reg_r, int reg_x, bool has_sib); - uint8_t LowRegisterBits(uint8_t reg); - bool NeedsRex(uint8_t reg); - void EmitPrefix(const X86EncodingMap* entry); - void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); - void EmitOpcode(const X86EncodingMap* entry); - void EmitPrefixAndOpcode(const X86EncodingMap* entry); - void EmitPrefixAndOpcode(const X86EncodingMap* entry, - uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); - void EmitDisp(uint8_t base, int disp); - void EmitModrmThread(uint8_t reg_or_opcode); - void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp); - void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp); - void EmitImm(const X86EncodingMap* entry, int64_t imm); - void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg); - void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); - void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); - void EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp); - void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg); - void EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm); - void EmitRegMem(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp); - void EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, - int scale, int disp); - void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, - uint8_t reg); - void EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int 
scale, int disp, - int32_t imm); - void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp); - void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2); - void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm); - void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm); - void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, - int32_t imm); - void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm); - void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); - void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm); - void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm); - void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); - void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm); - void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl); - void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl); - void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition); - void EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition); - - /** - * @brief Used for encoding conditional register to register operation. - * @param entry The entry in the encoding map for the opcode. - * @param reg1 The first physical register. - * @param reg2 The second physical register. - * @param condition The condition code for operation. - */ - void EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition); - - /** - * @brief Used for encoding conditional register to memory operation. - * @param entry The entry in the encoding map for the opcode. - * @param reg1 The first physical register. - * @param base The memory base register. - * @param displacement The memory displacement. - * @param condition The condition code for operation. - */ - void EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition); - - void EmitJmp(const X86EncodingMap* entry, int rel); - void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc); - void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp); - void EmitCallImmediate(const X86EncodingMap* entry, int disp); - void EmitCallThread(const X86EncodingMap* entry, int disp); - void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index, - int scale, int table_or_disp); - void EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset); - void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); - void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, - int64_t val, ConditionCode ccode); - void GenConstWide(RegLocation rl_dest, int64_t value); - - static bool ProvidesFullMemoryBarrier(X86OpCode opcode); - - /* - * @brief Ensure that a temporary register is byte addressable. - * @returns a temporary guarenteed to be byte addressable. - */ - virtual RegStorage AllocateByteRegister(); - - /* - * @brief generate inline code for fast case of Strng.indexOf. - * @param info Call parameters - * @param zero_based 'true' if the index into the string is 0. - * @returns 'true' if the call was inlined, 'false' if a regular call needs to be - * generated. - */ - bool GenInlinedIndexOf(CallInfo* info, bool zero_based); - - /* - * @brief Load 128 bit constant into vector register. 
- * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector - * @note vA is the TypeSize for the register. - * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values. - */ - void GenConst128(BasicBlock* bb, MIR* mir); - - /* - * @brief MIR to move a vectorized register to another. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination - * @note vC: source - */ - void GenMoveVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenMultiplyVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenAddVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenSubtractVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: immediate - */ - void GenShiftLeftVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: immediate - */ - void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from.. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: immediate - */ - void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenAndVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. 
- * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenOrVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination and source - * @note vC: source - */ - void GenXorVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Reduce a 128-bit packed element into a single VR by taking lower bits - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @details Instruction does a horizontal addition of the packed elements and then adds it to VR. - * @note vA: TypeSize - * @note vB: destination and source VR (not vector register) - * @note vC: source (vector register) - */ - void GenAddReduceVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Extract a packed element into a single VR. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize - * @note vB: destination VR (not vector register) - * @note vC: source (vector register) - * @note arg[0]: The index to use for extraction from vector register (which packed element). - */ - void GenReduceVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Create a vector value, with all TypeSize values equal to vC - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is kMirConstVector. - * @note vA: TypeSize. - * @note vB: destination vector register. - * @note vC: source VR (not vector register). - */ - void GenSetVector(BasicBlock *bb, MIR *mir); - - /* - * @brief Generate code for a vector opcode. - * @param bb The basic block in which the MIR is from. - * @param mir The MIR whose opcode is a non-standard opcode. - */ - void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir); - - /* - * @brief Return the correct x86 opcode for the Dex operation - * @param op Dex opcode for the operation - * @param loc Register location of the operand - * @param is_high_op 'true' if this is an operation on the high word - * @param value Immediate value for the operation. Used for byte variants - * @returns the correct x86 opcode to perform the operation - */ - X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value); - - /* - * @brief Return the correct x86 opcode for the Dex operation - * @param op Dex opcode for the operation - * @param dest location of the destination. May be register or memory. - * @param rhs Location for the rhs of the operation. May be in register or memory. - * @param is_high_op 'true' if this is an operation on the high word - * @returns the correct x86 opcode to perform the operation - * @note at most one location may refer to memory - */ - X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, - bool is_high_op); - - /* - * @brief Is this operation a no-op for this opcode and value - * @param op Dex opcode for the operation - * @param value Immediate value for the operation. 
- * @returns 'true' if the operation will have no effect - */ - bool IsNoOp(Instruction::Code op, int32_t value); - - /** - * @brief Calculate magic number and shift for a given divisor - * @param divisor divisor number for calculation - * @param magic hold calculated magic number - * @param shift hold calculated shift - */ - void CalculateMagicAndShift(int divisor, int& magic, int& shift); - - /* - * @brief Generate an integer div or rem operation. - * @param rl_dest Destination Location. - * @param rl_src1 Numerator Location. - * @param rl_src2 Divisor Location. - * @param is_div 'true' if this is a division, 'false' for a remainder. - * @param check_zero 'true' if an exception should be generated if the divisor is 0. - */ - RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, - bool is_div, bool check_zero); - - /* - * @brief Generate an integer div or rem operation by a literal. - * @param rl_dest Destination Location. - * @param rl_src Numerator Location. - * @param lit Divisor. - * @param is_div 'true' if this is a division, 'false' for a remainder. - */ - RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div); - - /* - * Generate code to implement long shift operations. - * @param opcode The DEX opcode to specify the shift type. - * @param rl_dest The destination. - * @param rl_src The value to be shifted. - * @param shift_amount How much to shift. - * @returns the RegLocation of the result. - */ - RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src, int shift_amount); - /* - * Generate an imul of a register by a constant or a better sequence. - * @param dest Destination Register. - * @param src Source Register. - * @param val Constant multiplier. - */ - void GenImulRegImm(RegStorage dest, RegStorage src, int val); - - /* - * Generate an imul of a memory location by a constant or a better sequence. - * @param dest Destination Register. - * @param sreg Symbolic register. - * @param displacement Displacement on stack of Symbolic Register. - * @param val Constant multiplier. - */ - void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val); - - /* - * @brief Compare memory to immediate, and branch if condition true. - * @param cond The condition code that when true will branch to the target. - * @param temp_reg A temporary register that can be used if compare memory is not - * supported by the architecture. - * @param base_reg The register holding the base address. - * @param offset The offset from the base. - * @param check_value The immediate to compare to. - */ - LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, - int offset, int check_value, LIR* target); - - /* - * Can this operation be using core registers without temporaries? - * @param rl_lhs Left hand operand. - * @param rl_rhs Right hand operand. - * @returns 'true' if the operation can proceed without needing temporary regs. - */ - bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs); - - /** - * @brief Generates inline code for conversion of long to FP by using x87/ - * @param rl_dest The destination of the FP. - * @param rl_src The source of the long. - * @param is_double 'true' if dealing with double, 'false' for float. - */ - virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double); - - /* - * @brief Perform MIR analysis before compiling method. - * @note Invokes Mir2LiR::Materialize after analysis. 
- */ - void Materialize(); - - /* - * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register - * without regard to data type. In practice, this can result in UpdateLoc returning a - * location record for a Dalvik float value in a core register, and vis-versa. For targets - * which can inexpensively move data between core and float registers, this can often be a win. - * However, for x86 this is generally not a win. These variants of UpdateLoc() - * take a register class argument - and will return an in-register location record only if - * the value is live in a temp register of the correct class. Additionally, if the value is in - * a temp register of the wrong register class, it will be clobbered. - */ - RegLocation UpdateLocTyped(RegLocation loc, int reg_class); - RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class); - - /* - * @brief Analyze MIR before generating code, to prepare for the code generation. - */ - void AnalyzeMIR(); - - /* - * @brief Analyze one basic block. - * @param bb Basic block to analyze. - */ - void AnalyzeBB(BasicBlock * bb); - - /* - * @brief Analyze one extended MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Extended instruction to analyze. - */ - void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir); - - /* - * @brief Analyze one MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Instruction to analyze. - */ - virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir); - - /* - * @brief Analyze one MIR float/double instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Instruction to analyze. - */ - void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir); - - /* - * @brief Analyze one use of a double operand. - * @param rl_use Double RegLocation for the operand. - */ - void AnalyzeDoubleUse(RegLocation rl_use); - - bool Gen64Bit() const { return gen64bit_; } - - // Information derived from analysis of MIR - - // The compiler temporary for the code address of the method. - CompilerTemp *base_of_code_; - - // Have we decided to compute a ptr to code and store in temporary VR? - bool store_method_addr_; - - // Have we used the stored method address? - bool store_method_addr_used_; - - // Instructions to remove if we didn't use the stored method address. - LIR* setup_method_address_[2]; - - // Instructions needing patching with Method* values. - GrowableArray<LIR*> method_address_insns_; - - // Instructions needing patching with Class Type* values. - GrowableArray<LIR*> class_type_address_insns_; - - // Instructions needing patching with PC relative code addresses. - GrowableArray<LIR*> call_method_insns_; - - // Prologue decrement of stack pointer. - LIR* stack_decrement_; - - // Epilogue increment of stack pointer. - LIR* stack_increment_; - - // 64-bit mode - bool gen64bit_; - - // The list of const vector literals. - LIR *const_vectors_; - - /* - * @brief Search for a matching vector literal - * @param mir A kMirOpConst128b MIR instruction to match. - * @returns pointer to matching LIR constant, or nullptr if not found. - */ - LIR *ScanVectorLiteral(MIR *mir); - - /* - * @brief Add a constant vector literal - * @param mir A kMirOpConst128b MIR instruction to match. 
- */ - LIR *AddVectorLiteral(MIR *mir); + void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div); + // TODO: collapse reg_lo, reg_hi + RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); + RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivZeroCheckWide(RegStorage reg); + void GenArrayBoundsCheck(RegStorage index, RegStorage array_base, int32_t len_offset); + void GenArrayBoundsCheck(int32_t index, RegStorage array_base, int32_t len_offset); + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); + void GenExitSequence(); + void GenSpecialExitSequence(); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); + void GenSelect(BasicBlock* bb, MIR* mir); + bool GenMemBarrier(MemBarrierKind barrier_kind); + void GenMoveException(RegLocation rl_dest); + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, + int first_bit, int second_bit); + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); + + /* + * @brief Generate a two address long operation with a constant value + * @param rl_dest location of result + * @param rl_src constant source operand + * @param op Opcode to be generated + * @return success or not + */ + bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); + /* + * @brief Generate a three address long operation with a constant value + * @param rl_dest location of result + * @param rl_src1 source operand + * @param rl_src2 constant source operand + * @param op Opcode to be generated + * @return success or not + */ + bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + Instruction::Code op); + + /** + * @brief Generate a long arithmetic operation. + * @param rl_dest The destination. + * @param rl_src1 First operand. + * @param rl_src2 Second operand. + * @param op The DEX opcode for the operation. + * @param is_commutative The sources can be swapped if needed. + */ + virtual void GenLongArith(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + Instruction::Code op, bool is_commutative); + + /** + * @brief Generate a two operand long arithmetic operation. + * @param rl_dest The destination. + * @param rl_src Second operand. + * @param op The DEX opcode for the operation. + */ + void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); + + /** + * @brief Generate a long operation. + * @param rl_dest The destination. Must be in a register + * @param rl_src The other operand. May be in a register or in memory. + * @param op The DEX opcode for the operation. + */ + virtual void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); + + /** + * @brief Implement instanceof a final class with x86 specific code. + * @param use_declaring_class 'true' if we can use the class itself. + * @param type_idx Type index to use if use_declaring_class is 'false'. 
+ * @param rl_dest Result to be set to 0 or 1. + * @param rl_src Object to be tested. + */ + void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src); + /* + * + * @brief Set up an instanceof check for a class with x86 specific code. + * @param needs_access_check 'true' if we must check the access. + * @param type_known_final 'true' if the type is known to be a final class. + * @param type_known_abstract 'true' if the type is known to be an abstract class. + * @param use_declaring_class 'true' if the type can be loaded off the current Method*. + * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache. + * @param type_idx Type index to use if use_declaring_class is 'false'. + * @param rl_dest Result to be set to 0 or 1. + * @param rl_src Object to be tested. + */ + void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, + bool type_known_abstract, bool use_declaring_class, + bool can_assume_type_is_in_dex_cache, + uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); + + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + + // Single operation generators. + LIR* OpUnconditionalBranch(LIR* target); + LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); + LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target); + LIR* OpCondBranch(ConditionCode cc, LIR* target); + LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target); + LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src); + LIR* OpIT(ConditionCode cond, const char* guide); + void OpEndIT(LIR* it); + LIR* OpMem(OpKind op, RegStorage r_base, int disp); + LIR* OpPcRelLoad(RegStorage reg, LIR* target); + LIR* OpReg(OpKind op, RegStorage r_dest_src); + void OpRegCopy(RegStorage r_dest, RegStorage r_src); + LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); + LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); + LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset); + LIR* OpRegMem(OpKind op, RegStorage r_dest, RegLocation value); + LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value); + LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); + LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); + LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); + LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); + LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); + LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); + LIR* OpTestSuspend(LIR* target); + LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) OVERRIDE; + LIR* OpThreadMem(OpKind op, ThreadOffset<8> thread_offset) OVERRIDE; + LIR* OpVldm(RegStorage r_base, int count); + LIR* OpVstm(RegStorage r_base, int count); + void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset); + void OpRegCopyWide(RegStorage dest, RegStorage src); + void OpTlsCmp(ThreadOffset<4> offset, int val) OVERRIDE; + void OpTlsCmp(ThreadOffset<8> offset, int val) OVERRIDE; + + void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset); + void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset); + void SpillCoreRegs(); + void UnSpillCoreRegs(); + static const X86EncodingMap
EncodingMap[kX86Last]; + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + /* + * @brief Should try to optimize for two address instructions? + * @return true if we try to avoid generating three operand instructions. + */ + virtual bool GenerateTwoOperandInstructions() const { return true; } + + /* + * @brief x86 specific codegen for int operations. + * @param opcode Operation to perform. + * @param rl_dest Destination for the result. + * @param rl_lhs Left hand operand. + * @param rl_rhs Right hand operand. + */ + void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs, + RegLocation rl_rhs); + + /* + * @brief Dump a RegLocation using printf + * @param loc Register location to dump + */ + static void DumpRegLocation(RegLocation loc); + + /* + * @brief Load the Method* of a dex method into the register. + * @param target_method The MethodReference of the method to be invoked. + * @param type How the method will be invoked. + * @param register that will contain the code address. + * @note register will be passed to TargetReg to get physical register. + */ + void LoadMethodAddress(const MethodReference& target_method, InvokeType type, + SpecialTargetRegister symbolic_reg); + + /* + * @brief Load the Class* of a Dex Class type into the register. + * @param type How the method will be invoked. + * @param register that will contain the code address. + * @note register will be passed to TargetReg to get physical register. + */ + void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg); + + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); + + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + /* + * @brief Generate a relative call to the method that will be patched at link time. + * @param target_method The MethodReference of the method to be invoked. + * @param type How the method will be invoked. + * @returns Call instruction + */ + virtual LIR * CallWithLinkerFixup(const MethodReference& target_method, InvokeType type); + + /* + * @brief Handle x86 specific literals + */ + void InstallLiteralPools(); + + /* + * @brief Generate the debug_frame CFI information. + * @returns pointer to vector containing CFE information + */ + static std::vector<uint8_t>* ReturnCommonCallFrameInformation(); + + /* + * @brief Generate the debug_frame FDE information. 
+ * @returns pointer to vector containing CFE information + */ + std::vector<uint8_t>* ReturnCallFrameInformation(); + + protected: + size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, + int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, + int32_t displacement); + void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg); + void EmitPrefix(const X86EncodingMap* entry, + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, + bool r8_form); + void EmitOpcode(const X86EncodingMap* entry); + void EmitPrefixAndOpcode(const X86EncodingMap* entry, + int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form); + void EmitDisp(uint8_t base, int32_t disp); + void EmitModrmThread(uint8_t reg_or_opcode); + void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp); + void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, + int32_t disp); + void EmitImm(const X86EncodingMap* entry, int64_t imm); + void EmitNullary(const X86EncodingMap* entry); + void EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg); + void EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg); + void EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp); + void EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, + int32_t disp); + void EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg); + void EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t disp); + void EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, + int32_t raw_index, int scale, int32_t disp); + void EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, + int32_t disp, int32_t raw_reg); + void EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm); + void EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, + int32_t raw_disp, int32_t imm); + void EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp); + void EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2); + void EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm); + void EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp, + int32_t imm); + void EmitMemRegImm(const X86EncodingMap* entry, int32_t base, int32_t disp, int32_t raw_reg1, + int32_t imm); + void EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm); + void EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm); + void EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm); + void EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm); + void EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl); + void EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_cl); + void EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm); + void EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc); + void EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t cc); + void EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t cc); + void EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_base, int32_t disp, + int32_t cc); + + void 
EmitJmp(const X86EncodingMap* entry, int32_t rel); + void EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc); + void EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp); + void EmitCallImmediate(const X86EncodingMap* entry, int32_t disp); + void EmitCallThread(const X86EncodingMap* entry, int32_t disp); + void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table, + int32_t raw_index, int scale, int32_t table_or_disp); + void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset); + void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); + void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, + int64_t val, ConditionCode ccode); + void GenConstWide(RegLocation rl_dest, int64_t value); + + static bool ProvidesFullMemoryBarrier(X86OpCode opcode); + + /* + * @brief Ensure that a temporary register is byte addressable. + * @returns a temporary guaranteed to be byte addressable. + */ + virtual RegStorage AllocateByteRegister(); + + /* + * @brief Generate inline code for the fast case of String.indexOf. + * @param info Call parameters + * @param zero_based 'true' if the index into the string is 0. + * @returns 'true' if the call was inlined, 'false' if a regular call needs to be + * generated. + */ + bool GenInlinedIndexOf(CallInfo* info, bool zero_based); + + /* + * @brief Load 128 bit constant into vector register. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector + * @note vA is the TypeSize for the register. + * @note vB is the destination XMM register. arg[0..3] are 32 bit constant values. + */ + void GenConst128(BasicBlock* bb, MIR* mir); + + /* + * @brief MIR to move a vectorized register to another. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination + * @note vC: source + */ + void GenMoveVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenMultiplyVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenAddVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenSubtractVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector.
+ * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenShiftLeftVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: immediate + */ + void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenAndVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenOrVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination and source + * @note vC: source + */ + void GenXorVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Reduce a 128-bit packed element into a single VR by taking lower bits + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @details Instruction does a horizontal addition of the packed elements and then adds it to VR. + * @note vA: TypeSize + * @note vB: destination and source VR (not vector register) + * @note vC: source (vector register) + */ + void GenAddReduceVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Extract a packed element into a single VR. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize + * @note vB: destination VR (not vector register) + * @note vC: source (vector register) + * @note arg[0]: The index to use for extraction from vector register (which packed element). + */ + void GenReduceVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Create a vector value, with all TypeSize values equal to vC + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is kMirConstVector. + * @note vA: TypeSize. + * @note vB: destination vector register. + * @note vC: source VR (not vector register). + */ + void GenSetVector(BasicBlock *bb, MIR *mir); + + /* + * @brief Generate code for a vector opcode. + * @param bb The basic block in which the MIR is from. + * @param mir The MIR whose opcode is a non-standard opcode.
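Each of the packed ops above follows the same two-address pattern, vB = vB <op> vC, with vA giving the lane width, so the backend can select one SSE instruction per (op, lane-width) pair. As an illustration only (the code generator emits the LIR opcode directly rather than going through intrinsics), a packed add over four 32-bit lanes corresponds to PADDD:

#include <emmintrin.h>  // SSE2 intrinsics

// vB = vB .+ vC for a 128-bit register treated as four 32-bit lanes;
// the additions are independent per lane, with no cross-lane carries.
__m128i PackedAdd32(__m128i vB, __m128i vC) {
  return _mm_add_epi32(vB, vC);
}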
+ */ + void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir); + + /* + * @brief Return the correct x86 opcode for the Dex operation + * @param op Dex opcode for the operation + * @param loc Register location of the operand + * @param is_high_op 'true' if this is an operation on the high word + * @param value Immediate value for the operation. Used for byte variants + * @returns the correct x86 opcode to perform the operation + */ + X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value); + + /* + * @brief Return the correct x86 opcode for the Dex operation + * @param op Dex opcode for the operation + * @param dest location of the destination. May be register or memory. + * @param rhs Location for the rhs of the operation. May be in register or memory. + * @param is_high_op 'true' if this is an operation on the high word + * @returns the correct x86 opcode to perform the operation + * @note at most one location may refer to memory + */ + X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, + bool is_high_op); + + /* + * @brief Is this operation a no-op for this opcode and value + * @param op Dex opcode for the operation + * @param value Immediate value for the operation. + * @returns 'true' if the operation will have no effect + */ + bool IsNoOp(Instruction::Code op, int32_t value); + + /** + * @brief Calculate magic number and shift for a given divisor + * @param divisor divisor number for calculation + * @param magic hold calculated magic number + * @param shift hold calculated shift + */ + void CalculateMagicAndShift(int divisor, int& magic, int& shift); + + /* + * @brief Generate an integer div or rem operation. + * @param rl_dest Destination Location. + * @param rl_src1 Numerator Location. + * @param rl_src2 Divisor Location. + * @param is_div 'true' if this is a division, 'false' for a remainder. + * @param check_zero 'true' if an exception should be generated if the divisor is 0. + */ + RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + bool is_div, bool check_zero); + + /* + * @brief Generate an integer div or rem operation by a literal. + * @param rl_dest Destination Location. + * @param rl_src Numerator Location. + * @param lit Divisor. + * @param is_div 'true' if this is a division, 'false' for a remainder. + */ + RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div); + + /* + * Generate code to implement long shift operations. + * @param opcode The DEX opcode to specify the shift type. + * @param rl_dest The destination. + * @param rl_src The value to be shifted. + * @param shift_amount How much to shift. + * @returns the RegLocation of the result. + */ + RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src, int shift_amount); + /* + * Generate an imul of a register by a constant or a better sequence. + * @param dest Destination Register. + * @param src Source Register. + * @param val Constant multiplier. + */ + void GenImulRegImm(RegStorage dest, RegStorage src, int val); + + /* + * Generate an imul of a memory location by a constant or a better sequence. + * @param dest Destination Register. + * @param sreg Symbolic register. + * @param displacement Displacement on stack of Symbolic Register. + * @param val Constant multiplier. + */ + void GenImulMemImm(RegStorage dest, int sreg, int displacement, int val); + + /* + * @brief Compare memory to immediate, and branch if condition true. 
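CalculateMagicAndShift, declared above, is what lets GenDivRemLit avoid idiv: division by a constant becomes a widening multiply by a precomputed "magic" reciprocal followed by shifts and a sign fix-up. A self-contained sketch of the classic Hacker's Delight computation for signed 32-bit divisors with |divisor| >= 2 (assuming the helper follows this standard derivation):

#include <cstdint>
#include <cstdlib>

// Computes magic and shift such that, for int32_t n (hi32 = upper 32 bits
// of the 64-bit product):
//   q = hi32(magic * (int64_t)n);
//   if (divisor > 0 && magic < 0) q += n;
//   if (divisor < 0 && magic > 0) q -= n;
//   q >>= shift; q += (uint32_t)q >> 31;  // add 1 if q is negative
void CalcMagicSigned32(int32_t divisor, int32_t* magic, int32_t* shift) {
  const uint32_t two31 = 0x80000000u;
  const uint32_t ad = static_cast<uint32_t>(std::abs(divisor));
  const uint32_t t = two31 + (static_cast<uint32_t>(divisor) >> 31);
  const uint32_t anc = t - 1 - t % ad;  // absolute value of nc
  int p = 31;
  uint32_t q1 = two31 / anc, r1 = two31 - q1 * anc;
  uint32_t q2 = two31 / ad, r2 = two31 - q2 * ad;
  uint32_t delta;
  do {
    ++p;
    q1 *= 2; r1 *= 2;
    if (r1 >= anc) { ++q1; r1 -= anc; }
    q2 *= 2; r2 *= 2;
    if (r2 >= ad) { ++q2; r2 -= ad; }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));
  *magic = static_cast<int32_t>(q2 + 1);
  if (divisor < 0) {
    *magic = -*magic;
  }
  *shift = p - 32;
}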
+ * @param cond The condition code that when true will branch to the target. + * @param temp_reg A temporary register that can be used if compare memory is not + * supported by the architecture. + * @param base_reg The register holding the base address. + * @param offset The offset from the base. + * @param check_value The immediate to compare to. + */ + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, + int offset, int check_value, LIR* target); + + /* + * Can this operation be using core registers without temporaries? + * @param rl_lhs Left hand operand. + * @param rl_rhs Right hand operand. + * @returns 'true' if the operation can proceed without needing temporary regs. + */ + bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs); + + /** + * @brief Generates inline code for conversion of long to FP by using x87/ + * @param rl_dest The destination of the FP. + * @param rl_src The source of the long. + * @param is_double 'true' if dealing with double, 'false' for float. + */ + virtual void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double); + + /* + * @brief Perform MIR analysis before compiling method. + * @note Invokes Mir2LiR::Materialize after analysis. + */ + void Materialize(); + + /* + * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register + * without regard to data type. In practice, this can result in UpdateLoc returning a + * location record for a Dalvik float value in a core register, and vis-versa. For targets + * which can inexpensively move data between core and float registers, this can often be a win. + * However, for x86 this is generally not a win. These variants of UpdateLoc() + * take a register class argument - and will return an in-register location record only if + * the value is live in a temp register of the correct class. Additionally, if the value is in + * a temp register of the wrong register class, it will be clobbered. + */ + RegLocation UpdateLocTyped(RegLocation loc, int reg_class); + RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class); + + /* + * @brief Analyze MIR before generating code, to prepare for the code generation. + */ + void AnalyzeMIR(); + + /* + * @brief Analyze one basic block. + * @param bb Basic block to analyze. + */ + void AnalyzeBB(BasicBlock * bb); + + /* + * @brief Analyze one extended MIR instruction + * @param opcode MIR instruction opcode. + * @param bb Basic block containing instruction. + * @param mir Extended instruction to analyze. + */ + void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir); + + /* + * @brief Analyze one MIR instruction + * @param opcode MIR instruction opcode. + * @param bb Basic block containing instruction. + * @param mir Instruction to analyze. + */ + virtual void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir); + + /* + * @brief Analyze one MIR float/double instruction + * @param opcode MIR instruction opcode. + * @param bb Basic block containing instruction. + * @param mir Instruction to analyze. + */ + void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir); + + /* + * @brief Analyze one use of a double operand. + * @param rl_use Double RegLocation for the operand. + */ + void AnalyzeDoubleUse(RegLocation rl_use); + + bool Gen64Bit() const { return gen64bit_; } + + // Information derived from analysis of MIR + + // The compiler temporary for the code address of the method. 
+ CompilerTemp *base_of_code_; + + // Have we decided to compute a ptr to code and store in temporary VR? + bool store_method_addr_; + + // Have we used the stored method address? + bool store_method_addr_used_; + + // Instructions to remove if we didn't use the stored method address. + LIR* setup_method_address_[2]; + + // Instructions needing patching with Method* values. + GrowableArray<LIR*> method_address_insns_; + + // Instructions needing patching with Class Type* values. + GrowableArray<LIR*> class_type_address_insns_; + + // Instructions needing patching with PC relative code addresses. + GrowableArray<LIR*> call_method_insns_; + + // Prologue decrement of stack pointer. + LIR* stack_decrement_; + + // Epilogue increment of stack pointer. + LIR* stack_increment_; + + // 64-bit mode + bool gen64bit_; + + // The list of const vector literals. + LIR *const_vectors_; + + /* + * @brief Search for a matching vector literal + * @param mir A kMirOpConst128b MIR instruction to match. + * @returns pointer to matching LIR constant, or nullptr if not found. + */ + LIR *ScanVectorLiteral(MIR *mir); + + /* + * @brief Add a constant vector literal + * @param mir A kMirOpConst128b MIR instruction to match. + */ + LIR *AddVectorLiteral(MIR *mir); + + InToRegStorageMapping in_to_reg_storage_mapping_; }; } // namespace art diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 0421a59..c3580f7 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -272,21 +272,67 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, return; } case Instruction::LONG_TO_DOUBLE: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2sdRR; + break; + } GenLongToFP(rl_dest, rl_src, true /* is_double */); return; case Instruction::LONG_TO_FLOAT: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2ssRR; + break; + } GenLongToFP(rl_dest, rl_src, false /* is_double */); return; case Instruction::FLOAT_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValue(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = AllocTempSingle(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src); } return; case Instruction::DOUBLE_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by 
EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = AllocTempDouble(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src); } @@ -434,9 +480,14 @@ void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValueWide(rl_src, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); - OpRegCopy(rl_result.reg, rl_src.reg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + if (Gen64Bit()) { + LoadConstantWide(rl_result.reg, 0x8000000000000000); + OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); + } else { + OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); + OpRegCopy(rl_result.reg, rl_src.reg); + } StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 1cc16b9..8093fd7 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -31,6 +31,23 @@ namespace art { */ void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + if (Gen64Bit()) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpXor, rl_result.reg, rl_result.reg); // result = 0 + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe); // result = (src1 != src2) ? 1 : result + RegStorage temp_reg = AllocTemp(); + OpRegReg(kOpNeg, temp_reg, rl_result.reg); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + // result = (src1 < src2) ? -result : result + OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg); + StoreValue(rl_dest, rl_result); + FreeTemp(temp_reg); + return; + } + FlushAllRegs(); LockCallTemps(); // Prepare for explicit register usage RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); @@ -108,7 +125,7 @@ LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { } if (r_dest.IsFloat() || r_src.IsFloat()) return OpFpRegCopy(r_dest, r_src); - LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, + LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR, r_dest.GetReg(), r_src.GetReg()); if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; @@ -133,36 +150,51 @@ void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { } else { // TODO: Prevent this from happening in the code. The result is often // unused or could have been loaded more easily from memory. 
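// (Editorial illustration, hedged, not part of the change: the pair-based packing below
// builds one xmm register from two 32-bit halves, roughly
//   movd xmm_dest, low; movd xmm_tmp, high; punpckldq xmm_dest, xmm_tmp
// leaving xmm_dest = {low, high}.)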
- NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); - RegStorage r_tmp = AllocTempDouble(); - NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); - NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); - FreeTemp(r_tmp); + if (!r_src.IsPair()) { + DCHECK(!r_dest.IsPair()); + NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); + RegStorage r_tmp = AllocTempDouble(); + NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); + NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); + FreeTemp(r_tmp); + } } } else { if (src_fp) { - NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); - RegStorage temp_reg = AllocTempDouble(); - NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); - NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); - NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + if (!r_dest.IsPair()) { + DCHECK(!r_src.IsPair()); + NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); + RegStorage temp_reg = AllocTempDouble(); + NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); + NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); + NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + } } else { - DCHECK(r_dest.IsPair()); - DCHECK(r_src.IsPair()); - // Handle overlap - if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) { - // Deal with cycles. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, r_dest.GetHigh()); - OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); - OpRegCopy(r_dest.GetLow(), temp_reg); - FreeTemp(temp_reg); - } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); + if (!r_src.IsPair()) { + // Just copy the register directly. + OpRegCopy(r_dest, r_src); } else { - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + // Handle overlap + if (r_src.GetHighReg() == r_dest.GetLowReg() && + r_src.GetLowReg() == r_dest.GetHighReg()) { + // Deal with cycles. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, r_dest.GetHigh()); + OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); + OpRegCopy(r_dest.GetLow(), temp_reg); + FreeTemp(temp_reg); + } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + } else { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } } } } @@ -778,7 +810,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { : (IsInReg(this, rl_src_offset, rs_rDI) ? 4 : (SRegOffset(rl_src_offset.s_reg_low) + push_offset)); LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI); - NewLIR4(kX86LockCmpxchg8bA, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0); + NewLIR4(kX86LockCmpxchg64A, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0); // After a store we need to insert barrier in case of potential load. Since the // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated. 
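// Editorial sketch (hedged, not part of the patch): the inlined CAS above implements
// 64-bit compare-and-swap semantics, and lock cmpxchg doubles as a full memory
// barrier, which is why only a scheduling barrier is emitted after the store.
// A rough C++ equivalent using a GCC builtin; the helper name is hypothetical:
static inline bool CompareAndSwap64(volatile int64_t* addr, int64_t expected, int64_t desired) {
  // Atomically: if (*addr == expected) { *addr = desired; return true; } else return false;
  return __sync_bool_compare_and_swap(addr, expected, desired);
}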
@@ -821,8 +853,18 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Convert ZF to boolean RegLocation rl_dest = InlineTarget(info); // boolean place for result RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondZ); - NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + RegStorage result_reg = rl_result.reg; + + // SETcc only works with EAX..EDX. + if (result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) { + result_reg = AllocateByteRegister(); + DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum()); + } + NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ); + NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg()); + if (IsTemp(result_reg)) { + FreeTemp(result_reg); + } StoreValue(rl_dest, rl_result); return true; } @@ -832,7 +874,11 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirectFixed(rl_method, reg); + if (rl_method.wide) { + LoadValueDirectWideFixed(rl_method, reg); + } else { + LoadValueDirectFixed(rl_method, reg); + } store_method_addr_used_ = true; // Load the proper value from the literal area. @@ -871,18 +917,23 @@ void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, } void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { - DCHECK(reg.IsPair()); // TODO: allow 64BitSolo. - // We are not supposed to clobber the incoming storage, so allocate a temporary. - RegStorage t_reg = AllocTemp(); + if (Gen64Bit()) { + DCHECK(reg.Is64Bit()); + + NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); + } else { + DCHECK(reg.IsPair()); - // Doing an OR is a quick way to check if both registers are zero. This will set the flags. - OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + // We are not supposed to clobber the incoming storage, so allocate a temporary. + RegStorage t_reg = AllocTemp(); + // Doing an OR is a quick way to check if both registers are zero. This will set the flags. + OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + // The temp is no longer needed so free it at this time. + FreeTemp(t_reg); + } // In case of zero, throw ArithmeticException. GenDivZeroCheck(kCondEq); - - // The temp is no longer needed so free it at this time. - FreeTemp(t_reg); } void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, @@ -1221,18 +1272,22 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, if (rl_src.location == kLocPhysReg) { // Both operands are in registers. // But we must ensure that rl_src is in pair - rl_src = LoadValueWide(rl_src, kCoreReg); - if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_dest.reg); - rl_src.reg.SetHighReg(temp_reg.GetReg()); - } - NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); + if (Gen64Bit()) { + NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg()); + } else { + rl_src = LoadValueWide(rl_src, kCoreReg); + if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { + // The registers are the same, so we would clobber it before the use. 
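+ // (Illustration: if rl_dest.low and rl_src.high land in the same physical
+ // register, the low-word op below would clobber rl_src.high before the
+ // high-word op consumes it; copying through a temp preserves the value.)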
+ RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_dest.reg); + rl_src.reg.SetHighReg(temp_reg.GetReg()); + } + NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); - x86op = GetOpcode(op, rl_dest, rl_src, true); - NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); - FreeTemp(rl_src.reg); + x86op = GetOpcode(op, rl_dest, rl_src, true); + NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); + FreeTemp(rl_src.reg); // ??? + } return; } @@ -1242,11 +1297,13 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_src.s_reg_low); - LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); + LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); } @@ -1273,13 +1330,16 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); - LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg()); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, + Gen64Bit() ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, @@ -1330,23 +1390,44 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, // Get one of the source operands into temporary register. rl_src1 = LoadValueWide(rl_src1, kCoreReg); - if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { - GenLongRegOrMemOp(rl_src1, rl_src2, op); - } else if (is_commutative) { - rl_src2 = LoadValueWide(rl_src2, kCoreReg); - // We need at least one of them to be a temporary. - if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { - rl_src1 = ForceTempWide(rl_src1); + if (Gen64Bit()) { + if (IsTemp(rl_src1.reg)) { GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. 
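+ // (Reason: the two-operand x86 forms emitted below overwrite their left
+ // operand, so whichever operand becomes the destination must be a
+ // clobberable temp rather than a live Dalvik register.)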
+ if (!IsTemp(rl_src2.reg)) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } } else { - GenLongRegOrMemOp(rl_src2, rl_src1, op); - StoreFinalValueWide(rl_dest, rl_src2); - return; + // Need LHS to be the temp. + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); } } else { - // Need LHS to be the temp. - rl_src1 = ForceTempWide(rl_src1); - GenLongRegOrMemOp(rl_src1, rl_src2, op); + if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. + if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } + } else { + // Need LHS to be the temp. + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } } StoreFinalValueWide(rl_dest, rl_src1); @@ -1378,27 +1459,91 @@ void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, } void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { - LOG(FATAL) << "Unexpected use GenNotLong()"; + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result; + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegCopy(rl_result.reg, rl_src.reg); + OpReg(kOpNot, rl_result.reg); + StoreValueWide(rl_dest, rl_result); + } else { + LOG(FATAL) << "Unexpected use GenNotLong()"; + } } void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div) { - LOG(FATAL) << "Unexpected use GenDivRemLong()"; + if (!Gen64Bit()) { + LOG(FATAL) << "Unexpected use GenDivRemLong()"; + return; + } + + // We have to use fixed registers, so flush all the temps. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage. + + // Load LHS into RAX. + LoadValueDirectWideFixed(rl_src1, rs_r0q); + + // Load RHS into RCX. + LoadValueDirectWideFixed(rl_src2, rs_r1q); + + // Copy LHS sign bit into RDX. + NewLIR0(kx86Cqo64Da); + + // Handle division by zero case. + GenDivZeroCheckWide(rs_r1q); + + // Have to catch 0x8000000000000000/-1 case, or we will get an exception! + NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1); + LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // RHS is -1. + LoadConstantWide(rs_r3q, 0x8000000000000000); + NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg()); + LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // In 0x8000000000000000/-1 case. + if (!is_div) { + // For DIV, RAX is already right. For REM, we need RDX 0. + NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg()); + } + LIR* done = NewLIR1(kX86Jmp8, 0); + + // Expected case. + minus_one_branch->target = NewLIR0(kPseudoTargetLabel); + minint_branch->target = minus_one_branch->target; + NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg()); + done->target = NewLIR0(kPseudoTargetLabel); + + // Result is in RAX for div and RDX for rem. 
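+ // (x86-64 idiv divides RDX:RAX by its operand, leaving the quotient in RAX
+ // and the remainder in RDX; the cqo above sign-extends RAX into RDX first.
+ // For example, -7 / 2 leaves RAX = -3 and RDX = -1.)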
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG}; + if (!is_div) { + rl_result.reg.SetReg(r2q); + } + + StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); - RegLocation rl_result = ForceTempWide(rl_src); - if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && - ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_result.reg); - rl_result.reg.SetHighReg(temp_reg.GetReg()); + RegLocation rl_result; + if (Gen64Bit()) { + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegReg(kOpNeg, rl_result.reg, rl_src.reg); + } else { + rl_result = ForceTempWide(rl_src); + if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && + ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { + // The registers are the same, so we would clobber it before the use. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_result.reg); + rl_result.reg.SetHighReg(temp_reg.GetReg()); + } + OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow + OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF + OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh } - OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow - OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF - OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh StoreValueWide(rl_dest, rl_result); } @@ -1551,60 +1696,84 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int shift_amount) { RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 
- if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - LoadConstant(rl_result.reg.GetLow(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetLow(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount); - NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); - } - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - default: - LOG(FATAL) << "Unexpected case"; + if (Gen64Bit()) { + OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case"; + } + OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); + } else { + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 
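+ // (Worked example of the pair decomposition below: for a 64-bit left shift
+ // on 32-bit register pairs, s >= 32 becomes high = low << (s - 32) with
+ // low = 0, so s = 40 gives high = low << 8; for s < 32, SHLD feeds bits
+ // from low into high while SAL shifts low.)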
+ if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + LoadConstant(rl_result.reg.GetLow(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetLow(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), + shift_amount); + NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } } return rl_result; } @@ -1634,24 +1803,26 @@ void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + bool isConstSuccess = false; switch (opcode) { case Instruction::ADD_LONG: case Instruction::AND_LONG: case Instruction::OR_LONG: case Instruction::XOR_LONG: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { GenSubLong(opcode, rl_dest, rl_src1, rl_src2); + isConstSuccess = true; } break; case Instruction::ADD_LONG_2ADDR: @@ -1660,20 +1831,24 @@ void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, case Instruction::AND_LONG_2ADDR: if (rl_src2.is_const) { if (GenerateTwoOperandInstructions()) { - GenLongImm(rl_dest, rl_src2, opcode); + isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); } else { - 
GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; default: - // Default - bail to non-const handler. - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + isConstSuccess = false; break; } + + if (!isConstSuccess) { + // Default - bail to non-const handler. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + } } bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { @@ -1695,40 +1870,50 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat bool is_high_op) { bool rhs_in_mem = rhs.location != kLocPhysReg; bool dest_in_mem = dest.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); DCHECK(!rhs_in_mem || !dest_in_mem); switch (op) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Adc32MR : kX86Add32MR; + return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Adc32RM : kX86Add32RM; + return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; } - return is_high_op ? kX86Adc32RR : kX86Add32RR; + return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Sbb32MR : kX86Sub32MR; + return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Sbb32RM : kX86Sub32RM; + return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; } - return is_high_op ? kX86Sbb32RR : kX86Sub32RR; + return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (dest_in_mem) { - return kX86And32MR; + return is64Bit ? kX86And64MR : kX86And32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86And64RM : kX86And64RR; } return rhs_in_mem ? kX86And32RM : kX86And32RR; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (dest_in_mem) { - return kX86Or32MR; + return is64Bit ? kX86Or64MR : kX86Or32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Or64RM : kX86Or64RR; } return rhs_in_mem ? kX86Or32RM : kX86Or32RR; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (dest_in_mem) { - return kX86Xor32MR; + return is64Bit ? kX86Xor64MR : kX86Xor32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; } return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; default: @@ -1740,6 +1925,7 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value) { bool in_mem = loc.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); bool byte_imm = IS_SIMM8(value); DCHECK(in_mem || !loc.reg.IsFloat()); switch (op) { @@ -1747,43 +1933,61 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h case Instruction::ADD_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Adc32MI8 : kX86Add32MI8; + return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; } - return is_high_op ? kX86Adc32RI8 : kX86Add32RI8; + return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; } if (in_mem) { - return is_high_op ? kX86Adc32MI : kX86Add32MI; + return is64Bit ? 
kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; } - return is_high_op ? kX86Adc32RI : kX86Add32RI; + return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; + return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; } - return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; + return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; } if (in_mem) { - return is_high_op ? kX86Sbb32MI : kX86Sub32MI; + return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; } - return is_high_op ? kX86Sbb32RI : kX86Sub32RI; + return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86And64MI8 : kX86And64RI8; + } return in_mem ? kX86And32MI8 : kX86And32RI8; } + if (is64Bit) { + return in_mem ? kX86And64MI : kX86And64RI; + } return in_mem ? kX86And32MI : kX86And32RI; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Or64MI8 : kX86Or64RI8; + } return in_mem ? kX86Or32MI8 : kX86Or32RI8; } + if (is64Bit) { + return in_mem ? kX86Or64MI : kX86Or64RI; + } return in_mem ? kX86Or32MI : kX86Or32RI; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; + } return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; } + if (is64Bit) { + return in_mem ? kX86Xor64MI : kX86Xor64RI; + } return in_mem ? kX86Xor32MI : kX86Xor32RI; default: LOG(FATAL) << "Unexpected opcode: " << op; @@ -1791,9 +1995,43 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h } } -void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { +bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { DCHECK(rl_src.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src); + + if (Gen64Bit()) { + // We can use the immediate form only if the value fits in 32 bits + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + + rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); + + if ((rl_dest.location == kLocDalvikFrame) || + (rl_dest.location == kLocCompilerTemp)) { + int r_base = TargetReg(kSp).GetReg(); + int displacement = SRegOffset(rl_dest.s_reg_low); + + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is64bit */); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); + return true; + } + + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + DCHECK_EQ(rl_result.location, kLocPhysReg); + DCHECK(!rl_result.reg.IsFloat()); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreValueWide(rl_dest, rl_result); + return true; + } + + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1820,7 +2058,7 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } - return; + return true; }
RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); @@ -1836,12 +2074,38 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); } StoreValueWide(rl_dest, rl_result); + return true; } -void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, +bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op) { DCHECK(rl_src2.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src2); + + if (Gen64Bit()) { + // We can use the immediate form only if the value fits in 32 bits + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + if (rl_dest.location == kLocPhysReg && + rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + NewLIR2(x86op, rl_dest.reg.GetReg(), val); + StoreFinalValueWide(rl_dest, rl_dest); + return true; + } + + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + // We need the values to be in a temporary + RegLocation rl_result = ForceTempWide(rl_src1); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreFinalValueWide(rl_dest, rl_result); + return true; + } + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1861,7 +2125,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_dest); - return; + return true; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); @@ -1879,6 +2143,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_result); + return true; } // For final classes there are no sub-classes to check and so we can answer the instance-of @@ -1899,7 +2164,12 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, LoadConstant(result_reg, 0); LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); - RegStorage check_class = AllocTypedTemp(false, kRefReg); + // We will use this register to compare to memory below. + // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode). + // For this reason, force allocation of a 32 bit register to use, so that the + // compare to memory will be done using a 32 bit comparison. + // The LoadRefDisp(s) below will work normally, even in 64 bit mode. + RegStorage check_class = AllocTemp(); // If Method* is already in a register, we can save a copy. RegLocation rl_method = mir_graph_->GetMethodLoc(); @@ -2239,7 +2509,8 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, // We should be careful with order here // If rl_dest and rl_lhs point to the same VR we should load first // If they are different we should find a register for dest first - if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { + if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == + mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); // No-op if these are the same. @@ -2289,4 +2560,82 @@ bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_ // Everything will be fine :-).
return true; } + +void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { + if (!Gen64Bit()) { + Mir2Lir::GenIntToLong(rl_dest, rl_src); + return; + } + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (rl_src.location == kLocPhysReg) { + NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + } else { + int displacement = SRegOffset(rl_src.s_reg_low); + LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), + displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) { + if (!Gen64Bit()) { + Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); + return; + } + + bool is_two_addr = false; + OpKind op = kOpBkpt; + RegLocation rl_result; + + switch (opcode) { + case Instruction::SHL_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHL_LONG: + op = kOpLsl; + break; + case Instruction::SHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHR_LONG: + op = kOpAsr; + break; + case Instruction::USHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::USHR_LONG: + op = kOpLsr; + break; + default: + op = kOpBkpt; + } + + // X86 doesn't require masking and must use ECX. + RegStorage t_reg = TargetReg(kCount); // rCX + LoadValueDirectFixed(rl_shift, t_reg); + if (is_two_addr) { + // Can we do this directly into memory? + rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); + if (rl_result.location != kLocPhysReg) { + // Okay, we can do this into memory + OpMemReg(op, rl_result, t_reg.GetReg()); + } else if (!rl_result.reg.IsFloat()) { + // Can do this directly into the result register + OpRegReg(op, rl_result.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + } else { + // Three address form, or we can't do directly. 
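+ // (On this path the source is loaded into fresh registers and the result
+ // is computed into its own wide location, since neither an in-register nor
+ // an in-memory two-address update applies.)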
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + + FreeTemp(t_reg); +} + } // namespace art diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 4d8fd1b..ce7229b 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -31,33 +31,25 @@ static constexpr RegStorage core_regs_arr_32[] = { }; static constexpr RegStorage core_regs_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI, -#ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 -#endif }; static constexpr RegStorage core_regs_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q, -#ifdef TARGET_REX_SUPPORT rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q -#endif }; static constexpr RegStorage sp_regs_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; static constexpr RegStorage sp_regs_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, -#ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 -#endif }; static constexpr RegStorage dp_regs_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; static constexpr RegStorage dp_regs_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, -#ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 -#endif }; static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32}; @@ -65,33 +57,25 @@ static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX}; static constexpr RegStorage core_temps_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI, -#ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11 -#endif }; static constexpr RegStorage core_temps_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q, -#ifdef TARGET_REX_SUPPORT rs_r8q, rs_r9q, rs_r10q, rs_r11q -#endif }; static constexpr RegStorage sp_temps_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; static constexpr RegStorage sp_temps_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, -#ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 -#endif }; static constexpr RegStorage dp_temps_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; static constexpr RegStorage dp_temps_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, -#ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 -#endif }; static constexpr RegStorage xp_temps_arr_32[] = { @@ -99,9 +83,7 @@ static constexpr RegStorage xp_temps_arr_32[] = { }; static constexpr RegStorage xp_temps_arr_64[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, -#ifdef TARGET_REX_SUPPORT rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 -#endif }; static constexpr ArrayRef<const RegStorage> empty_pool; @@ -132,10 +114,16 @@ X86NativeRegisterPool rX86_ARG0; X86NativeRegisterPool rX86_ARG1; X86NativeRegisterPool rX86_ARG2; X86NativeRegisterPool rX86_ARG3; +X86NativeRegisterPool rX86_ARG4; 
+X86NativeRegisterPool rX86_ARG5; X86NativeRegisterPool rX86_FARG0; X86NativeRegisterPool rX86_FARG1; X86NativeRegisterPool rX86_FARG2; X86NativeRegisterPool rX86_FARG3; +X86NativeRegisterPool rX86_FARG4; +X86NativeRegisterPool rX86_FARG5; +X86NativeRegisterPool rX86_FARG6; +X86NativeRegisterPool rX86_FARG7; X86NativeRegisterPool rX86_RET0; X86NativeRegisterPool rX86_RET1; X86NativeRegisterPool rX86_INVOKE_TGT; @@ -145,10 +133,16 @@ RegStorage rs_rX86_ARG0; RegStorage rs_rX86_ARG1; RegStorage rs_rX86_ARG2; RegStorage rs_rX86_ARG3; +RegStorage rs_rX86_ARG4; +RegStorage rs_rX86_ARG5; RegStorage rs_rX86_FARG0; RegStorage rs_rX86_FARG1; RegStorage rs_rX86_FARG2; RegStorage rs_rX86_FARG3; +RegStorage rs_rX86_FARG4; +RegStorage rs_rX86_FARG5; +RegStorage rs_rX86_FARG6; +RegStorage rs_rX86_FARG7; RegStorage rs_rX86_RET0; RegStorage rs_rX86_RET1; RegStorage rs_rX86_INVOKE_TGT; @@ -164,7 +158,7 @@ RegLocation X86Mir2Lir::LocCReturnRef() { } RegLocation X86Mir2Lir::LocCReturnWide() { - return x86_loc_c_return_wide; + return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide; } RegLocation X86Mir2Lir::LocCReturnFloat() { @@ -188,35 +182,27 @@ RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kArg1: res_reg = rs_rX86_ARG1; break; case kArg2: res_reg = rs_rX86_ARG2; break; case kArg3: res_reg = rs_rX86_ARG3; break; + case kArg4: res_reg = rs_rX86_ARG4; break; + case kArg5: res_reg = rs_rX86_ARG5; break; case kFArg0: res_reg = rs_rX86_FARG0; break; case kFArg1: res_reg = rs_rX86_FARG1; break; case kFArg2: res_reg = rs_rX86_FARG2; break; case kFArg3: res_reg = rs_rX86_FARG3; break; + case kFArg4: res_reg = rs_rX86_FARG4; break; + case kFArg5: res_reg = rs_rX86_FARG5; break; + case kFArg6: res_reg = rs_rX86_FARG6; break; + case kFArg7: res_reg = rs_rX86_FARG7; break; case kRet0: res_reg = rs_rX86_RET0; break; case kRet1: res_reg = rs_rX86_RET1; break; case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break; case kHiddenArg: res_reg = rs_rAX; break; - case kHiddenFpArg: res_reg = rs_fr0; break; + case kHiddenFpArg: DCHECK(!Gen64Bit()); res_reg = rs_fr0; break; case kCount: res_reg = rs_rX86_COUNT; break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } -RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - // TODO: This is not 64-bit compliant and depends on new internal ABI. - switch (arg_num) { - case 0: - return rs_rX86_ARG1; - case 1: - return rs_rX86_ARG2; - case 2: - return rs_rX86_ARG3; - default: - return RegStorage::InvalidReg(); - } -} - /* * Decode the register id. 
*/ @@ -482,6 +468,18 @@ void X86Mir2Lir::LockCallTemps() { LockTemp(rs_rX86_ARG1); LockTemp(rs_rX86_ARG2); LockTemp(rs_rX86_ARG3); + if (Gen64Bit()) { + LockTemp(rs_rX86_ARG4); + LockTemp(rs_rX86_ARG5); + LockTemp(rs_rX86_FARG0); + LockTemp(rs_rX86_FARG1); + LockTemp(rs_rX86_FARG2); + LockTemp(rs_rX86_FARG3); + LockTemp(rs_rX86_FARG4); + LockTemp(rs_rX86_FARG5); + LockTemp(rs_rX86_FARG6); + LockTemp(rs_rX86_FARG7); + } } /* To be used when explicitly managing register use */ @@ -490,14 +488,26 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(rs_rX86_ARG1); FreeTemp(rs_rX86_ARG2); FreeTemp(rs_rX86_ARG3); + if (Gen64Bit()) { + FreeTemp(rs_rX86_ARG4); + FreeTemp(rs_rX86_ARG5); + FreeTemp(rs_rX86_FARG0); + FreeTemp(rs_rX86_FARG1); + FreeTemp(rs_rX86_FARG2); + FreeTemp(rs_rX86_FARG3); + FreeTemp(rs_rX86_FARG4); + FreeTemp(rs_rX86_FARG5); + FreeTemp(rs_rX86_FARG6); + FreeTemp(rs_rX86_FARG7); + } } bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) { switch (opcode) { case kX86LockCmpxchgMR: case kX86LockCmpxchgAR: - case kX86LockCmpxchg8bM: - case kX86LockCmpxchg8bA: + case kX86LockCmpxchg64M: + case kX86LockCmpxchg64A: case kX86XchgMR: case kX86Mfence: // Atomic memory instructions provide full barrier. @@ -653,6 +663,14 @@ bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) { } RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) { + // X86_64 can handle any size. + if (Gen64Bit()) { + if (size == kReference) { + return kRefReg; + } + return kCoreReg; + } + if (UNLIKELY(is_volatile)) { // On x86, atomic 64-bit load/store requires an fp register. // Smaller aligned load/store is atomic for both core and fp registers. @@ -688,11 +706,30 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rSI; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rCX; + rs_rX86_ARG4 = rs_r8; + rs_rX86_ARG5 = rs_r9; + rs_rX86_FARG0 = rs_fr0; + rs_rX86_FARG1 = rs_fr1; + rs_rX86_FARG2 = rs_fr2; + rs_rX86_FARG3 = rs_fr3; + rs_rX86_FARG4 = rs_fr4; + rs_rX86_FARG5 = rs_fr5; + rs_rX86_FARG6 = rs_fr6; + rs_rX86_FARG7 = rs_fr7; rX86_ARG0 = rDI; rX86_ARG1 = rSI; rX86_ARG2 = rDX; rX86_ARG3 = rCX; - // TODO: ARG4(r8), ARG5(r9), floating point args. 
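+ // (Note: the mapping below appears to mirror the System V AMD64 calling
+ // convention: integer args in RDI, RSI, RDX, RCX, R8, R9 and FP args in
+ // XMM0-XMM7, reused here for the compiler's internal argument registers.)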
+ rX86_ARG4 = r8; + rX86_ARG5 = r9; + rX86_FARG0 = fr0; + rX86_FARG1 = fr1; + rX86_FARG2 = fr2; + rX86_FARG3 = fr3; + rX86_FARG4 = fr4; + rX86_FARG5 = fr5; + rX86_FARG6 = fr6; + rX86_FARG7 = fr7; } else { rs_rX86_SP = rs_rX86_SP_32; @@ -700,23 +737,32 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rCX; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rBX; + rs_rX86_ARG4 = RegStorage::InvalidReg(); + rs_rX86_ARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG0 = rs_rAX; + rs_rX86_FARG1 = rs_rCX; + rs_rX86_FARG2 = rs_rDX; + rs_rX86_FARG3 = rs_rBX; + rs_rX86_FARG4 = RegStorage::InvalidReg(); + rs_rX86_FARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG6 = RegStorage::InvalidReg(); + rs_rX86_FARG7 = RegStorage::InvalidReg(); rX86_ARG0 = rAX; rX86_ARG1 = rCX; rX86_ARG2 = rDX; rX86_ARG3 = rBX; + rX86_FARG0 = rAX; + rX86_FARG1 = rCX; + rX86_FARG2 = rDX; + rX86_FARG3 = rBX; + // TODO(64): Initialize with invalid reg +// rX86_ARG4 = RegStorage::InvalidReg(); +// rX86_ARG5 = RegStorage::InvalidReg(); } - rs_rX86_FARG0 = rs_rAX; - rs_rX86_FARG1 = rs_rCX; - rs_rX86_FARG2 = rs_rDX; - rs_rX86_FARG3 = rs_rBX; rs_rX86_RET0 = rs_rAX; rs_rX86_RET1 = rs_rDX; rs_rX86_INVOKE_TGT = rs_rAX; rs_rX86_COUNT = rs_rCX; - rX86_FARG0 = rAX; - rX86_FARG1 = rCX; - rX86_FARG2 = rDX; - rX86_FARG3 = rBX; rX86_RET0 = rAX; rX86_RET1 = rDX; rX86_INVOKE_TGT = rAX; @@ -1356,7 +1402,11 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { // Address the start of the method. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -1676,4 +1726,458 @@ LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) { return new_value; } +// ------------ ABI support: mapping of args to physical registers ------------- +RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) { + const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5}; + const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3, + rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7}; + const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + + RegStorage result = RegStorage::InvalidReg(); + if (is_double_or_float) { + if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { + result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg()); + } + } + } else { + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = coreArgMappingToPhysicalReg[cur_core_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg()); + } + } + } + return result; +} + +RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); +} + +void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + max_mapped_in_ = -1; + is_there_stack_mapped_ = false; + for (int in_position = 0; in_position < count; in_position++) { + RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide); + if (reg.Valid()) { + mapping_[in_position] = reg; + max_mapped_in_ = std::max(max_mapped_in_, in_position); + if (reg.Is64BitSolo()) { + // We covered 2 args, so skip the next one + in_position++; + } + } else { + is_there_stack_mapped_ = true; + } + } + initialized_ = true; +} + +RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!Gen64Bit()) { + return GetCoreArgMappingToPhysicalReg(arg_num); + } + + if (!in_to_reg_storage_mapping_.IsInitialized()) { + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; + + InToRegStorageX86_64Mapper mapper; + in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + +RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) { + // For the 32-bit internal ABI, the first 3 arguments are passed in registers. + // Not used for 64-bit, TODO: Move X86_32 to the same framework + switch (core_arg_num) { + case 0: + return rs_rX86_ARG1; + case 1: + return rs_rX86_ARG2; + case 2: + return rs_rX86_ARG3; + default: + return RegStorage::InvalidReg(); + } +} + +// ---------End of ABI support: mapping of args to physical registers ------------- + +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform initial + * assignment of promoted arguments. + * + * ArgLocs is an array of location records describing the incoming arguments + * with one location record per word of argument. + */ +void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { + if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method); + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep kArg0 live (or copy it to home location + * if promoted). + */ + + RegLocation rl_src = rl_method; + rl_src.location = kLocPhysReg; + rl_src.reg = TargetReg(kArg0); + rl_src.home = false; + MarkLive(rl_src); + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush + if (rl_method.location == kLocPhysReg) { + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + } + + if (cu_->num_ins == 0) { + return; + } + + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + /* + * Copy incoming arguments to their proper home locations. + * NOTE: an older version of dx had an issue in which + * it would reuse static method argument registers. + * This could result in the same Dalvik virtual register + * being promoted to both core and fp regs. To account for this, + * we only copy to the corresponding promoted physical register + * if it matches the type of the SSA name for the incoming + * argument. It is also possible that long and double arguments + * end up half-promoted. In those cases, we must flush the promoted + * half to memory as well. 
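+ * For example, a long argument may have its low word promoted to a core
+ * register while its high word stays in the frame; the code below compares
+ * the promotion records of the two halves and flushes the value when they
+ * disagree.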
+ */
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegStorage reg = RegStorage::InvalidReg();
+    // get reg corresponding to input
+    reg = GetArgMappingToPhysicalReg(i);
+
+    if (reg.Valid()) {
+      // If arriving in register
+      bool need_flush = true;
+      RegLocation* t_loc = &ArgLocs[i];
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+        need_flush = false;
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+        need_flush = false;
+      }
+
+      // For wide args, force flush if not fully promoted
+      if (t_loc->wide) {
+        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+        // Is only half promoted?
+        need_flush |= (p_map->core_location != v_map->core_location) ||
+            (p_map->fp_location != v_map->fp_location);
+      }
+      if (need_flush) {
+        if (t_loc->wide) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+          // Increment i to skip the next one
+          i++;
+        } else {
+          Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * kArg1 .. kArg3. On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer. Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+                                     int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+                                     const MethodReference& target_method,
+                                     uint32_t vtable_idx, uintptr_t direct_code,
+                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsNoRange(info,
+                                         call_state, pcrLabel, next_call_insn,
+                                         target_method,
+                                         vtable_idx, direct_code,
+                                         direct_method, type, skip_this);
+  }
+  return GenDalvikArgsRange(info,
+                            call_state, pcrLabel, next_call_insn,
+                            target_method,
+                            vtable_idx, direct_code,
+                            direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo). Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying. This
+ * applies to arg3 and above (see below).
+ *
+ * General strategy:
+ *    Args that map to physical registers (see InToRegStorageX86_64Mapper)
+ *    are loaded directly into those registers. The remaining args are
+ *    flushed to the outs area on the stack, using 128-bit XMM block moves
+ *    where alignment allows and 32-bit GPR moves otherwise.
+ *
+ */
+int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+                                   LIR** pcrLabel, NextCallInsn next_call_insn,
+                                   const MethodReference& target_method,
+                                   uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+                                   InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsRange(info, call_state,
+                                       pcrLabel, next_call_insn,
+                                       target_method,
+                                       vtable_idx, direct_code, direct_method,
+                                       type, skip_this);
+  }
+
+  /* If no arguments, just return */
+  if (info->num_arg_words == 0)
+    return call_state;
+
+  const int start_index = skip_this ? 1 : 0;
+
+  InToRegStorageX86_64Mapper mapper;
+  InToRegStorageMapping in_to_reg_storage_mapping;
+  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+  // First of all, check whether it makes sense to use bulk copying.
+  // Bulk copying is only applicable for the range case.
+  // TODO: make a constant instead of 2
+  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+    // Scan the rest of the args - if in phys_reg, flush to memory.
+    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+      RegLocation loc = info->args[next_arg];
+      if (loc.wide) {
+        loc = UpdateLocWide(loc);
+        if (loc.location == kLocPhysReg) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+        }
+        next_arg += 2;
+      } else {
+        loc = UpdateLoc(loc);
+        if (loc.location == kLocPhysReg) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+        }
+        next_arg++;
+      }
+    }
+
+    // Logic below assumes that Method pointer is at offset zero from SP.
+    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+    // The rest can be copied together.
+    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do
+       * a 128-bit move because we won't get another chance to try to align. If there are more
+       * than 4 registers left to move, consider a 128-bit move only if either src or dest is
+       * aligned. We do this because we could potentially do a smaller move to align.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp.
Since we are working through the calling sequence, + // we expect to have an xmm temporary available. AllocTempDouble will abort if + // there are no free registers. + RegStorage temp = AllocTempDouble(); + + LIR* ld1 = nullptr; + LIR* ld2 = nullptr; + LIR* st1 = nullptr; + LIR* st2 = nullptr; + + /* + * The logic is similar for both loads and stores. If we have 16-byte alignment, + * do an aligned move. If we have 8-byte alignment, then do the move in two + * parts. This approach prevents possible cache line splits. Finally, fall back + * to doing an unaligned move. In most cases we likely won't split the cache + * line but we cannot prove it and thus take a conservative approach. + */ + bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; + bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + + if (src_is_16b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP); + } else if (src_is_8b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP); + ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), + kMovHi128FP); + } else { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP); + } + + if (dest_is_16b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP); + } else if (dest_is_8b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP); + st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), + temp, kMovHi128FP); + } else { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP); + } + + // TODO If we could keep track of aliasing information for memory accesses that are wider + // than 64-bit, we wouldn't need to set up a barrier. + if (ld1 != nullptr) { + if (ld2 != nullptr) { + // For 64-bit load we can actually set up the aliasing information. + AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); + AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); + } else { + // Set barrier for 128-bit load. + SetMemRefType(ld1, true /* is_load */, kDalvikReg); + ld1->u.m.def_mask = ENCODE_ALL; + } + } + if (st1 != nullptr) { + if (st2 != nullptr) { + // For 64-bit store we can actually set up the aliasing information. + AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); + AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); + } else { + // Set barrier for 128-bit store. + SetMemRefType(st1, false /* is_load */, kDalvikReg); + st1->u.m.def_mask = ENCODE_ALL; + } + } + + // Free the temporary used for the data movement. + FreeTemp(temp); + } else { + // Moving 32-bits via general purpose register. + bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3); + + // Now load the argument VR and store to the outs. 
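+        // (Either path below advances both offsets by bytes_to_move and
+        // retires bytes_to_move >> 2 stack words: four per XMM move, one
+        // per GPR move.)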
+        Load32Disp(TargetReg(kSp), current_src_offset, temp);
+        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+  }
+
+  // Now handle the arguments that were not mapped to physical registers.
+  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+    RegStorage regSingle = TargetReg(kArg2);
+    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+        if (rl_arg.wide) {
+          if (rl_arg.location == kLocPhysReg) {
+            StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+          } else {
+            LoadValueDirectWideFixed(rl_arg, regWide);
+            StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+          }
+          i++;
+        } else {
+          if (rl_arg.location == kLocPhysReg) {
+            StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+          } else {
+            LoadValueDirectFixed(rl_arg, regSingle);
+            StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+    }
+  }
+
+  // Finish with the arguments that were mapped to physical registers.
+  for (int i = start_index; i <= last_mapped_in; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        LoadValueDirectWideFixed(rl_arg, reg);
+        i++;
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                  direct_code, direct_method, type);
+    }
+  }
+
+  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                              direct_code, direct_method, type);
+  if (pcrLabel) {
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      Load32Disp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
+  }
+  return call_state;
+}
+
 }  // namespace art
+
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 618b3a5..d074d81 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -89,11 +89,8 @@ LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
     res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg());
   } else {
     // Note, there is no byte immediate form of a 32 bit immediate move.
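+    // For example, "mov eax, 1" still encodes a full 4-byte immediate
+    // (B8 01 00 00 00); only the ALU group has sign-extended imm8 forms.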
- if (r_dest.Is64Bit()) { - res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value); - } else { - res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); - } + // 64-bit immediate is not supported by LIR structure + res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); } if (r_dest_save.IsFloat()) { @@ -120,8 +117,8 @@ LIR* X86Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { X86OpCode opcode = kX86Bkpt; switch (op) { - case kOpNeg: opcode = kX86Neg32R; break; - case kOpNot: opcode = kX86Not32R; break; + case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break; + case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break; case kOpRev: opcode = kX86Bswap32R; break; case kOpBlx: opcode = kX86CallR; break; default: @@ -138,6 +135,9 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { switch (op) { case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break; case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break; + case kOpLsl: opcode = kX86Sal64RI; break; + case kOpLsr: opcode = kX86Shr64RI; break; + case kOpAsr: opcode = kX86Sar64RI; break; default: LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op; } @@ -189,6 +189,7 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { } LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { + bool is64Bit = r_dest_src1.Is64Bit(); X86OpCode opcode = kX86Nop; bool src2_must_be_cx = false; switch (op) { @@ -207,33 +208,34 @@ LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OpReg(kOpRev, r_dest_src1); return OpRegImm(kOpAsr, r_dest_src1, 16); // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RR; break; - case kOpSbc: opcode = kX86Sbb32RR; break; - case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break; - case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break; - case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break; - case kOpMov: opcode = kX86Mov32RR; break; - case kOpCmp: opcode = kX86Cmp32RR; break; - case kOpAdd: opcode = kX86Add32RR; break; - case kOpAdc: opcode = kX86Adc32RR; break; - case kOpAnd: opcode = kX86And32RR; break; - case kOpOr: opcode = kX86Or32RR; break; - case kOpXor: opcode = kX86Xor32RR; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break; + case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break; + case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break; + case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break; + case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break; + case kOpOr: opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break; case kOp2Byte: // TODO: there are several instances of this check. A utility function perhaps? // TODO: Similar to Arm's reg < 8 check. Perhaps add attribute checks to RegStorage? // Use shifts instead of a byte operand if the source can't be byte accessed. 
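+      // For example, for a 32-bit register holding 0x000000F0, "sal 24"
+      // gives 0xF0000000 and "sar 24" yields 0xFFFFFFF0 - the same result
+      // a movsx8 of that byte would produce.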
if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) { - NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); - NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24); - return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24); + NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); + NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24); + return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(), + is64Bit ? 56 : 24); } else { - opcode = kX86Movsx8RR; + opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR; } break; - case kOp2Short: opcode = kX86Movsx16RR; break; - case kOp2Char: opcode = kX86Movzx16RR; break; - case kOpMul: opcode = kX86Imul32RR; break; + case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break; + case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break; default: LOG(FATAL) << "Bad case in OpRegReg " << op; break; @@ -354,16 +356,17 @@ LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, Re } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) { + bool is64Bit = r_dest.Is64Bit(); X86OpCode opcode = kX86Nop; switch (op) { // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; case kOp2Byte: opcode = kX86Movsx8RM; break; case kOp2Short: opcode = kX86Movsx16RM; break; case kOp2Char: opcode = kX86Movzx16RM; break; @@ -382,63 +385,68 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int o LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) { DCHECK_NE(rl_dest.location, kLocPhysReg); int displacement = SRegOffset(rl_dest.s_reg_low); + bool is64Bit = rl_dest.wide != 0; X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32MR; break; - case kOpMov: opcode = kX86Mov32MR; break; - case kOpCmp: opcode = kX86Cmp32MR; break; - case kOpAdd: opcode = kX86Add32MR; break; - case kOpAnd: opcode = kX86And32MR; break; - case kOpOr: opcode = kX86Or32MR; break; - case kOpXor: opcode = kX86Xor32MR; break; - case kOpLsl: opcode = kX86Sal32MC; break; - case kOpLsr: opcode = kX86Shr32MC; break; - case kOpAsr: opcode = kX86Sar32MC; break; + case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break; + case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break; + case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break; + case kOpOr: opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break; + case kOpLsl: opcode = is64Bit ? 
kX86Sal64MC : kX86Sal32MC; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break; default: LOG(FATAL) << "Bad case in OpMemReg " << op; break; } LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); - AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) { DCHECK_NE(rl_value.location, kLocPhysReg); + bool is64Bit = r_dest.Is64Bit(); int displacement = SRegOffset(rl_value.s_reg_low); X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; - case kOpMul: opcode = kX86Imul32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break; default: LOG(FATAL) << "Bad case in OpRegMem " << op; break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { + bool is64Bit = r_dest.Is64Bit(); if (r_dest != r_src1 && r_dest != r_src2) { if (op == kOpAdd) { // lea special case, except can't encode rbp as base if (r_src1 == r_src2) { OpRegCopy(r_dest, r_src1); return OpRegImm(kOpLsl, r_dest, 1); } else if (r_src1 != rs_rBP) { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src1.GetReg() /* base */, - r_src2.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src1.GetReg() /* base */, r_src2.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } else { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src2.GetReg() /* base */, - r_src1.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src2.GetReg() /* base */, r_src1.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } } else { OpRegCopy(r_dest, r_src1); @@ -476,10 +484,10 @@ LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, } LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) { - if (op == kOpMul) { + if (op == kOpMul && !Gen64Bit()) { X86OpCode opcode = IS_SIMM8(value) ? 
kX86Imul32RRI8 : kX86Imul32RRI; return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value); - } else if (op == kOpAnd) { + } else if (op == kOpAnd && !Gen64Bit()) { if (value == 0xFF && r_src.Low4()) { return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg()); } else if (value == 0xFFFF) { @@ -492,8 +500,9 @@ LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r5sib_no_base /* base */, r_src.GetReg() /* index */, value /* scale */, 0 /* disp */); } else if (op == kOpAdd) { // lea add special case - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */, - rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */); + return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, + 0 /* scale */, value /* disp */); } OpRegCopy(r_dest, r_src); } @@ -556,7 +565,11 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -582,8 +595,20 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { } } } else { - res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); - LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + if (r_dest.IsPair()) { + res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); + LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + } else { + // TODO(64) make int64_t value parameter of LoadConstantNoClobber + if (val_lo < 0) { + val_hi += 1; + } + res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi); + NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32); + if (val_lo != 0) { + NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo); + } + } } return res; } @@ -601,6 +626,8 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_dest.IsFloat()) { opcode = is_array ? kX86MovsdRA : kX86MovsdRM; + } else if (!pair) { + opcode = is_array ? kX86Mov64RA : kX86Mov64RM; } else { opcode = is_array ? kX86Mov32RA : kX86Mov32RM; } @@ -742,13 +769,10 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_src.IsFloat()) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; + } else if (!pair) { + opcode = is_array ? kX86Mov64AR : kX86Mov64MR; } else { - if (Gen64Bit()) { - opcode = is_array ? kX86Mov64AR : kX86Mov64MR; - } else { - // TODO(64): pair = true; - opcode = is_array ? kX86Mov32AR : kX86Mov32MR; - } + opcode = is_array ? kX86Mov32AR : kX86Mov32MR; } // TODO: double store is to unaligned address DCHECK_EQ((displacement & 0x3), 0); @@ -855,7 +879,7 @@ void X86Mir2Lir::AnalyzeMIR() { // Did we need a pointer to the method code? 
if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit());
   } else {
     base_of_code_ = nullptr;
   }
@@ -971,6 +995,7 @@ RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) {
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
@@ -984,7 +1009,7 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) {
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
-
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index bb8df89..5022529 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -142,10 +142,6 @@ enum X86NativeRegisterPool {
   r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
   r7q = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
   rDI = r7,
-#ifndef TARGET_REX_SUPPORT
-  // fake return address register for core spill mask.
-  rRET = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
-#else
   r8 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
   r8q = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
   r9 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
@@ -164,7 +160,6 @@ enum X86NativeRegisterPool {
   r15q = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
   // fake return address register for core spill mask.
   rRET = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
-#endif
 
   // xmm registers, single precision view.
   fr0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
@@ -175,7 +170,6 @@ enum X86NativeRegisterPool {
   fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
   fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
   fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
-#ifdef TARGET_REX_SUPPORT
   fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
   fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
   fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
@@ -184,7 +178,6 @@ enum X86NativeRegisterPool {
   fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
   fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
   fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
 
   // xmm registers, double precision aliases.
dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, @@ -195,7 +188,6 @@ enum X86NativeRegisterPool { dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, -#ifdef TARGET_REX_SUPPORT dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, @@ -204,7 +196,6 @@ enum X86NativeRegisterPool { dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, -#endif // xmm registers, quad precision aliases xr0 = RegStorage::k128BitSolo | 0, @@ -215,7 +206,6 @@ enum X86NativeRegisterPool { xr5 = RegStorage::k128BitSolo | 5, xr6 = RegStorage::k128BitSolo | 6, xr7 = RegStorage::k128BitSolo | 7, -#ifdef TARGET_REX_SUPPORT xr8 = RegStorage::k128BitSolo | 8, xr9 = RegStorage::k128BitSolo | 9, xr10 = RegStorage::k128BitSolo | 10, @@ -224,7 +214,6 @@ enum X86NativeRegisterPool { xr13 = RegStorage::k128BitSolo | 13, xr14 = RegStorage::k128BitSolo | 14, xr15 = RegStorage::k128BitSolo | 15, -#endif // TODO: as needed, add 256, 512 and 1024-bit xmm views. }; @@ -254,7 +243,6 @@ constexpr RegStorage rs_r7(RegStorage::kValid | r7); constexpr RegStorage rs_r7q(RegStorage::kValid | r7q); constexpr RegStorage rs_rDI = rs_r7; constexpr RegStorage rs_rRET(RegStorage::kValid | rRET); -#ifdef TARGET_REX_SUPPORT constexpr RegStorage rs_r8(RegStorage::kValid | r8); constexpr RegStorage rs_r8q(RegStorage::kValid | r8q); constexpr RegStorage rs_r9(RegStorage::kValid | r9); @@ -271,7 +259,6 @@ constexpr RegStorage rs_r14(RegStorage::kValid | r14); constexpr RegStorage rs_r14q(RegStorage::kValid | r14q); constexpr RegStorage rs_r15(RegStorage::kValid | r15); constexpr RegStorage rs_r15q(RegStorage::kValid | r15q); -#endif constexpr RegStorage rs_fr0(RegStorage::kValid | fr0); constexpr RegStorage rs_fr1(RegStorage::kValid | fr1); @@ -281,7 +268,6 @@ constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); -#ifdef TARGET_REX_SUPPORT constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); @@ -290,7 +276,6 @@ constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); -#endif constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); @@ -300,7 +285,6 @@ constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); -#ifdef TARGET_REX_SUPPORT constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); @@ -309,7 +293,6 @@ constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); constexpr RegStorage rs_dr14(RegStorage::kValid | 
dr14); constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); -#endif constexpr RegStorage rs_xr0(RegStorage::kValid | xr0); constexpr RegStorage rs_xr1(RegStorage::kValid | xr1); @@ -319,7 +302,6 @@ constexpr RegStorage rs_xr4(RegStorage::kValid | xr4); constexpr RegStorage rs_xr5(RegStorage::kValid | xr5); constexpr RegStorage rs_xr6(RegStorage::kValid | xr6); constexpr RegStorage rs_xr7(RegStorage::kValid | xr7); -#ifdef TARGET_REX_SUPPORT constexpr RegStorage rs_xr8(RegStorage::kValid | xr8); constexpr RegStorage rs_xr9(RegStorage::kValid | xr9); constexpr RegStorage rs_xr10(RegStorage::kValid | xr10); @@ -328,16 +310,21 @@ constexpr RegStorage rs_xr12(RegStorage::kValid | xr12); constexpr RegStorage rs_xr13(RegStorage::kValid | xr13); constexpr RegStorage rs_xr14(RegStorage::kValid | xr14); constexpr RegStorage rs_xr15(RegStorage::kValid | xr15); -#endif extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; extern X86NativeRegisterPool rX86_ARG2; extern X86NativeRegisterPool rX86_ARG3; +extern X86NativeRegisterPool rX86_ARG4; +extern X86NativeRegisterPool rX86_ARG5; extern X86NativeRegisterPool rX86_FARG0; extern X86NativeRegisterPool rX86_FARG1; extern X86NativeRegisterPool rX86_FARG2; extern X86NativeRegisterPool rX86_FARG3; +extern X86NativeRegisterPool rX86_FARG4; +extern X86NativeRegisterPool rX86_FARG5; +extern X86NativeRegisterPool rX86_FARG6; +extern X86NativeRegisterPool rX86_FARG7; extern X86NativeRegisterPool rX86_RET0; extern X86NativeRegisterPool rX86_RET1; extern X86NativeRegisterPool rX86_INVOKE_TGT; @@ -347,10 +334,16 @@ extern RegStorage rs_rX86_ARG0; extern RegStorage rs_rX86_ARG1; extern RegStorage rs_rX86_ARG2; extern RegStorage rs_rX86_ARG3; +extern RegStorage rs_rX86_ARG4; +extern RegStorage rs_rX86_ARG5; extern RegStorage rs_rX86_FARG0; extern RegStorage rs_rX86_FARG1; extern RegStorage rs_rX86_FARG2; extern RegStorage rs_rX86_FARG3; +extern RegStorage rs_rX86_FARG4; +extern RegStorage rs_rX86_FARG5; +extern RegStorage rs_rX86_FARG6; +extern RegStorage rs_rX86_FARG7; extern RegStorage rs_rX86_RET0; extern RegStorage rs_rX86_RET1; extern RegStorage rs_rX86_INVOKE_TGT; @@ -363,6 +356,9 @@ const RegLocation x86_loc_c_return const RegLocation x86_loc_c_return_wide {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG}; +const RegLocation x86_64_loc_c_return_wide + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG}; const RegLocation x86_loc_c_return_float {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG}; @@ -505,6 +501,7 @@ enum X86OpCode { UnaryOpcode(kX86Divmod, DaR, DaM, DaA), UnaryOpcode(kX86Idivmod, DaR, DaM, DaA), kx86Cdq32Da, + kx86Cqo64Da, kX86Bswap32R, kX86Push32R, kX86Pop32R, #undef UnaryOpcode @@ -518,8 +515,12 @@ enum X86OpCode { kX86MovssAR, Binary0fOpCode(kX86Cvtsi2sd), // int to double Binary0fOpCode(kX86Cvtsi2ss), // int to float + Binary0fOpCode(kX86Cvtsqi2sd), // long to double + Binary0fOpCode(kX86Cvtsqi2ss), // long to float Binary0fOpCode(kX86Cvttsd2si), // truncating double to int Binary0fOpCode(kX86Cvttss2si), // truncating float to int + Binary0fOpCode(kX86Cvttsd2sqi), // truncating double to long + Binary0fOpCode(kX86Cvttss2sqi), // truncating float to long Binary0fOpCode(kX86Cvtsd2si), // rounding double to int Binary0fOpCode(kX86Cvtss2si), // rounding float to int Binary0fOpCode(kX86Ucomisd), // unordered double compare @@ -587,14 +588,18 @@ 
enum X86OpCode { kX86MovhpsRM, kX86MovhpsRA, // load packed single FP values from m64 to high quadword of xmm kX86MovhpsMR, kX86MovhpsAR, // store packed single FP values from high quadword of xmm to m64 Binary0fOpCode(kX86Movdxr), // move into xmm from gpr + Binary0fOpCode(kX86Movqxr), // move into xmm from 64 bit gpr + kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR, // move into 64 bit reg from xmm kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm + kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA, // move 32 bit to 64 bit with sign extension kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand kX86Mfence, // memory barrier Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply + Binary0fOpCode(kX86Imul64), // 64bit multiply kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange - kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange + kX86LockCmpxchg64M, kX86LockCmpxchg64A, // locked compare and exchange kX86XchgMR, // exchange memory with register (automatically locked) Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value @@ -628,7 +633,6 @@ enum X86EncodingKind { kData, // Special case for raw data. kNop, // Special case for variable length nop. kNullary, // Opcode that takes no arguments. - kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. kRegOpcode, // Shorter form of R instruction kind (opcode+rd) kReg, kMem, kArray, // R, M and A instruction kinds. kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds. @@ -637,11 +641,11 @@ enum X86EncodingKind { kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. kMovRegImm, // Shorter form move RI. - kRegRegImmRev, // RRI with first reg in r/m + kRegRegImmStore, // RRI following the store modrm reg-reg encoding rather than the load. kMemRegImm, // MRI instruction kinds. kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate. kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL. - kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. + // kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds following by a condition. kRegRegCond, // RR instruction kind followed by a condition. kRegMemCond, // RM instruction kind followed by a condition. @@ -654,19 +658,25 @@ enum X86EncodingKind { /* Struct used to define the EncodingMap positions for each X86 opcode */ struct X86EncodingMap { X86OpCode opcode; // e.g. kOpAddRI - X86EncodingKind kind; // Used to discriminate in the union below + // The broad category the instruction conforms to, such as kRegReg. Identifies which LIR operands + // hold meaning for the opcode. + X86EncodingKind kind; uint64_t flags; struct { - uint8_t prefix1; // non-zero => a prefix byte - uint8_t prefix2; // non-zero => a second prefix byte - uint8_t opcode; // 1 byte opcode - uint8_t extra_opcode1; // possible extra opcode byte - uint8_t extra_opcode2; // possible second extra opcode byte - // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the - // encoding kind + uint8_t prefix1; // Non-zero => a prefix byte. + uint8_t prefix2; // Non-zero => a second prefix byte. 
+ uint8_t opcode; // 1 byte opcode. + uint8_t extra_opcode1; // Possible extra opcode byte. + uint8_t extra_opcode2; // Possible second extra opcode byte. + // 3-bit opcode that gets encoded in the register bits of the modrm byte, use determined by the + // encoding kind. uint8_t modrm_opcode; - uint8_t ax_opcode; // non-zero => shorter encoding for AX as a destination - uint8_t immediate_bytes; // number of bytes of immediate + uint8_t ax_opcode; // Non-zero => shorter encoding for AX as a destination. + uint8_t immediate_bytes; // Number of bytes of immediate. + // Does the instruction address a byte register? In 32-bit mode the registers ah, bh, ch and dh + // are not used. In 64-bit mode the REX prefix is used to normalize and allow any byte register + // to be addressed. + bool r8_form; } skeleton; const char *name; const char* fmt; @@ -700,6 +710,7 @@ struct X86EncodingMap { #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127)) #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767)) +#define IS_SIMM32(v) ((INT64_C(-2147483648) <= (v)) && ((v) <= INT64_C(2147483647))) extern X86EncodingMap EncodingMap[kX86Last]; extern X86ConditionCode X86ConditionEncoding(ConditionCode cond); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ed3f43c..e888cc1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -484,7 +484,10 @@ void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store) { } void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) { - constant->SetLocations(nullptr); + // TODO: Support constant locations. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { @@ -492,7 +495,10 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { } void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { - constant->SetLocations(nullptr); + // TODO: Support constant locations. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) { @@ -794,7 +800,12 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { } void LocationsBuilderARM::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); + instruction->SetLocations(locations); } void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8bfd8d6..72c697f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -494,7 +494,10 @@ void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) { } void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { - constant->SetLocations(nullptr); + // TODO: Support constant locations. 
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { @@ -502,7 +505,10 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { } void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { - constant->SetLocations(nullptr); + // TODO: Support constant locations. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) { @@ -814,7 +820,12 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { } void LocationsBuilderX86::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); + instruction->SetLocations(locations); } void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc new file mode 100644 index 0000000..371478c --- /dev/null +++ b/compiler/optimizing/graph_test.cc @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "base/stringprintf.h" +#include "builder.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "pretty_printer.h" +#include "utils/arena_allocator.h" + +#include "gtest/gtest.h" + +namespace art { + +static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* if_block = new (allocator) HBasicBlock(graph); + graph->AddBlock(if_block); + HInstruction* instr = new (allocator) HIntConstant(4); + if_block->AddInstruction(instr); + instr = new (allocator) HIf(instr); + if_block->AddInstruction(instr); + return if_block; +} + +static HBasicBlock* createGotoBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* got = new (allocator) HGoto(); + block->AddInstruction(got); + return block; +} + +static HBasicBlock* createReturnBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* return_instr = new (allocator) HReturnVoid(); + block->AddInstruction(return_instr); + return block; +} + +static HBasicBlock* createExitBlock(HGraph* graph, ArenaAllocator* allocator) { + HBasicBlock* block = new (allocator) HBasicBlock(graph); + graph->AddBlock(block); + HInstruction* exit_instr = new (allocator) HExit(); + block->AddInstruction(exit_instr); + return block; +} + + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the false block to be the return block. +TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = new (&allocator) HGraph(&allocator); + HBasicBlock* entry_block = createGotoBlock(graph, &allocator); + HBasicBlock* if_block = createIfBlock(graph, &allocator); + HBasicBlock* if_true = createGotoBlock(graph, &allocator); + HBasicBlock* return_block = createReturnBlock(graph, &allocator); + HBasicBlock* exit_block = createExitBlock(graph, &allocator); + + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_true); + if_true->AddSuccessor(return_block); + if_block->AddSuccessor(return_block); + return_block->AddSuccessor(exit_block); + + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true); + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block); + + graph->SimplifyCFG(); + + // Ensure we still have the same if true block. + ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_true); + + // Ensure the critical edge has been removed. + HBasicBlock* false_block = if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(); + ASSERT_NE(false_block, return_block); + + // Ensure the new block branches to the join block. + ASSERT_EQ(false_block->GetSuccessors().Get(0), return_block); +} + +// Test that the successors of an if block stay consistent after a SimplifyCFG. +// This test sets the true block to be the return block. 
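+// CFG sketch for the next test (true successor listed first):
+//   entry -> if_block -> { return_block, if_false }, if_false -> return_block,
+//   return_block -> exit
+// The true edge if_block -> return_block is critical, since return_block also
+// has if_false as a predecessor, so SimplifyCFG should split it with a new
+// goto block.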
+TEST(GraphTest, IfSuccessorSimpleJoinBlock2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_false = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  if_false->AddSuccessor(return_block);
+  if_block->AddSuccessor(if_false);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false);
+
+  graph->SimplifyCFG();
+
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_false);
+
+  // Ensure the critical edge has been removed.
+  HBasicBlock* true_block = if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor();
+  ASSERT_NE(true_block, return_block);
+
+  // Ensure the new block branches to the join block.
+  ASSERT_EQ(true_block->GetSuccessors().Get(0), return_block);
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the true block to be the loop header.
+TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), if_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  graph->BuildDominatorTree();
+
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  // Ensure there is only one back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+
+  // Ensure the new block is the back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+            if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the false block to be the loop header.
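+// CFG sketch: the false edge of if_block loops back to if_block itself, so
+// building the dominator tree has to reroute that self edge through a newly
+// created back-edge block while the true edge still reaches return_block.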
+TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+  HBasicBlock* exit_block = createExitBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(if_block);
+  if_block->AddSuccessor(return_block);
+  if_block->AddSuccessor(if_block);
+  return_block->AddSuccessor(exit_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), if_block);
+
+  graph->BuildDominatorTree();
+
+  // Ensure we still have the same if true block.
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+
+  // Ensure there is only one back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+
+  // Ensure the new block is the back edge.
+  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+            if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the true block to be a loop header with multiple pre-headers.
+TEST(GraphTest, IfSuccessorMultiplePreHeaders1) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* loop_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(first_if_block);
+  first_if_block->AddSuccessor(if_block);
+  first_if_block->AddSuccessor(loop_block);
+  loop_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(return_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), loop_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
+
+  graph->BuildDominatorTree();
+
+  HIf* if_instr = if_block->GetLastInstruction()->AsIf();
+  // Ensure we still have the same if false block.
+  ASSERT_EQ(if_instr->IfFalseSuccessor(), return_block);
+
+  // Ensure there is only one pre-header.
+  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+
+  // Ensure the new block is the successor of the true block.
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Size(), 1u);
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Get(0),
+            loop_block->GetLoopInformation()->GetPreHeader());
+}
+
+// Test that the successors of an if block stay consistent after a SimplifyCFG.
+// This test sets the false block to be a loop header with multiple pre-headers.
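+// CFG sketch: loop_block is entered from both first_if_block and if_block's
+// false edge while also looping on itself, so the loop starts out with two
+// outside predecessors; after building the dominator tree it should be left
+// with a single pre-header plus the back edge.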
+TEST(GraphTest, IfSuccessorMultiplePreHeaders2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* if_block = createIfBlock(graph, &allocator);
+  HBasicBlock* loop_block = createGotoBlock(graph, &allocator);
+  HBasicBlock* return_block = createReturnBlock(graph, &allocator);
+
+  graph->SetEntryBlock(entry_block);
+  entry_block->AddSuccessor(first_if_block);
+  first_if_block->AddSuccessor(if_block);
+  first_if_block->AddSuccessor(loop_block);
+  loop_block->AddSuccessor(loop_block);
+  if_block->AddSuccessor(return_block);
+  if_block->AddSuccessor(loop_block);
+
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
+  ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), loop_block);
+
+  graph->BuildDominatorTree();
+
+  HIf* if_instr = if_block->GetLastInstruction()->AsIf();
+  // Ensure we still have the same if true block.
+  ASSERT_EQ(if_instr->IfTrueSuccessor(), return_block);
+
+  // Ensure there is only one pre-header.
+  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+
+  // Ensure the new block is the successor of the false block.
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Size(), 1u);
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Get(0),
+            loop_block->GetLoopInformation()->GetPreHeader());
+}
+
+TEST(GraphTest, InsertInstructionBefore) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* block = createGotoBlock(graph, &allocator);
+  HInstruction* got = block->GetLastInstruction();
+  ASSERT_TRUE(got->IsControlFlow());
+
+  // Test at the beginning of the block.
+  HInstruction* first_instruction = new (&allocator) HIntConstant(4);
+  block->InsertInstructionBefore(first_instruction, got);
+
+  ASSERT_NE(first_instruction->GetId(), -1);
+  ASSERT_EQ(first_instruction->GetBlock(), block);
+  ASSERT_EQ(block->GetFirstInstruction(), first_instruction);
+  ASSERT_EQ(block->GetLastInstruction(), got);
+  ASSERT_EQ(first_instruction->GetNext(), got);
+  ASSERT_EQ(first_instruction->GetPrevious(), nullptr);
+  ASSERT_EQ(got->GetNext(), nullptr);
+  ASSERT_EQ(got->GetPrevious(), first_instruction);
+
+  // Test in the middle of the block.
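+  // Expected order afterwards: first_instruction, second_instruction, got.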
+ HInstruction* second_instruction = new (&allocator) HIntConstant(4); + block->InsertInstructionBefore(second_instruction, got); + + ASSERT_NE(second_instruction->GetId(), -1); + ASSERT_EQ(second_instruction->GetBlock(), block); + ASSERT_EQ(block->GetFirstInstruction(), first_instruction); + ASSERT_EQ(block->GetLastInstruction(), got); + ASSERT_EQ(first_instruction->GetNext(), second_instruction); + ASSERT_EQ(first_instruction->GetPrevious(), nullptr); + ASSERT_EQ(second_instruction->GetNext(), got); + ASSERT_EQ(second_instruction->GetPrevious(), first_instruction); + ASSERT_EQ(got->GetNext(), nullptr); + ASSERT_EQ(got->GetPrevious(), second_instruction); +} + +} // namespace art diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f9ae529..e4f9371 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -18,6 +18,7 @@ #include "base/stringprintf.h" #include "builder.h" +#include "code_generator.h" #include "dex_file.h" #include "dex_instruction.h" #include "graph_visualizer.h" @@ -41,8 +42,11 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num ASSERT_NE(graph, nullptr); graph->BuildDominatorTree(); + graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c797497..987c5f2 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -15,6 +15,7 @@ */ #include "builder.h" +#include "code_generator.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" @@ -56,14 +57,16 @@ TEST(LiveRangesTest, CFG1) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. - ASSERT_EQ(8u, range->GetEnd()); + ASSERT_EQ(9u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(1); ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr); ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition()); @@ -101,14 +104,15 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(2u, range->GetStart()); // Last use is the return instruction. 
- ASSERT_EQ(22u, range->GetEnd()); + ASSERT_EQ(23u, range->GetEnd()); HBasicBlock* block = graph->GetBlocks().Get(3); ASSERT_TRUE(block->GetLastInstruction()->AsReturn() != nullptr); ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition()); @@ -149,25 +153,26 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); - // Test for the 0 constant. - LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); + // Test for the 4 constant. + LiveInterval* interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); LiveRange* range = interval->GetFirstRange(); - ASSERT_EQ(2u, range->GetStart()); + ASSERT_EQ(4u, range->GetStart()); // Last use is the phi at the return block so instruction is live until // the end of the then block. ASSERT_EQ(18u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); - // Test for the 4 constant. - interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); + // Test for the 0 constant. + interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); // The then branch is a hole for this constant, therefore its interval has 2 ranges. // First range starts from the definition and ends at the if block. range = interval->GetFirstRange(); - ASSERT_EQ(4u, range->GetStart()); - // 9 is the end of the if block. + ASSERT_EQ(2u, range->GetStart()); + // 14 is the end of the if block. ASSERT_EQ(14u, range->GetEnd()); // Second range is the else block. range = range->GetNext(); @@ -179,8 +184,9 @@ TEST(LiveRangesTest, CFG3) { // Test for the phi. interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval(); range = interval->GetFirstRange(); + ASSERT_EQ(22u, liveness.GetInstructionFromSsaIndex(3)->GetLifetimePosition()); ASSERT_EQ(22u, range->GetStart()); - ASSERT_EQ(24u, range->GetEnd()); + ASSERT_EQ(25u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } @@ -223,7 +229,8 @@ TEST(LiveRangesTest, Loop) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); // Test for the 0 constant. @@ -248,7 +255,7 @@ TEST(LiveRangesTest, Loop) { range = interval->GetFirstRange(); // The instruction is live until the return instruction after the loop. ASSERT_EQ(6u, range->GetStart()); - ASSERT_EQ(26u, range->GetEnd()); + ASSERT_EQ(27u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the phi. @@ -256,7 +263,7 @@ TEST(LiveRangesTest, Loop) { range = interval->GetFirstRange(); // Instruction is consumed by the if. 
ASSERT_EQ(14u, range->GetStart()); - ASSERT_EQ(16u, range->GetEnd()); + ASSERT_EQ(17u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 7a33620..2d0bc39 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -15,6 +15,7 @@ */ #include "builder.h" +#include "code_generator.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" @@ -48,7 +49,8 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->BuildDominatorTree(); graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, InstructionSet::kX86); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); std::ostringstream buffer; @@ -69,17 +71,17 @@ static void TestCode(const uint16_t* data, const char* expected) { TEST(LivenessTest, CFG1) { const char* expected = "Block 0\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n" + " live in: (0)\n" + " live out: (0)\n" + " kill: (1)\n" "Block 1\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n" + " live in: (0)\n" + " live out: (0)\n" + " kill: (0)\n" "Block 2\n" - " live in: ()\n" - " live out: ()\n" - " kill: ()\n"; + " live in: (0)\n" + " live out: (0)\n" + " kill: (0)\n"; // Constant is not used. const uint16_t data[] = ONE_REGISTER_CODE_ITEM( diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 752466b..2a97fad 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -35,7 +35,7 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_.Get(i); for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block, false); + block->GetSuccessors().Get(j)->RemovePredecessor(block); } for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { block->RemovePhi(it.Current()->AsPhi()); @@ -143,8 +143,7 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { HBasicBlock* new_block = new (arena_) HBasicBlock(this); AddBlock(new_block); new_block->AddInstruction(new (arena_) HGoto()); - block->RemoveSuccessor(successor); - block->AddSuccessor(new_block); + block->ReplaceSuccessor(successor, new_block); new_block->AddSuccessor(successor); if (successor->IsLoopHeader()) { // If we split at a back edge boundary, make the new block the back edge. 
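ReplaceSuccessor collapses the old RemoveSuccessor/AddSuccessor pair into a single operation that patches the successor slot in place, so the successor order, and with it an HIf's true/false targets, stays stable; that is also why the expected "succ:" orders flip later in ssa_test.cc. A standalone sketch of the pattern, using a hypothetical Block type rather than ART's HBasicBlock:

#include <algorithm>
#include <cassert>
#include <vector>

struct Block {
  std::vector<Block*> preds;
  std::vector<Block*> succs;

  void ReplaceSuccessor(Block* existing, Block* fresh) {
    auto slot = std::find(succs.begin(), succs.end(), existing);
    assert(slot != succs.end());
    // Unlink from the old successor's predecessor list...
    existing->preds.erase(std::find(existing->preds.begin(), existing->preds.end(), this));
    // ...link to the new one, and overwrite the slot so indices are preserved.
    fresh->preds.push_back(this);
    *slot = fresh;
  }
};

// Critical-edge splitting then becomes: redirect the edge into a fresh block
// that unconditionally falls through to the original successor.
void SplitEdge(Block* from, Block* to, Block* middle) {
  from->ReplaceSuccessor(to, middle);
  middle->succs.push_back(to);
  to->preds.push_back(middle);
}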
@@ -168,8 +167,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { new_back_edge->AddInstruction(new (arena_) HGoto()); for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - header->RemovePredecessor(back_edge); - back_edge->AddSuccessor(new_back_edge); + back_edge->ReplaceSuccessor(header, new_back_edge); } info->ClearBackEdges(); info->AddBackEdge(new_back_edge); @@ -190,9 +188,8 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); if (predecessor != back_edge) { - header->RemovePredecessor(predecessor); + predecessor->ReplaceSuccessor(header, pre_header); pred--; - predecessor->AddSuccessor(pre_header); } } pre_header->AddSuccessor(header); @@ -294,12 +291,20 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) { DCHECK(cursor->AsPhi() == nullptr); DCHECK(instruction->AsPhi() == nullptr); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); instruction->next_ = cursor; instruction->previous_ = cursor->previous_; cursor->previous_ = instruction; if (GetFirstInstruction() == cursor) { instructions_.first_instruction_ = instruction; + } else { + instruction->previous_->next_ = instruction; } + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); } static void Add(HInstructionList* instruction_list, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b1c8016..68848de 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -283,18 +283,16 @@ class HBasicBlock : public ArenaObject { block->predecessors_.Add(this); } - void RemovePredecessor(HBasicBlock* block, bool remove_in_successor = true) { - predecessors_.Delete(block); - if (remove_in_successor) { - block->successors_.Delete(this); - } + void ReplaceSuccessor(HBasicBlock* existing, HBasicBlock* new_block) { + size_t successor_index = GetSuccessorIndexOf(existing); + DCHECK_NE(successor_index, static_cast<size_t>(-1)); + existing->RemovePredecessor(this); + new_block->predecessors_.Add(this); + successors_.Put(successor_index, new_block); } - void RemoveSuccessor(HBasicBlock* block, bool remove_in_predecessor = true) { - successors_.Delete(block); - if (remove_in_predecessor) { - block->predecessors_.Delete(this); - } + void RemovePredecessor(HBasicBlock* block) { + predecessors_.Delete(block); } void ClearAllPredecessors() { @@ -315,6 +313,15 @@ class HBasicBlock : public ArenaObject { return -1; } + size_t GetSuccessorIndexOf(HBasicBlock* successor) { + for (size_t i = 0, e = successors_.Size(); i < e; ++i) { + if (successors_.Get(i) == successor) { + return i; + } + } + return -1; + } + void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -455,6 +462,7 @@ class HInstruction : public ArenaObject { virtual void SetRawInputAt(size_t index, HInstruction* input) = 0; virtual bool NeedsEnvironment() const { return false; } + virtual bool IsControlFlow() const { return false; } void AddUseAt(HInstruction* user, size_t index) { uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_); @@ -733,7 
+741,9 @@ class HTemplateInstruction: public HInstruction { // instruction that branches to the exit block. class HReturnVoid : public HTemplateInstruction<0> { public: - HReturnVoid() { } + HReturnVoid() {} + + virtual bool IsControlFlow() const { return true; } DECLARE_INSTRUCTION(ReturnVoid); @@ -749,6 +759,8 @@ class HReturn : public HTemplateInstruction<1> { SetRawInputAt(0, value); } + virtual bool IsControlFlow() const { return true; } + DECLARE_INSTRUCTION(Return); private: @@ -760,7 +772,9 @@ class HReturn : public HTemplateInstruction<1> { // exit block. class HExit : public HTemplateInstruction<0> { public: - HExit() { } + HExit() {} + + virtual bool IsControlFlow() const { return true; } DECLARE_INSTRUCTION(Exit); @@ -771,12 +785,14 @@ class HExit : public HTemplateInstruction<0> { // Jumps from one block to another. class HGoto : public HTemplateInstruction<0> { public: - HGoto() { } + HGoto() {} HBasicBlock* GetSuccessor() const { return GetBlock()->GetSuccessors().Get(0); } + virtual bool IsControlFlow() const { return true; } + DECLARE_INSTRUCTION(Goto); private: @@ -799,6 +815,8 @@ class HIf : public HTemplateInstruction<1> { return GetBlock()->GetSuccessors().Get(1); } + virtual bool IsControlFlow() const { return true; } + DECLARE_INSTRUCTION(If); private: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index dfbb488..3dc0928 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -131,7 +131,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer.DumpGraph("ssa"); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); + SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); visualizer.DumpGraph("liveness"); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index dd175d2..8c6eb2a 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -22,6 +22,7 @@ namespace art { static constexpr size_t kMaxLifetimePosition = -1; +static constexpr size_t kDefaultNumberOfSpillSlots = 4; RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen) : allocator_(allocator), @@ -30,6 +31,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenera handled_(allocator, 0), active_(allocator, 0), inactive_(allocator, 0), + spill_slots_(allocator, kDefaultNumberOfSpillSlots), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), @@ -78,11 +80,39 @@ bool RegisterAllocator::ValidateInternal(const SsaLivenessAnalysis& liveness, intervals.Add(instruction->GetLiveInterval()); } } - return ValidateIntervals(intervals, codegen_, allocator_, processing_core_registers_, - log_fatal_on_failure); + return ValidateIntervals(intervals, spill_slots_.Size(), codegen_, allocator_, + processing_core_registers_, log_fatal_on_failure); } -bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ranges, +class AllRangesIterator : public ValueObject { + public: + explicit AllRangesIterator(LiveInterval* interval) + : current_interval_(interval), + current_range_(interval->GetFirstRange()) {} + + bool Done() const { return current_interval_ == nullptr; } + LiveRange* CurrentRange() const { return current_range_; } + LiveInterval* CurrentInterval() const { return current_interval_; } + + void Advance() { + current_range_ = 
current_range_->GetNext(); + if (current_range_ == nullptr) { + current_interval_ = current_interval_->GetNextSibling(); + if (current_interval_ != nullptr) { + current_range_ = current_interval_->GetFirstRange(); + } + } + } + + private: + LiveInterval* current_interval_; + LiveRange* current_range_; + + DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); +}; + +bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + size_t number_of_spill_slots, const CodeGenerator& codegen, ArenaAllocator* allocator, bool processing_core_registers, @@ -90,25 +120,40 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ra size_t number_of_registers = processing_core_registers ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); - GrowableArray<ArenaBitVector*> bit_vectors(allocator, number_of_registers); + GrowableArray<ArenaBitVector*> liveness_of_values( + allocator, number_of_registers + number_of_spill_slots); // Allocate a bit vector per register. A live interval that has a register // allocated will populate the associated bit vector based on its live ranges. - for (size_t i = 0; i < number_of_registers; i++) { - bit_vectors.Add(new (allocator) ArenaBitVector(allocator, 0, true)); + for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) { + liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true)); } - for (size_t i = 0, e = ranges.Size(); i < e; ++i) { - LiveInterval* current = ranges.Get(i); - do { - if (!current->HasRegister()) { - continue; + for (size_t i = 0, e = intervals.Size(); i < e; ++i) { + for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) { + LiveInterval* current = it.CurrentInterval(); + if (current->GetParent()->HasSpillSlot()) { + BitVector* liveness_of_spill_slot = liveness_of_values.Get( + number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize); + for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { + if (liveness_of_spill_slot->IsBitSet(j)) { + if (log_fatal_on_failure) { + std::ostringstream message; + message << "Spill slot conflict at " << j; + LOG(FATAL) << message.str(); + } else { + return false; + } + } else { + liveness_of_spill_slot->SetBit(j); + } + } } - BitVector* vector = bit_vectors.Get(current->GetRegister()); - LiveRange* range = current->GetFirstRange(); - do { - for (size_t j = range->GetStart(); j < range->GetEnd(); ++j) { - if (vector->IsBitSet(j)) { + + if (current->HasRegister()) { + BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister()); + for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { + if (liveness_of_register->IsBitSet(j)) { if (log_fatal_on_failure) { std::ostringstream message; message << "Register conflict at " << j << " for "; @@ -122,11 +167,11 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& ra return false; } } else { - vector->SetBit(j); + liveness_of_register->SetBit(j); } } - } while ((range = range->GetNext()) != nullptr); - } while ((current = current->GetNextSibling()) != nullptr); + } + } } return true; } @@ -270,7 +315,7 @@ bool RegisterAllocator::IsBlocked(int reg) const { bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); if (current->FirstRegisterUse() == kNoLifetime) { - // TODO: Allocate spill slot for `current`. 
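The rewritten validator treats spill slots like extra registers: every location gets a bit vector indexed by lifetime position, and a second range touching an already-set bit is a conflict. The core check in isolation (simplified stand-in types, not ART's GrowableArray or ArenaBitVector):

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

using Range = std::pair<size_t, size_t>;  // Half-open [start, end).

// Returns false on the first position where two ranges assigned to the same
// location (a register or a spill slot) overlap.
bool ValidateOneLocation(const std::vector<Range>& ranges, size_t max_position) {
  std::vector<bool> occupied(max_position, false);
  for (const Range& range : ranges) {
    for (size_t pos = range.first; pos < range.second; ++pos) {
      if (occupied[pos]) {
        std::fprintf(stderr, "conflict at %zu\n", pos);
        return false;
      }
      occupied[pos] = true;
    }
  }
  return true;
}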
+ AllocateSpillSlotFor(current); return false; } @@ -317,6 +362,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { if (first_register_use >= next_use[reg]) { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. + AllocateSpillSlotFor(current); LiveInterval* split = Split(current, first_register_use - 1); AddToUnhandled(split); return false; @@ -370,9 +416,42 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) return interval; } else { LiveInterval* new_interval = interval->SplitAt(position); - // TODO: Allocate spill slot for `interval`. return new_interval; } } +void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { + LiveInterval* parent = interval->GetParent(); + + // An instruction gets a spill slot for its entire lifetime. If the parent + // of this interval already has a spill slot, there is nothing to do. + if (parent->HasSpillSlot()) { + return; + } + + // Find when this instruction dies. + LiveInterval* last_sibling = interval; + while (last_sibling->GetNextSibling() != nullptr) { + last_sibling = last_sibling->GetNextSibling(); + } + size_t end = last_sibling->GetEnd(); + + // Find an available spill slot. + size_t slot = 0; + for (size_t e = spill_slots_.Size(); slot < e; ++slot) { + if (spill_slots_.Get(slot) <= parent->GetStart()) { + break; + } + } + + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + } + + interval->GetParent()->SetSpillSlot(slot * kVRegSize); +} + } // namespace art diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e575b96..3393a04 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -55,6 +55,7 @@ class RegisterAllocator { // Helper method for validation. Used by unit testing. static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + size_t number_of_spill_slots, const CodeGenerator& codegen, ArenaAllocator* allocator, bool processing_core_registers, @@ -75,6 +76,9 @@ class RegisterAllocator { // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; + // Allocate a spill slot for the given interval. + void AllocateSpillSlotFor(LiveInterval* interval); + // Helper methods. void AllocateRegistersInternal(const SsaLivenessAnalysis& liveness); bool ValidateInternal(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) const; @@ -98,6 +102,9 @@ class RegisterAllocator { // That is, they have a lifetime hole that spans the start of the new interval. GrowableArray<LiveInterval*> inactive_; + // The spill slots allocated for live intervals. + GrowableArray<size_t> spill_slots_; + // True if processing core registers. False if processing floating // point registers. 
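AllocateSpillSlotFor keeps a tiny free list: each entry of spill_slots_ records the position at which the slot's current occupant dies, so any slot whose recorded end is at or before the new interval's start can be recycled. The same policy in isolation (hypothetical names, a sketch rather than ART's implementation):

#include <cstddef>
#include <vector>

// slot_free_at[i] is the lifetime position at which slot i becomes free.
size_t AllocateSlot(std::vector<size_t>* slot_free_at, size_t start, size_t end) {
  size_t slot = 0;
  for (size_t e = slot_free_at->size(); slot < e; ++slot) {
    if ((*slot_free_at)[slot] <= start) {
      break;  // The previous occupant is dead before we start: reuse.
    }
  }
  if (slot == slot_free_at->size()) {
    slot_free_at->push_back(end);  // No reusable slot: grow the frame.
  } else {
    (*slot_free_at)[slot] = end;   // Record when this slot frees up again.
  }
  return slot;
}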
bool processing_core_registers_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 019d0f8..ff9b9be 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -40,9 +40,9 @@ static bool Check(const uint16_t* data) { graph->BuildDominatorTree(); graph->TransformToSSA(); graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph); - liveness.Analyze(); CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); RegisterAllocator register_allocator(&allocator, *codegen); register_allocator.AllocateRegisters(liveness); return register_allocator.Validate(liveness, false); @@ -64,10 +64,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { static constexpr size_t ranges[][2] = {{0, 42}}; intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Reset(); } @@ -77,10 +79,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Reset(); } @@ -90,10 +94,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Reset(); } @@ -103,10 +109,12 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 47}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, 
*codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Reset(); } @@ -117,14 +125,17 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { intervals.Get(0)->SplitAt(43); static constexpr size_t ranges2[][2] = {{42, 47}}; intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(1)->SetRegister(0); // Sibling of the first interval has no register allocated to it. - ASSERT_TRUE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_TRUE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); intervals.Get(0)->GetNextSibling()->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals(intervals, *codegen, &allocator, true, false)); + ASSERT_FALSE(RegisterAllocator::ValidateIntervals( + intervals, 0, *codegen, &allocator, true, false)); } } @@ -286,9 +297,9 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - SsaLivenessAnalysis liveness(*graph); - liveness.Analyze(); CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); RegisterAllocator register_allocator(&allocator, *codegen); register_allocator.AllocateRegisters(liveness); ASSERT_TRUE(register_allocator.Validate(liveness, false)); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 1284a97..471307e 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -15,22 +15,12 @@ */ #include "ssa_builder.h" + #include "nodes.h" +#include "ssa_type_propagation.h" namespace art { -static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { - // We trust the verifier has already done the necessary checking. - switch (existing) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - case Primitive::kPrimNot: - return existing; - default: - return new_type; - } -} - void SsaBuilder::BuildSsa() { // 1) Visit in reverse post order. We need to have all predecessors of a block visited // (with the exception of loops) in order to create the right environment for that @@ -44,18 +34,18 @@ void SsaBuilder::BuildSsa() { HBasicBlock* block = loop_headers_.Get(i); for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - Primitive::Type type = Primitive::kPrimVoid; for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) { HInstruction* input = ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber()); phi->AddInput(input); - type = MergeTypes(type, input->GetType()); } - phi->SetType(type); } } - // TODO: Now that the type of loop phis is set, we need a type propagation phase. - // 3) Clear locals. + // 3) Propagate types of phis. + SsaTypePropagation type_propagation(GetGraph()); + type_propagation.Run(); + + // 4) Clear locals. // TODO: Move this to a dead code eliminator phase. 
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); @@ -102,8 +92,8 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local); if (current == nullptr) { -// one_predecessor_has_no_value = true; -// break; + one_predecessor_has_no_value = true; + break; } else if (current != value) { is_different = true; } @@ -118,16 +108,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { if (is_different) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid); - Primitive::Type type = Primitive::kPrimVoid; for (size_t i = 0; i < block->GetPredecessors().Size(); i++) { HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(i), local); - // We need to merge the incoming types, as the Dex format does not - // guarantee the inputs have the same type. In particular the 0 constant is - // used for all types, but the graph builder treats it as an int. - type = MergeTypes(type, value->GetType()); phi->SetRawInputAt(i, value); } - phi->SetType(type); block->AddPhi(phi); value = phi; } diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index dc4b2e5..50ea00f 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -15,6 +15,8 @@ */ #include "ssa_liveness_analysis.h" + +#include "code_generator.h" #include "nodes.h" namespace art { @@ -80,38 +82,6 @@ static void VisitBlockForLinearization(HBasicBlock* block, order->Add(block); } -class HLinearOrderIterator : public ValueObject { - public: - explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order) - : post_order_(post_order), index_(post_order.Size()) {} - - bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return post_order_.Get(index_ -1); } - void Advance() { --index_; DCHECK_GE(index_, 0U); } - - private: - const GrowableArray<HBasicBlock*>& post_order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); -}; - -class HLinearPostOrderIterator : public ValueObject { - public: - explicit HLinearPostOrderIterator(const GrowableArray<HBasicBlock*>& post_order) - : post_order_(post_order), index_(0) {} - - bool Done() const { return index_ == post_order_.Size(); } - HBasicBlock* Current() const { return post_order_.Get(index_); } - void Advance() { ++index_; } - - private: - const GrowableArray<HBasicBlock*>& post_order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); -}; - void SsaLivenessAnalysis::LinearizeGraph() { // For simplicity of the implementation, we create post linear order. The order for // computing live ranges is the reverse of that order. @@ -131,30 +101,38 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. 
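With the one_predecessor_has_no_value path restored above, a phi is created only when every predecessor supplies a value for the local and at least two of those values differ; otherwise the local is dead on some path, or the single common value is simply reused. The decision rule as a standalone function (hypothetical Value type standing in for HInstruction*):

#include <vector>

struct Value;  // Opaque stand-in; only pointers are compared.

enum class MergeResult { kLocalIsDead, kReuseValue, kNeedPhi };

MergeResult MergeLocal(const std::vector<Value*>& predecessor_values) {
  if (predecessor_values.empty()) {
    return MergeResult::kLocalIsDead;
  }
  Value* value = predecessor_values[0];
  bool is_different = false;
  for (Value* current : predecessor_values) {
    if (current == nullptr) {
      return MergeResult::kLocalIsDead;  // One path does not define the local.
    }
    if (current != value) {
      is_different = true;
    }
  }
  return is_different ? MergeResult::kNeedPhi : MergeResult::kReuseValue;
}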
- for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); - lifetime_position += 2; for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->HasUses()) { + current->Accept(codegen_->GetLocationBuilder()); + LocationSummary* locations = current->GetLocations(); + if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType())); + new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current)); } current->SetLifetimePosition(lifetime_position); } + lifetime_position += 2; + + // Add a null marker to notify we are starting a block. + instructions_from_lifetime_position_.Add(nullptr); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->HasUses()) { + current->Accept(codegen_->GetLocationBuilder()); + LocationSummary* locations = current->GetLocations(); + if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType())); + new (graph_.GetArena()) LiveInterval(graph_.GetArena(), current->GetType(), current)); } + instructions_from_lifetime_position_.Add(current); current->SetLifetimePosition(lifetime_position); lifetime_position += 2; } @@ -165,7 +143,7 @@ void SsaLivenessAnalysis::NumberInstructions() { } void SsaLivenessAnalysis::ComputeLiveness() { - for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block_infos_.Put( block->GetBlockId(), @@ -186,7 +164,7 @@ void SsaLivenessAnalysis::ComputeLiveness() { void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. - for (HLinearPostOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) { + for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); BitVector* kill = GetKillSet(*block); @@ -201,7 +179,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { for (HInstructionIterator it(successor->GetPhis()); !it.Done(); it.Advance()) { HInstruction* phi = it.Current(); HInstruction* input = phi->InputAt(phi_input_index); - input->GetLiveInterval()->AddPhiUse(phi, block); + input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block); // A phi input whose last user is the phi dies at the end of the predecessor block, // and not at the phi's lifetime position. 
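As the comment above notes, a phi input is kept live to the end of the incoming predecessor block, where the move into the phi conceptually happens. A compressed model of the backward live-out seeding (std::set standing in for ART's bit vectors; names are hypothetical):

#include <cstddef>
#include <set>
#include <vector>

struct Liveness {
  std::set<size_t> live_in;
  std::set<size_t> live_out;
};

// One backward step of the fixed point: everything live into a successor is
// live out of `block`, and so is any value `block` feeds into a successor phi,
// since that input must survive to the end of this predecessor edge.
void SeedLiveOut(Liveness* block,
                 const std::vector<const Liveness*>& successors,
                 const std::vector<size_t>& phi_inputs_from_block) {
  for (const Liveness* successor : successors) {
    block->live_out.insert(successor->live_in.begin(), successor->live_in.end());
  }
  for (size_t ssa_index : phi_inputs_from_block) {
    block->live_out.insert(ssa_index);
  }
}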
live_in->SetBit(input->GetSsaIndex()); @@ -228,7 +206,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { HInstruction* input = current->InputAt(i); DCHECK(input->HasSsaIndex()); live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current); + input->GetLiveInterval()->AddUse(current, i, false); } if (current->HasEnvironment()) { @@ -239,7 +217,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { if (instruction != nullptr) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current); + instruction->GetLiveInterval()->AddUse(current, i, true); } } } @@ -251,6 +229,10 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { if (current->HasSsaIndex()) { kill->SetBit(current->GetSsaIndex()); live_in->ClearBit(current->GetSsaIndex()); + LiveInterval* interval = current->GetLiveInterval(); + DCHECK((interval->GetFirstRange() == nullptr) + || (interval->GetStart() == current->GetLifetimePosition())); + interval->SetFrom(current->GetLifetimePosition()); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 4d56e1f..7903ad6 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -21,6 +21,8 @@ namespace art { +class CodeGenerator; + class BlockInfo : public ArenaObject { public: BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) @@ -87,9 +89,17 @@ class LiveRange : public ArenaObject { */ class UsePosition : public ArenaObject { public: - UsePosition(HInstruction* user, size_t position, UsePosition* next) - : user_(user), position_(position), next_(next) { - DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition()); + UsePosition(HInstruction* user, + size_t input_index, + bool is_environment, + size_t position, + UsePosition* next) + : user_(user), + input_index_(input_index), + is_environment_(is_environment), + position_(position), + next_(next) { + DCHECK(user->AsPhi() != nullptr || GetPosition() == user->GetLifetimePosition() + 1); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } @@ -99,12 +109,18 @@ class UsePosition : public ArenaObject { HInstruction* GetUser() const { return user_; } - void Dump(std::ostream& stream) { + bool GetIsEnvironment() const { return is_environment_; } + + size_t GetInputIndex() const { return input_index_; } + + void Dump(std::ostream& stream) const { stream << position_; } private: HInstruction* const user_; + const size_t input_index_; + const bool is_environment_; const size_t position_; UsePosition* const next_; @@ -117,17 +133,33 @@ class UsePosition : public ArenaObject { */ class LiveInterval : public ArenaObject { public: - LiveInterval(ArenaAllocator* allocator, Primitive::Type type) + LiveInterval(ArenaAllocator* allocator, Primitive::Type type, HInstruction* defined_by = nullptr) : allocator_(allocator), first_range_(nullptr), last_range_(nullptr), first_use_(nullptr), type_(type), next_sibling_(nullptr), - register_(kNoRegister) {} + parent_(this), + register_(kNoRegister), + spill_slot_(kNoSpillSlot), + is_fixed_(false), + defined_by_(defined_by) {} + + static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) { + LiveInterval* interval = new (allocator) LiveInterval(allocator, type); + interval->SetRegister(reg); + interval->is_fixed_ = true; + return interval; + } + + bool IsFixed() const { return is_fixed_; } - void 
AddUse(HInstruction* instruction) {
- size_t position = instruction->GetLifetimePosition();
+ void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
+ // Set the use within the instruction.
+ // TODO: Use the instruction's location to know whether the instruction can die
+ // at entry, or needs to stay alive within the user.
+ size_t position = instruction->GetLifetimePosition() + 1;
size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
size_t end_block_position = instruction->GetBlock()->GetLifetimeEnd();
if (first_range_ == nullptr) {
@@ -143,12 +175,14 @@
// There is a hole in the interval. Create a new range.
first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_);
}
- first_use_ = new (allocator_) UsePosition(instruction, position, first_use_);
+ first_use_ = new (allocator_) UsePosition(
+ instruction, input_index, is_environment, position, first_use_);
}
- void AddPhiUse(HInstruction* instruction, HBasicBlock* block) {
+ void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
DCHECK(instruction->AsPhi() != nullptr);
- first_use_ = new (allocator_) UsePosition(instruction, block->GetLifetimeEnd(), first_use_);
+ first_use_ = new (allocator_) UsePosition(
+ instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
}
void AddRange(size_t start, size_t end) {
@@ -178,11 +212,23 @@
}
}
+ bool HasSpillSlot() const { return spill_slot_ != kNoSpillSlot; }
+ void SetSpillSlot(int slot) { spill_slot_ = slot; }
+ int GetSpillSlot() const { return spill_slot_; }
+
void SetFrom(size_t from) {
- DCHECK(first_range_ != nullptr);
- first_range_->start_ = from;
+ if (first_range_ != nullptr) {
+ first_range_->start_ = from;
+ } else {
+ // Instruction without uses.
+ DCHECK(!defined_by_->HasUses());
+ DCHECK(from == defined_by_->GetLifetimePosition());
+ first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr);
+ }
}
+ LiveInterval* GetParent() const { return parent_; }
+
LiveRange* GetFirstRange() const { return first_range_; }
int GetRegister() const { return register_; }
@@ -190,11 +236,11 @@
void ClearRegister() { register_ = kNoRegister; }
bool HasRegister() const { return register_ != kNoRegister; }
- bool IsDeadAt(size_t position) {
+ bool IsDeadAt(size_t position) const {
return last_range_->GetEnd() <= position;
}
- bool Covers(size_t position) {
+ bool Covers(size_t position) const {
LiveRange* current = first_range_;
while (current != nullptr) {
if (position >= current->GetStart() && position < current->GetEnd()) {
@@ -208,27 +254,10 @@
/**
* Returns the first intersection of this interval with `other`.
*/
- size_t FirstIntersectionWith(LiveInterval* other) {
- // We only call this method if there is a lifetime hole in this interval
- // at the start of `other`.
- DCHECK(!Covers(other->GetStart()));
- DCHECK_LE(GetStart(), other->GetStart());
- // Move to the range in this interval that starts after the other interval.
- size_t other_start = other->GetStart(); - LiveRange* my_range = first_range_; - while (my_range != nullptr) { - if (my_range->GetStart() >= other_start) { - break; - } else { - my_range = my_range->GetNext(); - } - } - if (my_range == nullptr) { - return kNoLifetime; - } - + size_t FirstIntersectionWith(LiveInterval* other) const { // Advance both intervals and find the first matching range start in // this interval. + LiveRange* my_range = first_range_; LiveRange* other_range = other->first_range_; do { if (my_range->IntersectsWith(*other_range)) { @@ -252,16 +281,33 @@ class LiveInterval : public ArenaObject { return first_range_->GetStart(); } + size_t GetEnd() const { + return last_range_->GetEnd(); + } + size_t FirstRegisterUseAfter(size_t position) const { + if (position == GetStart() && defined_by_ != nullptr) { + Location location = defined_by_->GetLocations()->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. + if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && defined_by_->GetLocations()->InAt(0).GetPolicy() == Location::kRequiresRegister)) { + return position; + } + } + } + UsePosition* use = first_use_; while (use != nullptr) { size_t use_position = use->GetPosition(); - // TODO: Once we plug the Locations builder of the code generator - // to the register allocator, this method must be adjusted. We - // test if there is an environment, because these are currently the only - // instructions that could have more uses than the number of registers. - if (use_position >= position && !use->GetUser()->NeedsEnvironment()) { - return use_position; + if (use_position >= position && !use->GetIsEnvironment()) { + Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + return use_position; + } } use = use->GetNext(); } @@ -272,10 +318,18 @@ class LiveInterval : public ArenaObject { return FirstRegisterUseAfter(GetStart()); } + UsePosition* GetFirstUse() const { + return first_use_; + } + Primitive::Type GetType() const { return type_; } + HInstruction* GetDefinedBy() const { + return defined_by_; + } + /** * Split this interval at `position`. This interval is changed to: * [start ... position). @@ -284,7 +338,7 @@ class LiveInterval : public ArenaObject { * [position ... end) */ LiveInterval* SplitAt(size_t position) { - DCHECK(next_sibling_ == nullptr); + DCHECK(!is_fixed_); DCHECK_GT(position, GetStart()); if (last_range_->GetEnd() <= position) { @@ -293,7 +347,9 @@ class LiveInterval : public ArenaObject { } LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_); + new_interval->next_sibling_ = next_sibling_; next_sibling_ = new_interval; + new_interval->parent_ = parent_; new_interval->first_use_ = first_use_; LiveRange* current = first_range_; @@ -383,21 +439,36 @@ class LiveInterval : public ArenaObject { // Live interval that is the result of a split. LiveInterval* next_sibling_; + // The first interval from which split intervals come from. + LiveInterval* parent_; + // The register allocated to this interval. int register_; + // The spill slot allocated to this interval. + int spill_slot_; + + // Whether the interval is for a fixed register. 
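With both range lists sorted and disjoint, the slimmed-down FirstIntersectionWith above reduces to the classic two-pointer sweep. The same algorithm in a self-contained form (a sketch, not ART's exact code):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Range { size_t start; size_t end; };  // Half-open [start, end).

constexpr size_t kNoLifetime = static_cast<size_t>(-1);

size_t FirstIntersection(const std::vector<Range>& a, const std::vector<Range>& b) {
  size_t i = 0;
  size_t j = 0;
  while (i < a.size() && j < b.size()) {
    if (a[i].start < b[j].end && b[j].start < a[i].end) {
      return std::max(a[i].start, b[j].start);  // First overlapping position.
    }
    // No overlap: the range that ends first can never intersect anything
    // later in the other list, so advance it.
    if (a[i].end <= b[j].start) {
      ++i;
    } else {
      ++j;
    }
  }
  return kNoLifetime;
}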
+ bool is_fixed_; + + // The instruction represented by this interval. + HInstruction* const defined_by_; + static constexpr int kNoRegister = -1; + static constexpr int kNoSpillSlot = -1; DISALLOW_COPY_AND_ASSIGN(LiveInterval); }; class SsaLivenessAnalysis : public ValueObject { public: - explicit SsaLivenessAnalysis(const HGraph& graph) + SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen) : graph_(graph), + codegen_(codegen), linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()), block_infos_(graph.GetArena(), graph.GetBlocks().Size()), instructions_from_ssa_index_(graph.GetArena(), 0), + instructions_from_lifetime_position_(graph.GetArena(), 0), number_of_ssa_values_(0) { block_infos_.SetSize(graph.GetBlocks().Size()); } @@ -424,6 +495,14 @@ class SsaLivenessAnalysis : public ValueObject { return instructions_from_ssa_index_.Get(index); } + HInstruction* GetInstructionFromPosition(size_t index) const { + return instructions_from_lifetime_position_.Get(index); + } + + size_t GetMaxLifetimePosition() const { + return instructions_from_lifetime_position_.Size() * 2 - 1; + } + size_t GetNumberOfSsaValues() const { return number_of_ssa_values_; } @@ -458,14 +537,52 @@ class SsaLivenessAnalysis : public ValueObject { bool UpdateLiveOut(const HBasicBlock& block); const HGraph& graph_; + CodeGenerator* const codegen_; GrowableArray<HBasicBlock*> linear_post_order_; GrowableArray<BlockInfo*> block_infos_; + + // Temporary array used when computing live_in, live_out, and kill sets. GrowableArray<HInstruction*> instructions_from_ssa_index_; + + // Temporary array used when inserting moves in the graph. + GrowableArray<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; +class HLinearOrderIterator : public ValueObject { + public: + explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness) + : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {} + + bool Done() const { return index_ == 0; } + HBasicBlock* Current() const { return post_order_.Get(index_ -1); } + void Advance() { --index_; DCHECK_GE(index_, 0U); } + + private: + const GrowableArray<HBasicBlock*>& post_order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); +}; + +class HLinearPostOrderIterator : public ValueObject { + public: + explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness) + : post_order_(liveness.GetLinearPostOrder()), index_(0) {} + + bool Done() const { return index_ == post_order_.Size(); } + HBasicBlock* Current() const { return post_order_.Get(index_); } + void Advance() { ++index_; } + + private: + const GrowableArray<HBasicBlock*>& post_order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 485ea27..088a5c4 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -87,6 +87,13 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TransformToSSA(); ReNumberInstructions(graph); + // Test that phis had their type set. 
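A small self-check of the GetMaxLifetimePosition() arithmetic introduced above (assumption: the side table holds one entry per two lifetime positions, counting the per-block null markers):

#include <cassert>
#include <cstddef>

int main() {
  const size_t kEntries = 5;               // Hypothetical table size.
  size_t last_start = 2 * (kEntries - 1);  // Even "start" slot of the last entry: 8.
  size_t max_position = kEntries * 2 - 1;  // 9: the last entry's odd "end" slot.
  assert(max_position == last_start + 1);
  return 0;
}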
+ for (size_t i = 0, e = graph->GetBlocks().Size(); i < e; ++i) { + for (HInstructionIterator it(graph->GetBlocks().Get(i)->GetPhis()); !it.Done(); it.Advance()) { + ASSERT_NE(it.Current()->GetType(), Primitive::kPrimVoid); + } + } + SsaPrettyPrinter printer(graph); printer.VisitInsertionOrder(); @@ -99,7 +106,7 @@ TEST(SsaTest, CFG1) { "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [2, 2]\n" " 1: Goto\n" - "BasicBlock 1, pred: 0, succ: 2, 5\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" " 2: Equal(0, 0) [3]\n" " 3: If(2)\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -129,7 +136,7 @@ TEST(SsaTest, CFG2) { " 0: IntConstant 0 [6, 3, 3]\n" " 1: IntConstant 4 [6]\n" " 2: Goto\n" - "BasicBlock 1, pred: 0, succ: 2, 5\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" " 3: Equal(0, 0) [4]\n" " 4: If(3)\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -409,7 +416,7 @@ TEST(SsaTest, Loop7) { " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 5, succ: 3, 8\n" + "BasicBlock 2, pred: 1, 5, succ: 8, 3\n" " 5: Phi(0, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" @@ -467,7 +474,7 @@ TEST(SsaTest, LocalInIf) { " 0: IntConstant 0 [3, 3]\n" " 1: IntConstant 4\n" " 2: Goto\n" - "BasicBlock 1, pred: 0, succ: 2, 5\n" + "BasicBlock 1, pred: 0, succ: 5, 2\n" " 3: Equal(0, 0) [4]\n" " 4: If(3)\n" "BasicBlock 2, pred: 1, succ: 3\n" @@ -489,4 +496,43 @@ TEST(SsaTest, LocalInIf) { TestCode(data, expected); } +TEST(SsaTest, MultiplePredecessors) { + // Test that we do not create a phi when one predecessor + // does not update the local. + const char* expected = + "BasicBlock 0, succ: 1\n" + " 0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n" + " 1: Goto\n" + "BasicBlock 1, pred: 0, succ: 3, 2\n" + " 2: Equal(0, 0) [3]\n" + " 3: If(2)\n" + "BasicBlock 2, pred: 1, succ: 5\n" + " 4: Add(0, 0)\n" + " 5: Goto\n" + "BasicBlock 3, pred: 1, succ: 7, 4\n" + " 6: Equal(0, 0) [7]\n" + " 7: If(6)\n" + "BasicBlock 4, pred: 3, succ: 5\n" + " 8: Add(0, 0)\n" + " 9: Goto\n" + // This block should not get a phi for local 1. + "BasicBlock 5, pred: 2, 4, 7, succ: 6\n" + " 10: ReturnVoid\n" + "BasicBlock 6, pred: 5\n" + " 11: Exit\n" + "BasicBlock 7, pred: 3, succ: 5\n" + " 12: Goto\n"; + + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 5, + Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, + Instruction::GOTO | 0x0500, + Instruction::IF_EQ, 4, + Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, + Instruction::RETURN_VOID); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc new file mode 100644 index 0000000..53fa74e --- /dev/null +++ b/compiler/optimizing/ssa_type_propagation.cc @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ssa_type_propagation.h" + +#include "nodes.h" + +namespace art { + +static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { + // We trust the verifier has already done the necessary checking. + switch (existing) { + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + case Primitive::kPrimNot: + return existing; + default: + return new_type; + } +} + +// Re-compute and update the type of the instruction. Returns +// whether or not the type was changed. +static bool UpdateType(HPhi* phi) { + Primitive::Type existing = phi->GetType(); + + Primitive::Type new_type = Primitive::kPrimVoid; + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + Primitive::Type input_type = phi->InputAt(i)->GetType(); + new_type = MergeTypes(new_type, input_type); + } + phi->SetType(new_type); + return existing != new_type; +} + +void SsaTypePropagation::Run() { + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + ProcessWorklist(); +} + +void SsaTypePropagation::VisitBasicBlock(HBasicBlock* block) { + if (block->IsLoopHeader()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + // Set the initial type for the phi. Use the non back edge input for reaching + // a fixed point faster. + phi->SetType(phi->InputAt(0)->GetType()); + AddToWorklist(phi); + } + } else { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + if (UpdateType(phi)) { + AddDependentInstructionsToWorklist(phi); + } + } + } +} + +void SsaTypePropagation::ProcessWorklist() { + while (!worklist_.IsEmpty()) { + HPhi* instruction = worklist_.Pop(); + if (UpdateType(instruction)) { + AddDependentInstructionsToWorklist(instruction); + } + } +} + +void SsaTypePropagation::AddToWorklist(HPhi* instruction) { + worklist_.Add(instruction); +} + +void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { + for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->GetUser()->AsPhi(); + if (phi != nullptr) { + AddToWorklist(phi); + } + } +} + +} // namespace art diff --git a/compiler/optimizing/ssa_type_propagation.h b/compiler/optimizing/ssa_type_propagation.h new file mode 100644 index 0000000..5f471a9 --- /dev/null +++ b/compiler/optimizing/ssa_type_propagation.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ +#define ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ + +#include "nodes.h" + +namespace art { + +// Compute and propagate types of phis in the graph. 
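The propagation above is a classic worklist fixed point over a small type lattice: float, double, and reference are sticky, and anything else yields to the incoming type, so phi types settle quickly in practice. A toy model of the merge, showing a loop phi seeded from its forward input and then promoted by the back edge (illustrative only, not ART code):

#include <cassert>

enum class Type { kVoid, kInt, kFloat, kDouble, kReference };

Type Merge(Type existing, Type incoming) {
  switch (existing) {
    case Type::kFloat:
    case Type::kDouble:
    case Type::kReference:
      return existing;  // Already refined: sticky.
    default:
      return incoming;
  }
}

int main() {
  Type phi = Type::kInt;           // Seeded from the non-back-edge input.
  phi = Merge(phi, Type::kFloat);  // Back-edge input turns out to be float.
  assert(phi == Type::kFloat);     // Refinement re-enqueues the phi's users.
  return 0;
}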
+class SsaTypePropagation : public ValueObject { + public: + explicit SsaTypePropagation(HGraph* graph) + : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + + void Run(); + + private: + void VisitBasicBlock(HBasicBlock* block); + void ProcessWorklist(); + void AddToWorklist(HPhi* phi); + void AddDependentInstructionsToWorklist(HPhi* phi); + + HGraph* const graph_; + GrowableArray<HPhi*> worklist_; + + static constexpr size_t kDefaultWorklistSize = 8; + + DISALLOW_COPY_AND_ASSIGN(SsaTypePropagation); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SSA_TYPE_PROPAGATION_H_ diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index ca4635d..6a39641 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -215,7 +215,7 @@ void ArenaAllocator::UpdateBytesAllocated() { } void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = (bytes + 3 + kValgrindRedZoneBytes) & ~3; + size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); if (UNLIKELY(ptr_ + rounded_bytes > end_)) { // Obtain a new block. ObtainNewArenaForAllocation(rounded_bytes); diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index dbe482d..ac3938f 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -156,7 +156,7 @@ class ArenaAllocator : private ArenaAllocatorStats { if (UNLIKELY(running_on_valgrind_)) { return AllocValgrind(bytes, kind); } - bytes = RoundUp(bytes, 4); + bytes = RoundUp(bytes, 8); if (UNLIKELY(ptr_ + bytes > end_)) { // Obtain a new block. ObtainNewArenaForAllocation(bytes); diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc index b8b0e6e..aeb2f76 100644 --- a/compiler/utils/scoped_arena_allocator.cc +++ b/compiler/utils/scoped_arena_allocator.cc @@ -92,7 +92,7 @@ void ArenaStack::UpdateBytesAllocated() { } void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4); + size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); uint8_t* ptr = top_ptr_; if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { ptr = AllocateFromNextArena(rounded_bytes); diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h index c090062..37799cb 100644 --- a/compiler/utils/scoped_arena_allocator.h +++ b/compiler/utils/scoped_arena_allocator.h @@ -67,7 +67,7 @@ class ArenaStack : private DebugStackRefCounter { if (UNLIKELY(running_on_valgrind_)) { return AllocValgrind(bytes, kind); } - size_t rounded_bytes = RoundUp(bytes, 4); + size_t rounded_bytes = RoundUp(bytes, 8); uint8_t* ptr = top_ptr_; if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { ptr = AllocateFromNextArena(rounded_bytes); diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 456e3b5..e6a6860 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -258,6 +258,17 @@ DISASSEMBLER_ENTRY(cmp, reg_in_opcode = true; target_specific = true; break; + case 0x63: + if (rex == 0x48) { + opcode << "movsxd"; + has_modrm = true; + load = true; + } else { + // In 32-bit mode (!supports_rex_) this is ARPL, with no REX prefix the functionality is the + // same as 'mov' but the use of the instruction is discouraged. 
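Returning to the arena changes above: bumping the allocators' alignment from 4 to 8 keeps 64-bit values naturally aligned on 64-bit targets. The usual power-of-two rounding trick, self-contained (assumed to mirror ART's RoundUp semantics):

#include <cassert>
#include <cstddef>

constexpr size_t RoundUp(size_t x, size_t n) {  // n must be a power of two.
  return (x + n - 1) & ~(n - 1);
}

int main() {
  assert(RoundUp(13, 8) == 16);  // 13 bytes occupy two 8-byte units.
  assert(RoundUp(16, 8) == 16);  // Already aligned: unchanged.
  return 0;
}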
+ opcode << StringPrintf("unknown opcode '%02X'", *instr); + } + break; case 0x68: opcode << "push"; immediate_bytes = 4; break; case 0x69: opcode << "imul"; load = true; has_modrm = true; immediate_bytes = 4; break; case 0x6A: opcode << "push"; immediate_bytes = 1; break; diff --git a/runtime/Android.mk b/runtime/Android.mk index 7a832c1..8d532c7 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -26,6 +26,7 @@ LIBART_COMMON_SRC_FILES := \ base/hex_dump.cc \ base/logging.cc \ base/mutex.cc \ + base/scoped_flock.cc \ base/stringpiece.cc \ base/stringprintf.cc \ base/timing_logger.cc \ diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 00b94ec..9a877f6 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -484,7 +484,7 @@ ENTRY \c_name // Helper signature is always // (method_idx, *this_object, *caller_method, *self, sp) - ldr x2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE] // pass caller Method* + ldr w2, [sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE] // pass caller Method* mov x3, xSELF // pass Thread::Current mov x4, sp bl \cxx_name // (method_idx, this, caller, Thread*, SP) diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 56d51e2..3be0faf 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -82,9 +82,9 @@ class StubTest : public CommonRuntimeTest { "addl $4, %%esp" // Pop referrer : "=a" (result) // Use the result from eax - : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer) - // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx - : ); // clobber. + : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer) + // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx + : "memory"); // clobber. // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs, // but compilation fails when declaring that. #elif defined(__arm__) @@ -122,7 +122,7 @@ class StubTest : public CommonRuntimeTest { // Use the result from r0 : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), [referrer] "r"(referrer) - : ); // clobber. + : "memory"); // clobber. #elif defined(__aarch64__) __asm__ __volatile__( // Spill x0-x7 which we say we don't clobber. May contain args. @@ -255,7 +255,8 @@ class StubTest : public CommonRuntimeTest { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", - "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber. + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "memory"); // clobber. #elif defined(__x86_64__) // Note: Uses the native convention // TODO: Set the thread? @@ -268,9 +269,10 @@ class StubTest : public CommonRuntimeTest { ".cfi_adjust_cfa_offset -16\n\t" : "=a" (result) // Use the result from rax - : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer) - // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax - : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all + : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer) + // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax + : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "memory"); // clobber all // TODO: Should we clobber the other registers? 
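The "memory" clobbers added to these asm blocks tell the compiler that the invoked stub may read or write arbitrary memory, so values cached in registers must be flushed before, and reloaded after, the statement. A minimal x86-64 illustration (not ART code) of what the clobber buys:

#include <cstdint>

// Without the "memory" clobber the compiler may assume *p is unchanged by the
// asm statement and fold the two loads of *p into one; with it, the second
// read below is forced to see the store performed inside the asm.
uint64_t StoreThroughAsm(uint64_t* p, uint64_t v) {
  uint64_t before = *p;
  __asm__ __volatile__("movq %[v], (%[p])"
                       :                        // No outputs.
                       : [p] "r"(p), [v] "r"(v)
                       : "memory");
  return before + *p;  // *p now holds v.
}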
#else LOG(WARNING) << "Was asked to invoke for an architecture I do not understand."; @@ -303,9 +305,9 @@ class StubTest : public CommonRuntimeTest { "addl $4, %%esp" // Pop referrer : "=a" (result) // Use the result from eax - : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden) - // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx - : ); // clobber. + : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden) + // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx + : "memory"); // clobber. // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs, // but compilation fails when declaring that. #elif defined(__arm__) @@ -343,9 +345,9 @@ class StubTest : public CommonRuntimeTest { "mov %[result], r0\n\t" // Save the result : [result] "=r" (result) // Use the result from r0 - : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), - [referrer] "r"(referrer), [hidden] "r"(hidden) - : ); // clobber. + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer), [hidden] "r"(hidden) + : "memory"); // clobber. #elif defined(__aarch64__) __asm__ __volatile__( // Spill x0-x7 which we say we don't clobber. May contain args. @@ -477,7 +479,8 @@ class StubTest : public CommonRuntimeTest { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", - "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber. + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "memory"); // clobber. #elif defined(__x86_64__) // Note: Uses the native convention // TODO: Set the thread? @@ -494,7 +497,8 @@ class StubTest : public CommonRuntimeTest { // Use the result from rax : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden) // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax - : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all + : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "memory"); // clobber all // TODO: Should we clobber the other registers? #else LOG(WARNING) << "Was asked to invoke for an architecture I do not understand."; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 07268ea..989ecf9 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1152,8 +1152,92 @@ DEFINE_FUNCTION art_quick_resolution_trampoline END_FUNCTION art_quick_resolution_trampoline DEFINE_FUNCTION art_quick_generic_jni_trampoline - int3 - int3 + SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME + // This also stores the native ArtMethod reference at the bottom of the stack. + + movl %esp, %ebp // save SP at callee-save frame + movl %esp, %edi + CFI_DEF_CFA_REGISTER(edi) + subl LITERAL(5120), %esp + // prepare for artQuickGenericJniTrampoline call + // (Thread*, SP) + // (esp) 4(esp) <= C calling convention + // fs:... ebp <= where they are + // Also: PLT, so need GOT in ebx. + + subl LITERAL(8), %esp // Padding for 16B alignment. + pushl %ebp // Pass SP (to ArtMethod). + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). + SETUP_GOT_NOSAVE // Clobbers ebx. + call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp) + // Drop call stack. 
+ addl LITERAL(16), %esp
+
+ // At the bottom of the alloca we now have the pointer to the method (= the bottom of the
+ // callee-save frame); pop it to get the adjusted frame pointer.
+ popl %ebp
+
+ // Check for error, negative value.
+ test %eax, %eax
+ js .Lentry_error
+
+ // release part of the alloca, get the code pointer
+ addl %eax, %esp
+ popl %eax
+
+ // On x86 there are no registers passed, so nothing to pop here.
+
+ // Native call.
+ call *%eax
+
+ // Pop the native stack, but keep the space that was reserved for the cookie.
+ movl %ebp, %esp
+ subl LITERAL(16), %esp // Alignment.
+
+ // result sign extension is handled in C code
+ // prepare for artQuickGenericJniEndTrampoline call
+ // (Thread*, SP, result, result_f)
+ // (esp) 4(esp) 8(esp) 16(esp) <= C calling convention
+ // fs:... ebp eax:edx xmm0 <= where they are
+
+ subl LITERAL(8), %esp // Pass float result.
+ movsd %xmm0, (%esp)
+ pushl %edx // Pass int result.
+ pushl %eax
+ pushl %ebp // Pass SP (to ArtMethod).
+ pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
+ call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
+
+ // Tear down the alloca.
+ movl %edi, %esp
+ CFI_DEF_CFA_REGISTER(esp)
+
+ // Pending exceptions possible.
+ mov %fs:THREAD_EXCEPTION_OFFSET, %ebx
+ testl %ebx, %ebx
+ jnz .Lexception_in_native
+
+ // Tear down the callee-save frame.
+ addl MACRO_LITERAL(4), %esp // Remove padding
+ CFI_ADJUST_CFA_OFFSET(-4)
+ POP ecx
+ addl MACRO_LITERAL(4), %esp // Avoid edx, as it may be part of the result.
+ CFI_ADJUST_CFA_OFFSET(-4)
+ POP ebx
+ POP ebp // Restore callee saves
+ POP esi
+ POP edi
+ // store into fpr, for when it's an fpr return...
+ movd %eax, %xmm0
+ movd %edx, %xmm1
+ punpckldq %xmm1, %xmm0
+ ret
+.Lentry_error:
+ movl %edi, %esp
+ CFI_DEF_CFA_REGISTER(esp)
+.Lexception_in_native:
+ RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+ DELIVER_PENDING_EXCEPTION END_FUNCTION art_quick_generic_jni_trampoline DEFINE_FUNCTION art_quick_to_interpreter_bridge diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index d2ac0ad..c9220c8 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1002,15 +1002,12 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler END_FUNCTION art_quick_proxy_invoke_handler /*
- * Called to resolve an imt conflict. Clobbers %rax (which will be clobbered later anyways).
- *
- * xmm0 is a hidden argument that holds the target method's dex method index.
- * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
+ * Called to resolve an imt conflict.
+ * rax is a hidden argument that holds the target method's dex method index.
*/ DEFINE_FUNCTION art_quick_imt_conflict_trampoline movl 8(%rsp), %edi // load caller Method* movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi // load dex_cache_resolved_methods - movd %xmm0, %rax // get target method index stored in xmm0 movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi // load the target method jmp art_quick_invoke_interface_trampoline_local END_FUNCTION art_quick_imt_conflict_trampoline diff --git a/runtime/atomic.h b/runtime/atomic.h index 04daea8..ed83a33 100644 --- a/runtime/atomic.h +++ b/runtime/atomic.h @@ -392,7 +392,7 @@ template<class T> struct AtomicHelper<8, T> { // sizeof(T) == 8 volatile const int64_t* loc_ptr = reinterpret_cast<volatile const int64_t*>(loc); - return reinterpret_cast<T>(QuasiAtomic::Read64(loc_ptr)); + return static_cast<T>(QuasiAtomic::Read64(loc_ptr)); } static void StoreRelaxed(volatile T* loc, T desired) { @@ -400,7 +400,7 @@ template<class T> struct AtomicHelper<8, T> { volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc); QuasiAtomic::Write64(loc_ptr, - reinterpret_cast<int64_t>(desired)); + static_cast<int64_t>(desired)); } @@ -409,8 +409,8 @@ template<class T> struct AtomicHelper<8, T> { // sizeof(T) == 8 volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc); return QuasiAtomic::Cas64( - reinterpret_cast<int64_t>(expected_value), - reinterpret_cast<int64_t>(desired_value), loc_ptr); + static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)), + static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr); } }; diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc new file mode 100644 index 0000000..c0bce84 --- /dev/null +++ b/runtime/base/scoped_flock.cc @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "scoped_flock.h" + +#include <sys/file.h> +#include <sys/stat.h> + +#include "base/logging.h" +#include "base/stringprintf.h" +#include "base/unix_file/fd_file.h" + +namespace art { + +bool ScopedFlock::Init(const char* filename, std::string* error_msg) { + while (true) { + file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR)); + if (file_.get() == NULL) { + *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno)); + return false; + } + int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX)); + if (flock_result != 0) { + *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno)); + return false; + } + struct stat fstat_stat; + int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat)); + if (fstat_result != 0) { + *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno)); + return false; + } + struct stat stat_stat; + int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat)); + if (stat_result != 0) { + PLOG(WARNING) << "Failed to stat, will retry: " << filename; + // ENOENT can happen if someone racing with us unlinks the file we created so just retry. + continue; + } + if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) { + LOG(WARNING) << "File changed while locking, will retry: " << filename; + continue; + } + return true; + } +} + +File* ScopedFlock::GetFile() { + CHECK(file_.get() != NULL); + return file_.get(); +} + +ScopedFlock::ScopedFlock() { } + +ScopedFlock::~ScopedFlock() { + if (file_.get() != NULL) { + int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN)); + CHECK_EQ(0, flock_result); + } +} + +} // namespace art diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h new file mode 100644 index 0000000..26b4eb0 --- /dev/null +++ b/runtime/base/scoped_flock.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_BASE_SCOPED_FLOCK_H_ +#define ART_RUNTIME_BASE_SCOPED_FLOCK_H_ + +#include <memory> +#include <string> + +#include "base/macros.h" +#include "os.h" + +namespace art { + +class ScopedFlock { + public: + ScopedFlock(); + + // Attempts to acquire an exclusive file lock (see flock(2)) on the file + // at filename, and blocks until it can do so. + // + // Returns true if the lock could be acquired, or false if an error + // occurred. It is an error if the file does not exist, or if its inode + // changed (usually due to a new file being created at the same path) + // between attempts to lock it. + bool Init(const char* filename, std::string* error_msg); + + // Returns the (locked) file associated with this instance. 
+ File* GetFile();
+ ~ScopedFlock();
+ private:
+ std::unique_ptr<File> file_;
+ DISALLOW_COPY_AND_ASSIGN(ScopedFlock);
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_BASE_SCOPED_FLOCK_H_ diff --git a/runtime/base/scoped_flock_test.cc b/runtime/base/scoped_flock_test.cc new file mode 100644 index 0000000..8fa181a --- /dev/null +++ b/runtime/base/scoped_flock_test.cc @@ -0,0 +1,41 @@ +/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scoped_flock.h"
+#include "common_runtime_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class ScopedFlockTest : public CommonRuntimeTest {};
+
+TEST_F(ScopedFlockTest, TestLocking) {
+ ScratchFile scratch_file;
+ std::string error_msg;
+
+ // NOTE: Locks applied using flock(2) and fcntl(2) are oblivious
+ // to each other, so attempting to query locks set by flock(2)
+ // using fcntl(2) with F_GETLK will not work; see the kernel
+ // documentation in Documentation/filesystems/locks.txt.
+ ScopedFlock file_lock;
+ ASSERT_TRUE(file_lock.Init(scratch_file.GetFilename().c_str(),
+ &error_msg));
+
+ ASSERT_FALSE(file_lock.Init("/guaranteed/not/to/exist", &error_msg));
+}
+
+} // namespace art diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 330b110..28164cd 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -27,6 +27,7 @@ #include "base/casts.h" #include "base/logging.h" +#include "base/scoped_flock.h" #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker-inl.h" @@ -701,60 +702,6 @@ const DexFile* ClassLinker::FindDexFileInOatLocation(const char* dex_location, return dex_file; } -class ScopedFlock { - public: - ScopedFlock() {} - - bool Init(const char* filename, std::string* error_msg) { - while (true) { - file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR)); - if (file_.get() == NULL) { - *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno)); - return false; - } - int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX)); - if (flock_result != 0) { - *error_msg = StringPrintf("Failed to lock file '%s': %s", filename, strerror(errno)); - return false; - } - struct stat fstat_stat; - int fstat_result = TEMP_FAILURE_RETRY(fstat(file_->Fd(), &fstat_stat)); - if (fstat_result != 0) { - *error_msg = StringPrintf("Failed to fstat file '%s': %s", filename, strerror(errno)); - return false; - } - struct stat stat_stat; - int stat_result = TEMP_FAILURE_RETRY(stat(filename, &stat_stat)); - if (stat_result != 0) { - PLOG(WARNING) << "Failed to stat, will retry: " << filename; - // ENOENT can happen if someone racing with us unlinks the file we created so just retry.
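As the test above suggests, ScopedFlock is meant as a scope guard: Init blocks until the exclusive lock is held (retrying if the file is unlinked or replaced underneath us), and the destructor issues LOCK_UN. A hypothetical caller sketch (the path and DoWorkHoldingLock are illustrative, not ART code):

#include <string>

#include "base/logging.h"
#include "base/scoped_flock.h"

bool DoWorkHoldingLock(File* file);  // hypothetical helper

bool WorkOnLockedFile() {
  std::string error_msg;
  ScopedFlock lock;
  // Blocks until the exclusive flock(2) lock on the file is acquired.
  if (!lock.Init("/data/example.oat", &error_msg)) {
    LOG(ERROR) << error_msg;
    return false;
  }
  // The lock is held for the rest of this scope; released in ~ScopedFlock.
  return DoWorkHoldingLock(lock.GetFile());
}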
- continue; - } - if (fstat_stat.st_dev != stat_stat.st_dev || fstat_stat.st_ino != stat_stat.st_ino) { - LOG(WARNING) << "File changed while locking, will retry: " << filename; - continue; - } - return true; - } - } - - File& GetFile() { - return *file_; - } - - ~ScopedFlock() { - if (file_.get() != NULL) { - int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN)); - CHECK_EQ(0, flock_result); - } - } - - private: - std::unique_ptr<File> file_; - - DISALLOW_COPY_AND_ASSIGN(ScopedFlock); -}; - const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation( const char* dex_location, uint32_t dex_location_checksum, @@ -785,7 +732,7 @@ const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation( // Generate the output oat file for the dex file VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location; - if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, &error_msg)) { + if (!GenerateOatFile(dex_location, scoped_flock.GetFile()->Fd(), oat_location, &error_msg)) { CHECK(!error_msg.empty()); error_msgs->push_back(error_msg); return nullptr; diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h index b582abb..e573d6d 100644 --- a/runtime/entrypoints/quick/callee_save_frame.h +++ b/runtime/entrypoints/quick/callee_save_frame.h @@ -18,8 +18,17 @@ #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_ #include "base/mutex.h" +#include "instruction_set.h" #include "thread-inl.h" +// Specific frame size code is in architecture-specific files. We include this to compile-time +// specialize the code. +#include "arch/arm/quick_method_frame_info_arm.h" +#include "arch/arm64/quick_method_frame_info_arm64.h" +#include "arch/mips/quick_method_frame_info_mips.h" +#include "arch/x86/quick_method_frame_info_x86.h" +#include "arch/x86_64/quick_method_frame_info_x86_64.h" + namespace art { namespace mirror { class ArtMethod; @@ -36,6 +45,34 @@ static inline void FinishCalleeSaveFrameSetup(Thread* self, StackReference<mirro self->VerifyStack(); } +static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, Runtime::CalleeSaveType type) { + // constexpr must be a return statement. + return (isa == kArm || isa == kThumb2) ? arm::ArmCalleeSaveFrameSize(type) : + isa == kArm64 ? arm64::Arm64CalleeSaveFrameSize(type) : + isa == kMips ? mips::MipsCalleeSaveFrameSize(type) : + isa == kX86 ? x86::X86CalleeSaveFrameSize(type) : + isa == kX86_64 ? x86_64::X86_64CalleeSaveFrameSize(type) : + isa == kNone ? (LOG(FATAL) << "kNone has no frame size", 0) : + (LOG(FATAL) << "Unknown instruction set" << isa, 0); +} + +// Note: this specialized statement is sanity-checked in the quick-trampoline gtest. +static constexpr size_t GetConstExprPointerSize(InstructionSet isa) { + // constexpr must be a return statement. + return (isa == kArm || isa == kThumb2) ? kArmPointerSize : + isa == kArm64 ? kArm64PointerSize : + isa == kMips ? kMipsPointerSize : + isa == kX86 ? kX86PointerSize : + isa == kX86_64 ? kX86_64PointerSize : + isa == kNone ? (LOG(FATAL) << "kNone has no pointer size", 0) : + (LOG(FATAL) << "Unknown instruction set" << isa, 0); +} + +// Note: this specialized statement is sanity-checked in the quick-trampoline gtest. 
+static constexpr size_t GetCalleeSavePCOffset(InstructionSet isa, Runtime::CalleeSaveType type) { + return GetCalleeSaveFrameSize(isa, type) - GetConstExprPointerSize(isa); +} + } // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_ diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc index 3178cde..5cb0f36 100644 --- a/runtime/entrypoints/quick/quick_field_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc @@ -248,10 +248,7 @@ extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value, Thread* self, StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - Runtime* runtime = Runtime::Current(); - mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly); - uint32_t frame_size = - runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes(); + constexpr size_t frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsOnly); mirror::ArtMethod* referrer = reinterpret_cast<StackReference<mirror::ArtMethod>*>( reinterpret_cast<uint8_t*>(sp) + frame_size)->AsMirrorPtr(); @@ -262,7 +259,7 @@ extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, field->Set64<false>(obj, new_value); return 0; // success } - sp->Assign(callee_save); + sp->Assign(Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly)); self->SetTopOfStack(sp, 0); field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t)); diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc index 5febcb7..d161d0b 100644 --- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc @@ -52,10 +52,8 @@ extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self, // Be aware the store below may well stomp on an incoming argument. Locks::mutator_lock_->AssertSharedHeld(self); Runtime* runtime = Runtime::Current(); - mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly); - sp->Assign(callee_save); - uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes( - runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes()); + sp->Assign(runtime->GetCalleeSaveMethod(Runtime::kRefsOnly)); + uint32_t return_pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly); uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + return_pc_offset); CHECK_EQ(*return_pc, 0U); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index c41c090..63010cf 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -37,6 +37,9 @@ namespace art { class QuickArgumentVisitor { // Number of bytes for each out register in the caller method's frame. static constexpr size_t kBytesStackArgLocation = 4; + // Frame size in bytes of a callee-save frame for RefsAndArgs. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = + GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kRefsAndArgs); #if defined(__arm__) // The callee save frame is pointed to by SP. 
// | argN | | @@ -59,7 +62,6 @@ class QuickArgumentVisitor { static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8; // Offset of first GPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 44; // Offset of return address. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 48; // Frame size. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -87,10 +89,9 @@ class QuickArgumentVisitor { static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. static constexpr size_t kNumQuickGprArgs = 7; // 7 arguments passed in GPRs. static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =16; // Offset of first FPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144; // Offset of first GPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296; // Offset of return address. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 304; // Frame size. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -115,7 +116,6 @@ class QuickArgumentVisitor { static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4; // Offset of first GPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 60; // Offset of return address. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 64; // Frame size. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -140,7 +140,6 @@ class QuickArgumentVisitor { static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4; // Offset of first GPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 28; // Offset of return address. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 32; // Frame size. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -173,12 +172,11 @@ class QuickArgumentVisitor { // | Padding | // | RDI/Method* | <- sp static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. - static constexpr size_t kNumQuickGprArgs = 5; // 3 arguments passed in GPRs. - static constexpr size_t kNumQuickFprArgs = 8; // 0 arguments passed in FPRs. + static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168; // Offset of return address. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 176; // Frame size. 
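One invariant worth noticing in the per-ISA blocks above: the return-address (LR) offset is always the frame size minus one pointer, because the return PC occupies the top pointer-sized slot of the callee-save frame. That is exactly what GetCalleeSavePCOffset in callee_save_frame.h computes from GetCalleeSaveFrameSize and GetConstExprPointerSize. Checking it against the constants above (a standalone sketch; the numbers are copied from this file):

// The return PC lives in the top pointer-sized slot of each RefsAndArgs frame.
static_assert(48 - 4 == 44,   "arm:    FrameSize - pointer size == LrOffset");
static_assert(304 - 8 == 296, "arm64:  FrameSize - pointer size == LrOffset");
static_assert(64 - 4 == 60,   "mips:   FrameSize - pointer size == LrOffset");
static_assert(32 - 4 == 28,   "x86:    FrameSize - pointer size == LrOffset");
static_assert(176 - 8 == 168, "x86_64: FrameSize - pointer size == LrOffset");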
static size_t GprIndexToGprOffset(uint32_t gpr_index) { switch (gpr_index) { case 0: return (4 * GetBytesPerGprSpillLocation(kRuntimeISA)); @@ -220,10 +218,7 @@ class QuickArgumentVisitor { stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize + StackArgumentStartFromShorty(is_static, shorty, shorty_len)), gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid), - is_split_long_or_double_(false) { - DCHECK_EQ(kQuickCalleeSaveFrame_RefAndArgs_FrameSize, - Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes()); - } + is_split_long_or_double_(false) { } virtual ~QuickArgumentVisitor() {} @@ -1199,10 +1194,8 @@ class ComputeGenericJniFrameSize FINAL { size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>); sp8 -= scope_and_method; - // Align by kStackAlignment - uintptr_t sp_to_align = reinterpret_cast<uintptr_t>(sp8); - sp_to_align = RoundDown(sp_to_align, kStackAlignment); - sp8 = reinterpret_cast<uint8_t*>(sp_to_align); + // Align by kStackAlignment. + sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment)); uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>); *table = reinterpret_cast<HandleScope*>(sp8_table); @@ -1222,9 +1215,8 @@ class ComputeGenericJniFrameSize FINAL { // Next comes the native call stack. sp8 -= GetStackSize(); - // Now align the call stack below. This aligns by 16, as AArch64 seems to require. - uintptr_t mask = ~0x0F; - sp8 = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(sp8) & mask); + // Align by kStackAlignment. + sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment)); *start_stack = reinterpret_cast<uintptr_t*>(sp8); // put fprs and gprs below @@ -1756,66 +1748,12 @@ extern "C" TwoWordReturn artInvokeInterfaceTrampoline(mirror::ArtMethod* interfa } else { FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs); DCHECK(interface_method == Runtime::Current()->GetResolutionMethod()); - // Determine method index from calling dex instruction. -#if defined(__arm__) - // On entry the stack pointed by sp is: - // | argN | | - // | ... | | - // | arg4 | | - // | arg3 spill | | Caller's frame - // | arg2 spill | | - // | arg1 spill | | - // | Method* | --- - // | LR | - // | ... | callee saves - // | R3 | arg3 - // | R2 | arg2 - // | R1 | arg1 - // | R0 | - // | Method* | <- sp - DCHECK_EQ(48U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes()); - uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + kPointerSize); - uintptr_t caller_pc = regs[10]; -#elif defined(__i386__) - // On entry the stack pointed by sp is: - // | argN | | - // | ... | | - // | arg4 | | - // | arg3 spill | | Caller's frame - // | arg2 spill | | - // | arg1 spill | | - // | Method* | --- - // | Return | - // | EBP,ESI,EDI | callee saves - // | EBX | arg3 - // | EDX | arg2 - // | ECX | arg1 - // | EAX/Method* | <- sp - DCHECK_EQ(32U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes()); - uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp)); - uintptr_t caller_pc = regs[7]; -#elif defined(__mips__) - // On entry the stack pointed by sp is: - // | argN | | - // | ... | | - // | arg4 | | - // | arg3 spill | | Caller's frame - // | arg2 spill | | - // | arg1 spill | | - // | Method* | --- - // | RA | - // | ... 
| callee saves - // | A3 | arg3 - // | A2 | arg2 - // | A1 | arg1 - // | A0/Method* | <- sp - DCHECK_EQ(64U, Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes()); - uintptr_t* regs = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp)); - uintptr_t caller_pc = regs[15]; -#else - UNIMPLEMENTED(FATAL); - uintptr_t caller_pc = 0; -#endif + + // Find the caller PC. + constexpr size_t pc_offset = GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs); + uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) + pc_offset); + + // Map the caller PC to a dex PC. uint32_t dex_pc = caller_method->ToDexPc(caller_pc); const DexFile::CodeItem* code = MethodHelper(caller_method).GetCodeItem(); CHECK_LT(dex_pc, code->insns_size_in_code_units_); diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc new file mode 100644 index 0000000..66ee218 --- /dev/null +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdint.h> + +#include "callee_save_frame.h" +#include "common_runtime_test.h" +#include "mirror/art_method-inl.h" +#include "quick/quick_method_frame_info.h" + +namespace art { + +class QuickTrampolineEntrypointsTest : public CommonRuntimeTest { + protected: + static mirror::ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, + Runtime::CalleeSaveType type) + NO_THREAD_SAFETY_ANALYSIS { + Runtime* r = Runtime::Current(); + + Thread* t = Thread::Current(); + t->TransitionFromSuspendedToRunnable(); // So we can create callee-save methods. + + r->SetInstructionSet(isa); + mirror::ArtMethod* save_method = r->CreateCalleeSaveMethod(type); + r->SetCalleeSaveMethod(save_method, type); + + t->TransitionFromRunnableToSuspended(ThreadState::kNative); // So we can shut down. 
+
+ return save_method;
+ }
+
+ static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
+ NO_THREAD_SAFETY_ANALYSIS {
+ mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+ QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+ EXPECT_EQ(frame_info.FrameSizeInBytes(), save_size) << "Expected and real size differs for "
+ << type << " core spills=" << std::hex << frame_info.CoreSpillMask() << " fp spills="
+ << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+ }
+
+ static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
+ NO_THREAD_SAFETY_ANALYSIS {
+ mirror::ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
+ QuickMethodFrameInfo frame_info = save_method->GetQuickFrameInfo();
+ EXPECT_EQ(save_method->GetReturnPcOffsetInBytes(), pc_offset) << "Expected and real pc offset"
+ " differs for " << type << " core spills=" << std::hex << frame_info.CoreSpillMask() <<
+ " fp spills=" << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
+ }
+};
+
+// Note: these tests are all runtime tests. They let the Runtime create the corresponding ArtMethod
+// and check against it. Technically we know and expect certain values, but the Runtime code is
+// not constexpr, so we cannot make these checks at compile time (and I want the Runtime code tested).
+
+// This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
+TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
+ // We have to use a define here as the callee_save_frame.h functions are constexpr.
+#define CHECK_FRAME_SIZE(isa) \
+ CheckFrameSize(isa, Runtime::kRefsAndArgs, GetCalleeSaveFrameSize(isa, Runtime::kRefsAndArgs)); \
+ CheckFrameSize(isa, Runtime::kRefsOnly, GetCalleeSaveFrameSize(isa, Runtime::kRefsOnly)); \
+ CheckFrameSize(isa, Runtime::kSaveAll, GetCalleeSaveFrameSize(isa, Runtime::kSaveAll))
+
+ CHECK_FRAME_SIZE(kArm);
+ CHECK_FRAME_SIZE(kArm64);
+ CHECK_FRAME_SIZE(kMips);
+ CHECK_FRAME_SIZE(kX86);
+ CHECK_FRAME_SIZE(kX86_64);
+}
+
+// This test ensures that GetConstExprPointerSize is correct with respect to
+// GetInstructionSetPointerSize.
+TEST_F(QuickTrampolineEntrypointsTest, PointerSize) {
+ EXPECT_EQ(GetInstructionSetPointerSize(kArm), GetConstExprPointerSize(kArm));
+ EXPECT_EQ(GetInstructionSetPointerSize(kArm64), GetConstExprPointerSize(kArm64));
+ EXPECT_EQ(GetInstructionSetPointerSize(kMips), GetConstExprPointerSize(kMips));
+ EXPECT_EQ(GetInstructionSetPointerSize(kX86), GetConstExprPointerSize(kX86));
+ EXPECT_EQ(GetInstructionSetPointerSize(kX86_64), GetConstExprPointerSize(kX86_64));
+}
+
+// This test ensures that the constexpr specialization of the return PC offset computation in
+// GetCalleeSavePCOffset is correct.
+TEST_F(QuickTrampolineEntrypointsTest, ReturnPC) {
+ // Ensure that the computation in callee_save_frame.h is correct.
+ // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
+ // kPointerSize, which is wrong when the target bitwidth is not the same as the host's.
+ CheckPCOffset(kRuntimeISA, Runtime::kRefsAndArgs, + GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsAndArgs)); + CheckPCOffset(kRuntimeISA, Runtime::kRefsOnly, + GetCalleeSavePCOffset(kRuntimeISA, Runtime::kRefsOnly)); + CheckPCOffset(kRuntimeISA, Runtime::kSaveAll, + GetCalleeSavePCOffset(kRuntimeISA, Runtime::kSaveAll)); +} + +} // namespace art diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index d7b673e..f5f7a86 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2464,7 +2464,7 @@ void Heap::DumpForSigQuit(std::ostream& os) { } size_t Heap::GetPercentFree() { - return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / GetMaxMemory()); + return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / max_allowed_footprint_); } void Heap::SetIdealFootprint(size_t max_allowed_footprint) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index e568b36..9b49373 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -408,7 +408,7 @@ class Heap { // Implements java.lang.Runtime.freeMemory. size_t GetFreeMemory() const { - return GetMaxMemory() - num_bytes_allocated_.LoadSequentiallyConsistent(); + return max_allowed_footprint_ - num_bytes_allocated_.LoadSequentiallyConsistent(); } // get the space that corresponds to an object's address. Current implementation searches all diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index a58df8e..7988af7 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -61,15 +61,20 @@ mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* } // Try to see if the referent is already marked by using the is_marked_callback. We can return // it to the mutator as long as the GC is not preserving references. If the GC is - // preserving references, the mutator could take a white field and move it somewhere else - // in the heap causing corruption since this field would get swept. IsMarkedCallback* const is_marked_callback = process_references_args_.is_marked_callback_; - if (!preserving_references_ && is_marked_callback != nullptr) { + if (LIKELY(is_marked_callback != nullptr)) { mirror::Object* const obj = is_marked_callback(referent, process_references_args_.arg_); // If it's null it means not marked, but it could become marked if the referent is reachable - // by finalizer referents. So we can not return in this case and must block. + // by finalizer referents. So we can not return in this case and must block. Otherwise, we + // can return it to the mutator as long as the GC is not preserving references, in which + // case only black nodes can be safely returned. If the GC is preserving references, the + // mutator could take a white field from a grey or white node and move it somewhere else + // in the heap causing corruption since this field would get swept. if (obj != nullptr) { - return obj; + if (!preserving_references_ || + (LIKELY(!reference->IsFinalizerReferenceInstance()) && !reference->IsEnqueued())) { + return obj; + } } } condition_.WaitHoldingLocks(self); @@ -113,14 +118,14 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences"); // Unless required to clear soft references with white references, preserve some white referents. if (!clear_soft_references) { - TimingLogger::ScopedSplit split(concurrent ? 
"PreserveSomeSoftReferences" : - "(Paused)PreserveSomeSoftReferences", timings); + TimingLogger::ScopedSplit split(concurrent ? "ForwardSoftReferences" : + "(Paused)ForwardSoftReferences", timings); if (concurrent) { StartPreservingReferences(self); } - // References with a marked referent are removed from the list. - soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback, - &process_references_args_); + + soft_reference_queue_.ForwardSoftReferences(&PreserveSoftReferenceCallback, + &process_references_args_); process_mark_stack_callback(arg); if (concurrent) { StopPreservingReferences(self); diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc index caacef5..3910c29 100644 --- a/runtime/gc/reference_queue.cc +++ b/runtime/gc/reference_queue.cc @@ -160,22 +160,23 @@ void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_referenc } } -void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg) { - ReferenceQueue cleared; - while (!IsEmpty()) { - mirror::Reference* ref = DequeuePendingReference(); +void ReferenceQueue::ForwardSoftReferences(IsMarkedCallback* preserve_callback, + void* arg) { + if (UNLIKELY(IsEmpty())) { + return; + } + mirror::Reference* const head = list_; + mirror::Reference* ref = head; + do { mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>(); if (referent != nullptr) { mirror::Object* forward_address = preserve_callback(referent, arg); - if (forward_address == nullptr) { - // Either the reference isn't marked or we don't wish to preserve it. - cleared.EnqueuePendingReference(ref); - } else if (forward_address != referent) { + if (forward_address != nullptr && forward_address != referent) { ref->SetReferent<false>(forward_address); } } - } - list_ = cleared.GetList(); + ref = ref->GetPendingNext(); + } while (LIKELY(ref != head)); } } // namespace gc diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h index 4f223e2..1d8cc1a 100644 --- a/runtime/gc/reference_queue.h +++ b/runtime/gc/reference_queue.h @@ -65,7 +65,7 @@ class ReferenceQueue { // Walks the reference list marking any references subject to the reference clearing policy. // References with a black referent are removed from the list. References with white referents // biased toward saving are blackened and also removed from the list. - void PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg) + void ForwardSoftReferences(IsMarkedCallback* preserve_callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Unlink the reference list clearing references objects with white referents. Cleared references // registered to a reference queue are scheduled for appending by the heap worker thread. 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 3d35c00..61633cd 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -18,6 +18,7 @@ #include "base/stl_util.h" #include "base/unix_file/fd_file.h" +#include "base/scoped_flock.h" #include "gc/accounting/space_bitmap-inl.h" #include "mirror/art_method.h" #include "mirror/class-inl.h" @@ -148,7 +149,17 @@ ImageSpace* ImageSpace::Create(const char* image_location, std::string image_filename; std::string error_msg; bool is_system = false; - if (FindImageFilename(image_location, image_isa, &image_filename, &is_system)) { + const bool found_image = FindImageFilename(image_location, image_isa, &image_filename, + &is_system); + + // Note that we must not use the file descriptor associated with + // ScopedFlock::GetFile to Init the image file. We want the file + // descriptor (and the associated exclusive lock) to be released when + // we leave Create. + ScopedFlock image_lock; + image_lock.Init(image_filename.c_str(), &error_msg); + + if (found_image) { ImageSpace* space = ImageSpace::Init(image_filename.c_str(), image_location, !is_system, &error_msg); if (space != nullptr) { diff --git a/runtime/globals.h b/runtime/globals.h index 07fadb9..58c2118 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -36,13 +36,6 @@ static constexpr size_t GB = KB * KB * KB; static constexpr size_t kWordSize = sizeof(word); static constexpr size_t kPointerSize = sizeof(void*); -// Architecture-specific pointer sizes -static constexpr size_t kArmPointerSize = 4; -static constexpr size_t kArm64PointerSize = 8; -static constexpr size_t kMipsPointerSize = 4; -static constexpr size_t kX86PointerSize = 4; -static constexpr size_t kX86_64PointerSize = 8; - static constexpr size_t kBitsPerByte = 8; static constexpr size_t kBitsPerByteLog2 = 3; static constexpr int kBitsPerWord = kWordSize * kBitsPerByte; @@ -51,20 +44,6 @@ static constexpr size_t kWordHighBitMask = static_cast<size_t>(1) << (kBitsPerWo // Required stack alignment static constexpr size_t kStackAlignment = 16; -// ARM instruction alignment. ARM processors require code to be 4-byte aligned, -// but ARM ELF requires 8.. -static constexpr size_t kArmAlignment = 8; - -// ARM64 instruction alignment. This is the recommended alignment for maximum performance. -static constexpr size_t kArm64Alignment = 16; - -// MIPS instruction alignment. MIPS processors require code to be 4-byte aligned. -// TODO: Can this be 4? -static constexpr size_t kMipsAlignment = 8; - -// X86 instruction alignment. This is the recommended alignment for maximum performance. -static constexpr size_t kX86Alignment = 16; - // System page size. We check this against sysconf(_SC_PAGE_SIZE) at runtime, but use a simple // compile-time constant so the compiler can generate better code. static constexpr int kPageSize = 4096; diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc index c1931a9..5b60396 100644 --- a/runtime/instruction_set.cc +++ b/runtime/instruction_set.cc @@ -16,9 +16,6 @@ #include "instruction_set.h" -#include "globals.h" -#include "base/logging.h" // Logging is required for FATAL in the helper functions. - namespace art { const char* GetInstructionSetString(const InstructionSet isa) { @@ -63,75 +60,6 @@ InstructionSet GetInstructionSetFromString(const char* isa_str) { return kNone; } -size_t GetInstructionSetPointerSize(InstructionSet isa) { - switch (isa) { - case kArm: - // Fall-through. 
- case kThumb2: - return kArmPointerSize; - case kArm64: - return kArm64PointerSize; - case kX86: - return kX86PointerSize; - case kX86_64: - return kX86_64PointerSize; - case kMips: - return kMipsPointerSize; - case kNone: - LOG(FATAL) << "ISA kNone does not have pointer size."; - return 0; - default: - LOG(FATAL) << "Unknown ISA " << isa; - return 0; - } -} - -size_t GetBytesPerGprSpillLocation(InstructionSet isa) { - switch (isa) { - case kArm: - // Fall-through. - case kThumb2: - return 4; - case kArm64: - return 8; - case kX86: - return 4; - case kX86_64: - return 8; - case kMips: - return 4; - case kNone: - LOG(FATAL) << "ISA kNone does not have spills."; - return 0; - default: - LOG(FATAL) << "Unknown ISA " << isa; - return 0; - } -} - -size_t GetBytesPerFprSpillLocation(InstructionSet isa) { - switch (isa) { - case kArm: - // Fall-through. - case kThumb2: - return 4; - case kArm64: - return 8; - case kX86: - return 8; - case kX86_64: - return 8; - case kMips: - return 4; - case kNone: - LOG(FATAL) << "ISA kNone does not have spills."; - return 0; - default: - LOG(FATAL) << "Unknown ISA " << isa; - return 0; - } -} - size_t GetInstructionSetAlignment(InstructionSet isa) { switch (isa) { case kArm: @@ -155,27 +83,6 @@ size_t GetInstructionSetAlignment(InstructionSet isa) { } } -bool Is64BitInstructionSet(InstructionSet isa) { - switch (isa) { - case kArm: - case kThumb2: - case kX86: - case kMips: - return false; - - case kArm64: - case kX86_64: - return true; - - case kNone: - LOG(FATAL) << "ISA kNone does not have bit width."; - return 0; - default: - LOG(FATAL) << "Unknown ISA " << isa; - return 0; - } -} - std::string InstructionSetFeatures::GetFeatureString() const { std::string result; if ((mask_ & kHwDiv) != 0) { diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h index 7231733..67e7100 100644 --- a/runtime/instruction_set.h +++ b/runtime/instruction_set.h @@ -20,6 +20,7 @@ #include <iosfwd> #include <string> +#include "base/logging.h" // Logging is required for FATAL in the helper functions. #include "base/macros.h" namespace art { @@ -35,14 +36,122 @@ enum InstructionSet { }; std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs); +// Architecture-specific pointer sizes +static constexpr size_t kArmPointerSize = 4; +static constexpr size_t kArm64PointerSize = 8; +static constexpr size_t kMipsPointerSize = 4; +static constexpr size_t kX86PointerSize = 4; +static constexpr size_t kX86_64PointerSize = 8; + +// ARM instruction alignment. ARM processors require code to be 4-byte aligned, +// but ARM ELF requires 8.. +static constexpr size_t kArmAlignment = 8; + +// ARM64 instruction alignment. This is the recommended alignment for maximum performance. +static constexpr size_t kArm64Alignment = 16; + +// MIPS instruction alignment. MIPS processors require code to be 4-byte aligned. +// TODO: Can this be 4? +static constexpr size_t kMipsAlignment = 8; + +// X86 instruction alignment. This is the recommended alignment for maximum performance. +static constexpr size_t kX86Alignment = 16; + + const char* GetInstructionSetString(InstructionSet isa); InstructionSet GetInstructionSetFromString(const char* instruction_set); -size_t GetInstructionSetPointerSize(InstructionSet isa); +static inline size_t GetInstructionSetPointerSize(InstructionSet isa) { + switch (isa) { + case kArm: + // Fall-through. 
+ case kThumb2: + return kArmPointerSize; + case kArm64: + return kArm64PointerSize; + case kX86: + return kX86PointerSize; + case kX86_64: + return kX86_64PointerSize; + case kMips: + return kMipsPointerSize; + case kNone: + LOG(FATAL) << "ISA kNone does not have pointer size."; + return 0; + default: + LOG(FATAL) << "Unknown ISA " << isa; + return 0; + } +} + size_t GetInstructionSetAlignment(InstructionSet isa); -bool Is64BitInstructionSet(InstructionSet isa); -size_t GetBytesPerGprSpillLocation(InstructionSet isa); -size_t GetBytesPerFprSpillLocation(InstructionSet isa); + +static inline bool Is64BitInstructionSet(InstructionSet isa) { + switch (isa) { + case kArm: + case kThumb2: + case kX86: + case kMips: + return false; + + case kArm64: + case kX86_64: + return true; + + case kNone: + LOG(FATAL) << "ISA kNone does not have bit width."; + return 0; + default: + LOG(FATAL) << "Unknown ISA " << isa; + return 0; + } +} + +static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) { + switch (isa) { + case kArm: + // Fall-through. + case kThumb2: + return 4; + case kArm64: + return 8; + case kX86: + return 4; + case kX86_64: + return 8; + case kMips: + return 4; + case kNone: + LOG(FATAL) << "ISA kNone does not have spills."; + return 0; + default: + LOG(FATAL) << "Unknown ISA " << isa; + return 0; + } +} + +static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) { + switch (isa) { + case kArm: + // Fall-through. + case kThumb2: + return 4; + case kArm64: + return 8; + case kX86: + return 8; + case kX86_64: + return 8; + case kMips: + return 4; + case kNone: + LOG(FATAL) << "ISA kNone does not have spills."; + return 0; + default: + LOG(FATAL) << "Unknown ISA " << isa; + return 0; + } +} #if defined(__arm__) static constexpr InstructionSet kRuntimeISA = kArm; diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc index cd6337c..ece3238 100644 --- a/runtime/instruction_set_test.cc +++ b/runtime/instruction_set_test.cc @@ -45,4 +45,8 @@ TEST_F(InstructionSetTest, TestRoundTrip) { EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA))); } +TEST_F(InstructionSetTest, PointerSize) { + EXPECT_EQ(kPointerSize, GetInstructionSetPointerSize(kRuntimeISA)); +} + } // namespace art diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index 74dfe91..6c44aa9 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -292,11 +292,14 @@ bool OatFile::Setup(std::string* error_msg) { return false; } - oat_dex_files_.Put(dex_file_location, new OatDexFile(this, - dex_file_location, - dex_file_checksum, - dex_file_pointer, - methods_offsets_pointer)); + OatDexFile* oat_dex_file = new OatDexFile(this, + dex_file_location, + dex_file_checksum, + dex_file_pointer, + methods_offsets_pointer); + // Use a StringPiece backed by the oat_dex_file's internal std::string as the key. 
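The key-type change in oat_file.h above exists so that a lookup with a const char* never constructs a temporary std::string: the StringPiece key is a non-owning view into the OatDexFile's own location string, which lives exactly as long as the table entry does. The same pattern with the standard library's view type (C++17 std::string_view standing in for StringPiece; all names here are illustrative, not ART's):

#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <string_view>

struct OatDexFileStub {
  explicit OatDexFileStub(std::string loc) : location(std::move(loc)) {}
  std::string location;  // the map key below is a view into this buffer
};

int main() {
  std::map<std::string_view, std::unique_ptr<OatDexFileStub>> table;
  auto file = std::make_unique<OatDexFileStub>("/system/framework/core.jar");
  std::string_view key(file->location);  // backed by the entry's own string
  table.emplace(key, std::move(file));
  // A const char* lookup builds only a view, never an owning std::string.
  assert(table.count("/system/framework/core.jar") == 1);
}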
+ StringPiece key(oat_dex_file->GetDexFileLocation()); + oat_dex_files_.Put(key, oat_dex_file); } return true; } diff --git a/runtime/oat_file.h b/runtime/oat_file.h index d703731..eae0418 100644 --- a/runtime/oat_file.h +++ b/runtime/oat_file.h @@ -20,6 +20,7 @@ #include <string> #include <vector> +#include "base/stringpiece.h" #include "dex_file.h" #include "invoke_type.h" #include "mem_map.h" @@ -206,11 +207,11 @@ class OatFile { const byte* dex_file_pointer, const uint32_t* oat_class_offsets_pointer); - const OatFile* oat_file_; - std::string dex_file_location_; - uint32_t dex_file_location_checksum_; - const byte* dex_file_pointer_; - const uint32_t* oat_class_offsets_pointer_; + const OatFile* const oat_file_; + const std::string dex_file_location_; + const uint32_t dex_file_location_checksum_; + const byte* const dex_file_pointer_; + const uint32_t* const oat_class_offsets_pointer_; friend class OatFile; DISALLOW_COPY_AND_ASSIGN(OatDexFile); @@ -270,7 +271,9 @@ class OatFile { // dlopen handle during runtime. void* dlopen_handle_; - typedef SafeMap<std::string, const OatDexFile*> Table; + // NOTE: We use a StringPiece as the key type to avoid a memory allocation on every lookup + // with a const char* key. + typedef SafeMap<StringPiece, const OatDexFile*> Table; Table oat_dex_files_; friend class OatClass; diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java index 1ce4a04..f0fe934 100644 --- a/test/107-int-math2/src/Main.java +++ b/test/107-int-math2/src/Main.java @@ -979,7 +979,7 @@ class Main extends IntMathBase { if (lres == 0x96deff00aa010000L) { System.out.println("longShiftTest PASSED"); } else { - System.out.println("longShiftTest FAILED: " + res); + System.out.println("longShiftTest FAILED: " + lres); failure = true; } diff --git a/test/700-LoadArgRegs/expected.txt b/test/700-LoadArgRegs/expected.txt new file mode 100644 index 0000000..4908e5b --- /dev/null +++ b/test/700-LoadArgRegs/expected.txt @@ -0,0 +1,75 @@ +11 +21, 22 +31, 32, 33 +41, 42, 43, 44 +51, 52, 53, 54, 55 +61, 62, 63, 64, 65, 66 +71, 72, 73, 74, 75, 76, 77 +81, 82, 83, 84, 85, 86, 87, 88 +91, 92, 93, 94, 95, 96, 97, 98, 99 +101, 102, 103, 104, 105, 106, 107, 108, 109, 110 +111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111 +121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212 +61, 62, 63, 64, 65, 66 +true +true, false +true, false, true +true, false, true, false +true, false, true, false, true +true, false, true, false, true, false +true, false, true, false, true, false, true +a +a, b +a, b, c +a, b, c, d +a, b, c, d, e +a, b, c, d, e, f +a, b, c, d, e, f, g +11 +11, b +11, b, true +11, b, true, 12 +11, b, true, 12, e +11, b, true, 12, e, false +11, b, true, 12, e, false, 13 +1.1 +2.1, 2.2 +3.1, 3.2, 3.3 +4.1, 4.2, 4.3, 4.4 +5.1, 5.2, 5.3, 5.4, 5.5 +6.1, 6.2, 6.3, 6.4, 6.5, 6.6 +7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7 +8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8 +9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9 +10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 10.1 +1.01 +2.01, 2.02 +3.01, 3.02, 3.03 +4.01, 4.02, 4.03, 4.04 +5.01, 5.02, 5.03, 5.04, 5.05 +6.01, 6.02, 6.03, 6.04, 6.05, 6.06 +7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07 +8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08 +9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09 +-1.1, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09 +10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01 +100011 +100021, 100022 +100031, 100032, 100033 +100041, 100042, 100043, 100044 +100051, 100052, 100053, 100054, 100055 
+100061, 100062, 100063, 100064, 100065, 100066 +100071, 100072, 100073, 100074, 100075, 100076, 100077 +100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088 +100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099 +100100100100011 +-11 +-21, -22 +-31, -32, -33 +-41, -42, -43, -44 +-51, -52, -53, -54, -55 +-61, -62, -63, -64, -65, -66 +-71, -72, -73, -74, -75, -76, -77 +-81, -82, -83, -84, -85, -86, -87, -88 +-91, -92, -93, -94, -95, -96, -97, -98, -99 +-1, -91, -92, -93, -94, -95, -96, -97, -98, -99 diff --git a/test/700-LoadArgRegs/info.txt b/test/700-LoadArgRegs/info.txt new file mode 100644 index 0000000..dcaa46e --- /dev/null +++ b/test/700-LoadArgRegs/info.txt @@ -0,0 +1 @@ +Simple tests for passing int/float/long/double arguments. diff --git a/test/700-LoadArgRegs/src/Main.java b/test/700-LoadArgRegs/src/Main.java new file mode 100644 index 0000000..281ab16 --- /dev/null +++ b/test/700-LoadArgRegs/src/Main.java @@ -0,0 +1,288 @@ +public class Main { + + static void testI1(int p1) { + System.out.println(p1); + } + static void testI2(int p1, int p2) { + System.out.println(p1+", "+p2); + } + static void testI3(int p1, int p2, int p3) { + System.out.println(p1+", "+p2+", "+p3); + } + static void testI4(int p1, int p2, int p3, int p4) { + System.out.println(p1+", "+p2+", "+p3+", "+p4); + } + static void testI5(int p1, int p2, int p3, int p4, int p5) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); + } + static void testI6(int p1, int p2, int p3, int p4, int p5, int p6) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); + } + static void testI7(int p1, int p2, int p3, int p4, int p5, int p6, int p7) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); + } + static void testI8(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); + } + static void testI9(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); + } + static void testI10(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10); + } + static void testI11(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11); + } + static void testI12(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11, int p12) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11+", "+p12); + } + void testI6_nonstatic(int p1, int p2, int p3, int p4, int p5, int p6) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); + } + + static void testB1(boolean p1) { + System.out.println(p1); + } + static void testB2(boolean p1, boolean p2) { + System.out.println(p1+", "+p2); + } + static void testB3(boolean p1, boolean p2, boolean p3) { + System.out.println(p1+", "+p2+", "+p3); + } + static void testB4(boolean p1, boolean p2, boolean p3, boolean p4) { + System.out.println(p1+", "+p2+", "+p3+", "+p4); + } + static void testB5(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5) { + System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); + } + static void testB6(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, 
+                     boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testB7(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6, boolean p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testO1(Object p1) {
+    System.out.println(p1);
+  }
+  static void testO2(Object p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testO3(Object p1, Object p2, Object p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testO4(Object p1, Object p2, Object p3, Object p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testO5(Object p1, Object p2, Object p3, Object p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testO6(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testO7(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6, Object p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testIOB1(int p1) {
+    System.out.println(p1);
+  }
+  static void testIOB2(int p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testIOB3(int p1, Object p2, boolean p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testIOB4(int p1, Object p2, boolean p3, int p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testIOB5(int p1, Object p2, boolean p3, int p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testIOB6(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testIOB7(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6, int p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testF1(float p1) {
+    System.out.println(p1);
+  }
+  static void testF2(float p1, float p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testF3(float p1, float p2, float p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testF4(float p1, float p2, float p3, float p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testF5(float p1, float p2, float p3, float p4, float p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testF6(float p1, float p2, float p3, float p4, float p5, float p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testF7(float p1, float p2, float p3, float p4, float p5, float p6, float p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+  static void testF8(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+  }
+  static void testF9(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+  }
+  static void testF10(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9, float p10) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+  }
+
+  static void testD1 (double p1) { System.out.println(p1); }
+  static void testD2 (double p1, double p2) { System.out.println(p1+", "+p2); }
+  static void testD3 (double p1, double p2, double p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testD4 (double p1, double p2, double p3, double p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testD5 (double p1, double p2, double p3, double p4, double p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testD6 (double p1, double p2, double p3, double p4, double p5, double p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testD7 (double p1, double p2, double p3, double p4, double p5, double p6, double p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testD8 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testD9 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD9f (float p0, double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD10(double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9, double p10) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10); }
+
+  static void testI() {
+    testI1(11);
+    testI2(21, 22);
+    testI3(31, 32, 33);
+    testI4(41, 42, 43, 44);
+    testI5(51, 52, 53, 54, 55);
+    testI6(61, 62, 63, 64, 65, 66);
+    testI7(71, 72, 73, 74, 75, 76, 77);
+    testI8(81, 82, 83, 84, 85, 86, 87, 88);
+    testI9(91, 92, 93, 94, 95, 96, 97, 98, 99);
+    testI10(101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+    testI11(111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111);
+    testI12(121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212);
+    new Main().testI6_nonstatic(61, 62, 63, 64, 65, 66);
+  }
+
+  static void testB() {
+    testB1(true);
+    testB2(true, false);
+    testB3(true, false, true);
+    testB4(true, false, true, false);
+    testB5(true, false, true, false, true);
+    testB6(true, false, true, false, true, false);
+    testB7(true, false, true, false, true, false, true);
+  }
+
+  static void testO() {
+    testO1("a");
+    testO2("a", "b");
+    testO3("a", "b", "c");
+    testO4("a", "b", "c", "d");
+    testO5("a", "b", "c", "d", "e");
+    testO6("a", "b", "c", "d", "e", "f");
+    testO7("a", "b", "c", "d", "e", "f", "g");
+  }
+
+  static void testIOB() {
+    testIOB1(11);
+    testIOB2(11, "b");
+    testIOB3(11, "b", true);
+    testIOB4(11, "b", true, 12);
+    testIOB5(11, "b", true, 12, "e");
+    testIOB6(11, "b", true, 12, "e", false);
+    testIOB7(11, "b", true, 12, "e", false, 13);
+  }
+
+  static void testF() {
+    testF1(1.1f);
+    testF2(2.1f, 2.2f);
+    testF3(3.1f, 3.2f, 3.3f);
+    testF4(4.1f, 4.2f, 4.3f, 4.4f);
+    testF5(5.1f, 5.2f, 5.3f, 5.4f, 5.5f);
+    testF6(6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f);
+    testF7(7.1f, 7.2f, 7.3f, 7.4f, 7.5f, 7.6f, 7.7f);
+    testF8(8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f);
+    testF9(9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f);
+    testF10(10.1f, 10.2f, 10.3f, 10.4f, 10.5f, 10.6f, 10.7f, 10.8f, 10.9f, 10.1f);
+  }
+
+  static void testD() {
+
+    testD1(1.01);
+    testD2(2.01, 2.02);
+    testD3(3.01, 3.02, 3.03);
+    testD4(4.01, 4.02, 4.03, 4.04);
+    testD5(5.01, 5.02, 5.03, 5.04, 5.05);
+    testD6(6.01, 6.02, 6.03, 6.04, 6.05, 6.06);
+    testD7(7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07);
+    testD8(8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08);
+    testD9(9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+    testD9f(-1.1f, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+
+    // TODO: 10.01 as first arg fails: 10.009994506835938
+    testD10(10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01);
+  }
+
+  static void testL1(long p1) { System.out.println(p1); }
+//  static void testL2x(long p1, long p2) { testL2(p1+p2, p2); }  // TODO(64) GenAddLong 64BIT_TEMP
+  static void testL2(long p1, long p2) { System.out.println(p1+", "+p2); }
+  static void testL3(long p1, long p2, long p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testL4(long p1, long p2, long p3, long p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testL5(long p1, long p2, long p3, long p4, long p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testL6(long p1, long p2, long p3, long p4, long p5, long p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testL7(long p1, long p2, long p3, long p4, long p5, long p6, long p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testL8(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testL9(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+  static void testL9i(int p0, long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+  static void testL() {
+//    testL2x(100021, 100022);
+    testL1(100011);
+    testL2(100021, 100022);
+    testL3(100031, 100032, 100033);
+    testL4(100041, 100042, 100043, 100044);
+    testL5(100051, 100052, 100053, 100054, 100055);
+    testL6(100061, 100062, 100063, 100064, 100065, 100066);
+    testL7(100071, 100072, 100073, 100074, 100075, 100076, 100077);
+    testL8(100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088);
+    testL9(100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099);
+  }
+
+  static void testLL() {
+    testL1(100100100100011L);
+
+    testL1(-11L);
+    testL2(-21L, -22L);
+    testL3(-31L, -32L, -33L);
+    testL4(-41L, -42L, -43L, -44L);
+    testL5(-51L, -52L, -53L, -54L, -55L);
+    testL6(-61L, -62L, -63L, -64L, -65L, -66L);
+    testL7(-71L, -72L, -73L, -74L, -75L, -76L, -77L);
+    testL8(-81L, -82L, -83L, -84L, -85L, -86L, -87L, -88L);
+    testL9(-91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+    testL9i(-1, -91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+
+    // TODO(64) GenAddLong 64BIT_TEMP
+//    testL2x(100100100100011L, 1L);
+//    testL2x(100100100100011L, 100100100100011L);
+  }
+
+  static public void main(String[] args) throws Exception {
+
+    testI();
+    testB();
+    testO();
+    testIOB();
+    testF();
+
+    testD();
+
+    testL();
+
+    testLL();
+
+  }
+}