diff options
31 files changed, 378 insertions, 407 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index bd74ada..5aa264c 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -66,8 +66,16 @@ ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) # The imgdiag test has dependencies on core.oat since it needs to load it during the test. -ART_GTEST_imgdiag_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) -ART_GTEST_imgdiag_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) $(TARGET_CORE_IMAGE_default_no-pic_32) +# For the host, also add the installed tool (in the base size, that should suffice). For the +# target, just the module is fine, the sync will happen late enough. +ART_GTEST_imgdiag_test_HOST_DEPS := \ + $(HOST_CORE_IMAGE_default_no-pic_64) \ + $(HOST_CORE_IMAGE_default_no-pic_32) \ + $(HOST_OUT_EXECUTABLES)/imgdiagd +ART_GTEST_imgdiag_test_TARGET_DEPS := \ + $(TARGET_CORE_IMAGE_default_no-pic_64) \ + $(TARGET_CORE_IMAGE_default_no-pic_32) \ + imgdiagd # The path for which all the source files are relative, not actually the current directory. 
LOCAL_PATH := art diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 99b2166..0713b7a 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -23,6 +23,7 @@ #include "mirror/art_method.h" #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "utils.h" namespace art { diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index fe1d126..03e0e92 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -23,6 +23,7 @@ #include "dex/reg_storage_eq.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array-inl.h" +#include "utils.h" namespace art { @@ -567,21 +568,29 @@ bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div // Try to convert *lit to 1 RegRegRegShift/RegRegShift form. bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) { + if (lit == 0) { + // Special case for *divide-by-zero*. The ops won't actually be used to generate code, as + // GenArithOpIntLit will directly generate exception-throwing code, and multiply-by-zero will + // have been optimized away earlier. 
+ op->op = kOpInvalid; + return true; + } + if (IsPowerOfTwo(lit)) { op->op = kOpLsl; - op->shift = LowestSetBit(lit); + op->shift = CTZ(lit); return true; } if (IsPowerOfTwo(lit - 1)) { op->op = kOpAdd; - op->shift = LowestSetBit(lit - 1); + op->shift = CTZ(lit - 1); return true; } if (IsPowerOfTwo(lit + 1)) { op->op = kOpRsub; - op->shift = LowestSetBit(lit + 1); + op->shift = CTZ(lit + 1); return true; } @@ -599,7 +608,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } int lit1 = lit; - uint32_t shift = LowestSetBit(lit1); + uint32_t shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpLsl; ops[1].shift = shift; @@ -607,7 +616,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } lit1 = lit - 1; - shift = LowestSetBit(lit1); + shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpAdd; ops[1].shift = shift; @@ -615,7 +624,7 @@ bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { } lit1 = lit + 1; - shift = LowestSetBit(lit1); + shift = CTZ(lit1); if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { ops[1].op = kOpRsub; ops[1].shift = shift; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 5ac2aa0..88ab6f8 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -543,7 +543,7 @@ bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_d return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit)); } } - int k = LowestSetBit(lit); + int k = CTZ(lit); if (k >= nbits - 2) { // Avoid special cases. 
return false; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 67ea897..ae9b0f4 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1175,24 +1175,6 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) { new_lir->next->prev = new_lir; } -bool Mir2Lir::IsPowerOfTwo(uint64_t x) { - return (x & (x - 1)) == 0; -} - -// Returns the index of the lowest set bit in 'x'. -int32_t Mir2Lir::LowestSetBit(uint64_t x) { - int bit_posn = 0; - while ((x & 0xf) == 0) { - bit_posn += 4; - x >>= 4; - } - while ((x & 1) == 0) { - bit_posn++; - x >>= 1; - } - return bit_posn; -} - bool Mir2Lir::PartiallyIntersects(RegLocation rl_src, RegLocation rl_dest) { DCHECK(rl_src.wide); DCHECK(rl_dest.wide); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index e8adffb..3733507 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -13,6 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include <functional> + #include "arch/arm/instruction_set_features_arm.h" #include "dex/compiler_ir.h" #include "dex/compiler_internals.h" @@ -23,8 +26,8 @@ #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "mirror/object_reference.h" +#include "utils.h" #include "verifier/method_verifier.h" -#include <functional> namespace art { @@ -1733,7 +1736,7 @@ bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, if ((cu_->instruction_set == kThumb2) && !IsPowerOfTwo(lit)) { return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit); } - int k = LowestSetBit(lit); + int k = CTZ(lit); if (k >= 30) { // Avoid special cases. return false; @@ -1813,18 +1816,18 @@ bool Mir2Lir::HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int li RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (power_of_two) { // Shift. 
- OpRegRegImm(kOpLsl, rl_result.reg, rl_src.reg, LowestSetBit(lit)); + OpRegRegImm(kOpLsl, rl_result.reg, rl_src.reg, CTZ(lit)); } else if (pop_count_le2) { // Shift and add and shift. - int first_bit = LowestSetBit(lit); - int second_bit = LowestSetBit(lit ^ (1 << first_bit)); + int first_bit = CTZ(lit); + int second_bit = CTZ(lit ^ (1 << first_bit)); GenMultiplyByTwoBitMultiplier(rl_src, rl_result, lit, first_bit, second_bit); } else { // Reverse subtract: (src << (shift + 1)) - src. DCHECK(power_of_two_minus_one); - // TUNING: rsb dst, src, src lsl#LowestSetBit(lit + 1) + // TUNING: rsb dst, src, src lsl#CTZ(lit + 1) RegStorage t_reg = AllocTemp(); - OpRegRegImm(kOpLsl, t_reg, rl_src.reg, LowestSetBit(lit + 1)); + OpRegRegImm(kOpLsl, t_reg, rl_src.reg, CTZ(lit + 1)); OpRegRegReg(kOpSub, rl_result.reg, t_reg, rl_src.reg); } StoreValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index f102881..5f8a71c 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -1482,18 +1482,6 @@ class Mir2Lir : public Backend { return cu_; } /* - * @brief Returns the index of the lowest set bit in 'x'. - * @param x Value to be examined. - * @returns The bit number of the lowest bit set in the value. - */ - int32_t LowestSetBit(uint64_t x); - /* - * @brief Is this value a power of two? - * @param x Value to be examined. - * @returns 'true' if only 1 bit is set in the value. - */ - bool IsPowerOfTwo(uint64_t x); - /* * @brief Do these SRs overlap? 
* @param rl_op1 One RegLocation * @param rl_op2 The other RegLocation diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc index 088bec8..ca68f95 100644 --- a/compiler/dex/quick/resource_mask.cc +++ b/compiler/dex/quick/resource_mask.cc @@ -19,6 +19,7 @@ #include "resource_mask.h" #include "utils/arena_allocator.h" +#include "utils.h" namespace art { diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index a79f299..ba9c611 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -21,6 +21,7 @@ #include "dex/reg_storage_eq.h" #include "mirror/art_method.h" #include "mirror/array-inl.h" +#include "utils.h" #include "x86_lir.h" namespace art { @@ -656,7 +657,7 @@ RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1); NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); - int shift_amount = LowestSetBit(imm); + int shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); @@ -1627,7 +1628,7 @@ bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags); return true; } else if (IsPowerOfTwo(val)) { - int shift_amount = LowestSetBit(val); + int shift_amount = CTZ(val); if (!PartiallyIntersects(rl_src1, rl_dest)) { rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1, @@ -2070,7 +2071,7 @@ void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); - int shift_amount = LowestSetBit(imm); + int 
shift_amount = CTZ(imm); OpRegImm(kOpAsr, rl_result.reg, shift_amount); if (imm < 0) { OpReg(kOpNeg, rl_result.reg); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 91426f3..4d8154e 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set) { + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new (allocator) arm::CodeGeneratorARM(graph, + isa_features.AsArmInstructionSetFeatures()); } case kArm64: { return new (allocator) arm64::CodeGeneratorARM64(graph); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2e7eca2..4205ebe 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "globals.h" #include "locations.h" @@ -84,7 +85,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set); + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features); HGraph* GetGraph() const { return graph_; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8c107f3..3b3fb64 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include 
"arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -2615,16 +2618,18 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; - + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - } else if (is_volatile && is_wide) { + } else if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. 
@@ -2651,6 +2656,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2684,10 +2690,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { - // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension - // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should - // pass it around to be able to optimize. + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicStore(base, offset, value.AsRegisterPairLow<Register>(), value.AsRegisterPairHigh<Register>(), @@ -2706,7 +2709,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); @@ -2740,7 +2743,10 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be 
even but not R14. @@ -2760,6 +2766,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2791,7 +2798,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), out.AsRegisterPairHigh<Register>()); @@ -2808,7 +2815,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register lo = locations->GetTemp(0).AsRegister<Register>(); Register hi = locations->GetTemp(1).AsRegister<Register>(); GenerateWideAtomicLoad(base, offset, lo, hi); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b86670d..40f4edc 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -233,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block 
that will be compiled. GrowableArray<Label> block_labels_; @@ -240,6 +244,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; + const ArmInstructionSetFeatures* isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 8b75cc7..18722f7 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,6 +17,7 @@ #include <functional> #include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -87,7 +88,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenX86, has_result, expected); } - arm::CodeGeneratorARM codegenARM(graph); + std::unique_ptr<const ArmInstructionSetFeatures> features( + ArmInstructionSetFeatures::FromCppDefines()); + arm::CodeGeneratorARM codegenARM(graph, features.get()); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); @@ -130,7 +133,7 @@ static void RunCodeOptimized(HGraph* graph, bool has_result, Expected expected) { if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { arm64::CodeGeneratorARM64 codegenARM64(graph); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 87f2b90..1a27724 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -289,7 +289,9 @@ CompiledMethod* 
OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); + CompilerDriver* compiler_driver = GetCompilerDriver(); + CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set, + *compiler_driver->GetInstructionSetFeatures()); if (codegen == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); @@ -315,7 +317,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } RunOptimizations( - graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer); + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -333,7 +335,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); return CompiledMethod::SwapAllocCompiledMethodStackMap( - GetCompilerDriver(), + compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), @@ -358,16 +360,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, std::vector<uint8_t> mapping_table; DefaultSrcMap src_mapping_table; - codegen->BuildMappingTable(&mapping_table, - GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? - &src_mapping_table : nullptr); + bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); + codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? 
&src_mapping_table : nullptr); std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(GetCompilerDriver(), + return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f8c0043..83584a2 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1372,38 +1372,6 @@ void X86Assembler::LoadDoubleConstant(XmmRegister dst, double value) { } -void X86Assembler::FloatNegate(XmmRegister f) { - static const struct { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - } float_negate_constant __attribute__((aligned(16))) = - { 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; - xorps(f, Address::Absolute(reinterpret_cast<uintptr_t>(&float_negate_constant))); -} - - -void X86Assembler::DoubleNegate(XmmRegister d) { - static const struct { - uint64_t a; - uint64_t b; - } double_negate_constant __attribute__((aligned(16))) = - {0x8000000000000000LL, 0x8000000000000000LL}; - xorpd(d, Address::Absolute(reinterpret_cast<uintptr_t>(&double_negate_constant))); -} - - -void X86Assembler::DoubleAbs(XmmRegister reg) { - static const struct { - uint64_t a; - uint64_t b; - } double_abs_constant __attribute__((aligned(16))) = - {0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL}; - andpd(reg, Address::Absolute(reinterpret_cast<uintptr_t>(&double_abs_constant))); -} - - void X86Assembler::Align(int alignment, int offset) { CHECK(IsPowerOfTwo(alignment)); // Emit nop instruction until the real position is aligned. 
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 6c3d131..ad07067 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -447,11 +447,6 @@ class X86Assembler FINAL : public Assembler { void LoadLongConstant(XmmRegister dst, int64_t value); void LoadDoubleConstant(XmmRegister dst, double value); - void DoubleNegate(XmmRegister d); - void FloatNegate(XmmRegister f); - - void DoubleAbs(XmmRegister reg); - void LockCmpxchgl(const Address& address, Register reg) { lock()->cmpxchgl(address, reg); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 2a6c58e..d843a72 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1768,38 +1768,6 @@ void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { } -void X86_64Assembler::FloatNegate(XmmRegister f) { - static const struct { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - } float_negate_constant __attribute__((aligned(16))) = - { 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; - xorps(f, Address::Absolute(reinterpret_cast<uintptr_t>(&float_negate_constant))); -} - - -void X86_64Assembler::DoubleNegate(XmmRegister d) { - static const struct { - uint64_t a; - uint64_t b; - } double_negate_constant __attribute__((aligned(16))) = - {0x8000000000000000LL, 0x8000000000000000LL}; - xorpd(d, Address::Absolute(reinterpret_cast<uintptr_t>(&double_negate_constant))); -} - - -void X86_64Assembler::DoubleAbs(XmmRegister reg) { - static const struct { - uint64_t a; - uint64_t b; - } double_abs_constant __attribute__((aligned(16))) = - {0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL}; - andpd(reg, Address::Absolute(reinterpret_cast<uintptr_t>(&double_abs_constant))); -} - - void X86_64Assembler::Align(int alignment, int offset) { CHECK(IsPowerOfTwo(alignment)); // Emit nop instruction until the real position is aligned. 
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index abf2561..ac8bc9a 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -512,11 +512,6 @@ class X86_64Assembler FINAL : public Assembler { void LoadDoubleConstant(XmmRegister dst, double value); - void DoubleNegate(XmmRegister d); - void FloatNegate(XmmRegister f); - - void DoubleAbs(XmmRegister reg); - void LockCmpxchgl(const Address& address, CpuRegister reg) { lock()->cmpxchgl(address, reg); } diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 52fd736..31e653b 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -21,6 +21,7 @@ #include <ostream> #include <sstream> +#include "arch/arm/registers_arm.h" #include "base/logging.h" #include "base/stringprintf.h" #include "thread.h" @@ -148,15 +149,15 @@ struct ThumbRegister : ArmRegister { ThumbRegister(uint16_t instruction, uint16_t at_bit) : ArmRegister((instruction >> at_bit) & 0x7) {} }; -struct Rm { - explicit Rm(uint32_t instruction) : shift((instruction >> 4) & 0xff), rm(instruction & 0xf) {} - uint32_t shift; +struct RmLslImm2 { + explicit RmLslImm2(uint32_t instr) : imm2((instr >> 4) & 0x3), rm(instr & 0xf) {} + uint32_t imm2; ArmRegister rm; }; -std::ostream& operator<<(std::ostream& os, const Rm& r) { +std::ostream& operator<<(std::ostream& os, const RmLslImm2& r) { os << r.rm; - if (r.shift != 0) { - os << "-shift-" << r.shift; // TODO + if (r.imm2 != 0) { + os << ", lsl #" << r.imm2; } return os; } @@ -397,7 +398,74 @@ static uint64_t VFPExpand64(uint32_t imm8) { uint64_t bit_a = (imm8 >> 7) & 1; uint64_t bit_b = (imm8 >> 6) & 1; uint64_t slice = imm8 & 0x3f; - return (bit_a << 31) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); + return (bit_a << 63) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); +} + +enum T2LitType { + kT2LitInvalid, + kT2LitUByte, + kT2LitSByte, + 
kT2LitUHalf, + kT2LitSHalf, + kT2LitUWord, + kT2LitSWord, + kT2LitHexWord, + kT2LitULong, + kT2LitSLong, + kT2LitHexLong, +}; +std::ostream& operator<<(std::ostream& os, T2LitType type) { + return os << static_cast<int>(type); +} + +void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32, + T2LitType type) { + // Literal offsets (imm32) are not required to be aligned so we may need unaligned access. + typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1))); + typedef const uint16_t unaligned_uint16_t __attribute__ ((aligned (1))); + typedef const int32_t unaligned_int32_t __attribute__ ((aligned (1))); + typedef const uint32_t unaligned_uint32_t __attribute__ ((aligned (1))); + typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1))); + typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1))); + + uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4); + uintptr_t lit_adr = U ? pc + imm32 : pc - imm32; + args << " ; "; + switch (type) { + case kT2LitUByte: + args << *reinterpret_cast<const uint8_t*>(lit_adr); + break; + case kT2LitSByte: + args << *reinterpret_cast<const int8_t*>(lit_adr); + break; + case kT2LitUHalf: + args << *reinterpret_cast<const unaligned_uint16_t*>(lit_adr); + break; + case kT2LitSHalf: + args << *reinterpret_cast<const unaligned_int16_t*>(lit_adr); + break; + case kT2LitUWord: + args << *reinterpret_cast<const unaligned_uint32_t*>(lit_adr); + break; + case kT2LitSWord: + args << *reinterpret_cast<const unaligned_int32_t*>(lit_adr); + break; + case kT2LitHexWord: + args << StringPrintf("0x%08x", *reinterpret_cast<const unaligned_uint32_t*>(lit_adr)); + break; + case kT2LitULong: + args << *reinterpret_cast<const unaligned_uint64_t*>(lit_adr); + break; + case kT2LitSLong: + args << *reinterpret_cast<const unaligned_int64_t*>(lit_adr); + break; + case kT2LitHexLong: + args << StringPrintf("0x%" PRIx64, 
*reinterpret_cast<unaligned_int64_t*>(lit_adr)); + break; + default: + LOG(FATAL) << "Invalid type: " << type; + break; + } } size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) { @@ -756,10 +824,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-") << (imm8 << 2) << "]"; if (Rn.r == 15 && U == 1) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2); - typedef const int64_t unaligned_int64_t __attribute__ ((aligned (2))); - args << StringPrintf(" ; 0x%" PRIx64, *reinterpret_cast<unaligned_int64_t*>(lit_adr)); + DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong); } } else if (Rn.r == 13 && W == 1 && U == L) { // VPUSH/VPOP opcode << (L == 1 ? "vpop" : "vpush"); @@ -1227,164 +1292,141 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) break; case 3: switch (op2) { - case 0x00: case 0x02: case 0x04: case 0x06: // 000xxx0 - case 0x08: case 0x09: case 0x0A: case 0x0C: case 0x0E: { - // Store single data item - // |111|11|100|000|0|0000|1111|110000|000000| - // |5 3|21|098|765|4|3 0|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |332|22|222|222|2|1111|1111|110000|000000| - // |1 9|87|654|321|0|9 6|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |111|11|000|op3|0| | | op4 | | - uint32_t op3 = (instr >> 21) & 7; - // uint32_t op4 = (instr >> 6) & 0x3F; - switch (op3) { - case 0x0: case 0x4: { - // {ST,LD}RB Rt,[Rn,#+/-imm12] - 111 11 00 0 1 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,#+/-imm8] - 111 11 00 0 0 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 00 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << (HasBitSet(instr, 20) ? 
"ldrb" : "strb"); - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x1: case 0x5: { - // STRH Rt,[Rn,#+/-imm12] - 111 11 00 0 1 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,#+/-imm8] - 111 11 00 0 0 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 01 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << "strh"; - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x2: case 0x6: { - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 2) { - if ((instr & 0x800) != 0) { - // STR Rt, [Rn, #imm8] - 111 11 000 010 0 nnnn tttt 1PUWiiiiiiii - uint32_t P = (instr >> 10) & 1; - uint32_t U = (instr >> 9) & 1; - uint32_t W = (instr >> 8) & 1; - uint32_t imm8 = instr & 0xFF; - int32_t imm32 = (imm8 << 24) >> 24; // sign-extend imm8 - if (Rn.r == 13 && P == 1 && U == 0 && W == 1 && imm32 == 4) { - opcode << "push"; - args << "{" << Rt << "}"; - } else if (Rn.r == 15 || (P == 0 && W == 0)) { - opcode << "UNDEFINED"; - } else { - if (P == 1 && U == 1 && W == 0) { - opcode << "strt"; - } else { - opcode << "str"; - } - args << Rt << ", [" << Rn; - if (P == 0 && W == 1) { - args << "], #" << imm32; - } 
else { - args << ", #" << imm32 << "]"; - if (W == 1) { - args << "!"; - } - } - } - } else { - // STR Rt, [Rn, Rm, LSL #imm2] - 111 11 000 010 0 nnnn tttt 000000iimmmm - ArmRegister Rm(instr, 0); - uint32_t imm2 = (instr >> 4) & 3; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } - } else if (op3 == 6) { - // STR.W Rt, [Rn, #imm12] - 111 11 000 110 0 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - } - break; - } - } - + case 0x07: case 0x0F: case 0x17: case 0x1F: { // Explicitly UNDEFINED, A6.3. + opcode << "UNDEFINED"; + break; + } + case 0x06: case 0x0E: { // "Store single data item" undefined opcodes, A6.3.10. + opcode << "UNDEFINED [store]"; + break; + } + case 0x15: case 0x1D: { // "Load word" undefined opcodes, A6.3.7. + opcode << "UNDEFINED [load]"; break; } - case 0x03: case 0x0B: case 0x11: case 0x13: case 0x19: case 0x1B: { // 00xx011 - // Load byte/halfword - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|01|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; + case 0x10: case 0x12: case 0x14: case 0x16: case 0x18: case 0x1A: case 0x1C: case 0x1E: { + opcode << "UNKNOWN " << op2 << " [SIMD]"; + break; + } + case 0x01: case 0x00: case 0x09: case 0x08: // {LD,ST}RB{,T} + case 0x03: case 0x02: case 0x0B: case 0x0A: // {LD,ST}RH{,T} + case 0x05: case 0x04: case 0x0D: case 0x0C: // {LD,ST}R{,T} + case 0x11: case 0x19: // LDRSB{,T} (no signed store) + case 0x13: case 0x1B: { // LDRSH{,T} (no signed store) + // Load: + // (Store is the same except that l==0 and always s==0 below.) 
+ // 00s.whl (sign, word, half, load) + // LDR{S}B imm12: 11111|00s1001| Rn | Rt |imm12 (0x09) + // LDR{S}B imm8: 11111|00s0001| Rn | Rt |1PUW|imm8 (0x01) + // LDR{S}BT imm8: 11111|00s0001| Rn | Rt |1110|imm8 (0x01) + // LDR{S}B lit: 11111|00sU001|1111| Rt |imm12 (0x01/0x09) + // LDR{S}B reg: 11111|00s0001| Rn | Rt |000000|imm2| Rm (0x01) + // LDR{S}H imm12: 11111|00s1011| Rn | Rt |imm12 (0x0B) + // LDR{S}H imm8: 11111|00s0011| Rn | Rt |1PUW|imm8 (0x03) + // LDR{S}HT imm8: 11111|00s0011| Rn | Rt |1110|imm8 (0x03) + // LDR{S}H lit: 11111|00sU011|1111| Rt |imm12 (0x03/0x0B) + // LDR{S}H reg: 11111|00s0011| Rn | Rt |000000|imm2| Rm (0x03) + // LDR imm12: 11111|0001101| Rn | Rt |imm12 (0x0D) + // LDR imm8: 11111|0000101| Rn | Rt |1PUW|imm8 (0x05) + // LDRT imm8: 11111|0000101| Rn | Rt |1110|imm8 (0x05) + // LDR lit: 11111|000U101|1111| Rt |imm12 (0x05/0x0D) + // LDR reg: 11111|0000101| Rn | Rt |000000|imm2| Rm (0x05) + // + // If Rt == 15, instead of load we have preload: + // PLD{W} imm12: 11111|00010W1| Rn |1111|imm12 (0x09/0x0B) + // PLD{W} imm8: 11111|00000W1| Rn |1111|1100|imm8 (0x01/0x03); -imm8 + // PLD lit: 11111|000U001|1111|1111|imm12 (0x01/0x09) + // PLD{W} reg: 11111|00000W1| Rn |1111|000000|imm2| Rm (0x01/0x03) + // PLI imm12: 11111|0011001| Rn |1111|imm12 (0x19) + // PLI imm8: 11111|0010001| Rn |1111|1100|imm8 (0x11); -imm8 + // PLI lit: 11111|001U001|1111|1111|imm12 (0x01/0x09) + // PLI reg: 11111|0010001| Rn |1111|000000|imm2| Rm (0x01/0x03) + + bool is_load = HasBitSet(instr, 20); + bool is_half = HasBitSet(instr, 21); // W for PLD/PLDW. 
+ bool is_word = HasBitSet(instr, 22); + bool is_signed = HasBitSet(instr, 24); ArmRegister Rn(instr, 16); ArmRegister Rt(instr, 12); - if (Rt.r != 15) { - if (op3 == 1) { - // LDRH.W Rt, [Rn, #imm12] - 111 11 00 01 011 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldrh.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op3 == 3) { - // LDRSH.W Rt, [Rn, #imm12] - 111 11 00 11 011 nnnn tttt iiiiiiiiiiii - // LDRSB.W Rt, [Rn, #imm12] - 111 11 00 11 001 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << (HasBitSet(instr, 20) ? "ldrsb.w" : "ldrsh.w"); - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); + uint32_t imm12 = instr & 0xFFF; + uint32_t U = (instr >> 23) & 1; // U for imm12 + uint32_t imm8 = instr & 0xFF; + uint32_t op4 = (instr >> 8) & 0xF; // 1PUW for imm8 + if (Rt.r == PC && is_load && !is_word) { + // PLD, PLDW, PLI + const char* pld_pli = (is_signed ? "pli" : "pld"); + const char* w = (is_half ? "w" : ""); + if (is_signed && !is_half) { + opcode << "UNDEFINED [PLI+W]"; + } else if (Rn.r == PC || U != 0u) { + opcode << pld_pli << w; + args << "[" << Rn << ", #" << (U != 0u ? 
"" : "-") << imm12 << "]"; + if (Rn.r == PC && is_half) { + args << " (UNPREDICTABLE)"; } + } else if ((instr & 0xFC0) == 0) { + opcode << pld_pli << w; + RmLslImm2 Rm(instr); + args << "[" << Rn << ", " << Rm << "]"; + } else if (op4 == 0xC) { + opcode << pld_pli << w; + args << "[" << Rn << ", #-" << imm8 << "]"; + } else { + opcode << "UNDEFINED [~" << pld_pli << "]"; } + break; + } + const char* ldr_str = is_load ? "ldr" : "str"; + const char* sign = is_signed ? "s" : ""; + const char* type = is_word ? "" : is_half ? "h" : "b"; + bool unpred = (Rt.r == SP && !is_word) || (Rt.r == PC && !is_load); + if (Rn.r == PC && !is_load) { + opcode << "UNDEFINED [STR-lit]"; + unpred = false; + } else if (Rn.r == PC || U != 0u) { + // Load/store with imm12 (load literal if Rn.r == PC; there's no store literal). + opcode << ldr_str << sign << type << ".w"; + args << Rt << ", [" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]"; + if (Rn.r == TR && is_load) { + args << " ; "; + Thread::DumpThreadOffset<4>(args, imm12); + } else if (Rn.r == PC) { + T2LitType lit_type[] = { + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + }; + DCHECK_LT(op2 >> 1, arraysize(lit_type)); + DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid); + DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]); + } + } else if ((instr & 0xFC0) == 0) { + opcode << ldr_str << sign << type << ".w"; + RmLslImm2 Rm(instr); + args << Rt << ", [" << Rn << ", " << Rm << "]"; + unpred = unpred || (Rm.rm.r == SP) || (Rm.rm.r == PC); + } else if (is_word && Rn.r == SP && imm8 == 4 && op4 == (is_load ? 0xB : 0xD)) { + opcode << (is_load ? 
"pop" : "push") << ".w"; + args << Rn; + unpred = unpred || (Rn.r == SP); + } else if ((op4 & 5) == 0) { + opcode << "UNDEFINED [P = W = 0 for " << ldr_str << "]"; + unpred = false; + } else { + uint32_t P = (instr >> 10) & 1; + U = (instr >> 9) & 1; + uint32_t W = (instr >> 8) & 1; + bool pre_index = (P != 0 && W == 1); + bool post_index = (P == 0 && W == 1); + const char* t = (P != 0 && U != 0 && W == 0) ? "t" : ""; // Unprivileged load/store? + opcode << ldr_str << sign << type << t << ".w"; + args << Rt << ", [" << Rn << (post_index ? "]" : "") << ", #" << (U != 0 ? "" : "-") + << imm8 << (post_index ? "" : "]") << (pre_index ? "!" : ""); + unpred = (W != 0 && Rn.r == Rt.r); + } + if (unpred) { + args << " (UNPREDICTABLE)"; } break; } @@ -1413,75 +1455,6 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } // else unknown instruction break; } - case 0x05: case 0x0D: case 0x15: case 0x1D: { // 00xx101 - // Load word - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|10|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; - uint32_t op4 = (instr >> 6) & 0x3F; - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 1 || Rn.r == 15) { - // LDR.W Rt, [Rn, #imm12] - 111 11 00 00 101 nnnn tttt iiiiiiiiiiii - // LDR.W Rt, [PC, #imm12] - 111 11 00 0x 101 1111 tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", 
*reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op4 == 0) { - // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm - uint32_t imm2 = (instr >> 4) & 0xF; - ArmRegister rm(instr, 0); - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", " << rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } else { - bool p = (instr & (1 << 10)) != 0; - bool w = (instr & (1 << 8)) != 0; - bool u = (instr & (1 << 9)) != 0; - if (p && u && !w) { - // LDRT Rt, [Rn, #imm8] - 111 11 00 00 101 nnnn tttt 1110iiiiiiii - uint32_t imm8 = instr & 0xFF; - opcode << "ldrt"; - args << Rt << ", [" << Rn << ", #" << imm8 << "]"; - } else if (Rn.r == 13 && !p && u && w && (instr & 0xff) == 4) { - // POP - opcode << "pop"; - args << "{" << Rt << "}"; - } else { - bool wback = !p || w; - uint32_t offset = (instr & 0xff); - opcode << "ldr.w"; - args << Rt << ","; - if (p && !wback) { - args << "[" << Rn << ", #" << offset << "]"; - } else if (p && wback) { - args << "[" << Rn << ", #" << offset << "]!"; - } else if (!p && wback) { - args << "[" << Rn << "], #" << offset; - } else { - LOG(FATAL) << p << " " << w; - } - } - } - break; - } default: // more formats if ((op2 >> 4) == 2) { // 010xxxx // data processing (register) @@ -1808,6 +1781,23 @@ size_t DisassemblerArm::DumpThumb16(std::ostream& os, const uint8_t* instr_ptr) DumpBranchTarget(args, instr_ptr + 4, imm32); break; } + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: { + opcode << "push"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 6)); + break; + } + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: { + opcode << "pop"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 7)); + break; + } + case 0x70: case 
0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: { + opcode << "bkpt"; + args << "#" << (instr & 0xFF); + break; + } case 0x50: case 0x51: // 101000x case 0x52: case 0x53: // 101001x case 0x56: case 0x57: { // 101011x diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 556f2f8..5f5d3f7 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -3192,7 +3192,7 @@ static bool IsMethodPossiblyInlined(Thread* self, mirror::ArtMethod* m) Handle<mirror::ArtMethod> method(hs.NewHandle(m)); verifier::MethodVerifier verifier(self, dex_cache->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), code_item, m->GetDexMethodIndex(), method, - m->GetAccessFlags(), false, true, false); + m->GetAccessFlags(), false, true, false, true); // Note: we don't need to verify the method. return InlineMethodAnalyser::AnalyseMethodCode(&verifier, nullptr); } diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 1ef5221..ef63080 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -1001,14 +1001,9 @@ void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::O // the locks held in this stack frame. std::vector<uint32_t> monitor_enter_dex_pcs; verifier::MethodVerifier::FindLocksAtDexPc(m, dex_pc, &monitor_enter_dex_pcs); - if (monitor_enter_dex_pcs.empty()) { - return; - } - - for (size_t i = 0; i < monitor_enter_dex_pcs.size(); ++i) { + for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) { // The verifier works in terms of the dex pcs of the monitor-enter instructions. // We want the registers used by those instructions (so we can read the values out of them). - uint32_t monitor_dex_pc = monitor_enter_dex_pcs[i]; uint16_t monitor_enter_instruction = code_item->insns_[monitor_dex_pc]; // Quick sanity check. 
@@ -1018,8 +1013,8 @@ void Monitor::VisitLocks(StackVisitor* stack_visitor, void (*callback)(mirror::O } uint16_t monitor_register = ((monitor_enter_instruction >> 8) & 0xff); - mirror::Object* o = reinterpret_cast<mirror::Object*>(stack_visitor->GetVReg(m, monitor_register, - kReferenceVReg)); + mirror::Object* o = reinterpret_cast<mirror::Object*>( + stack_visitor->GetVReg(m, monitor_register, kReferenceVReg)); callback(o, callback_context); } } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index f37312e..44c6d87 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -301,7 +301,10 @@ static jbyte IsDexOptNeededForFile(const std::string& oat_filename, const char* nullptr, false, &error_msg)); if (oat_file.get() == nullptr) { - if (kReasonLogging) { + // Note that even though this is kDexoptNeeded, we use + // kVerboseLogging instead of the usual kReasonLogging since it is + // the common case on first boot and very spammy. 
+ if (kVerboseLogging) { LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << oat_filename << "' for file location '" << filename << "': " << error_msg; } diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 90c9fe7..3517848 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -214,7 +214,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { Handle<mirror::ArtMethod> h_method(hs.NewHandle(m)); verifier::MethodVerifier verifier(self_, h_dex_cache->GetDexFile(), h_dex_cache, h_class_loader, &m->GetClassDef(), code_item, m->GetDexMethodIndex(), - h_method, m->GetAccessFlags(), false, true, true); + h_method, m->GetAccessFlags(), false, true, true, true); verifier.Verify(); const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc)); for (uint16_t reg = 0; reg < num_regs; ++reg) { diff --git a/runtime/thread.cc b/runtime/thread.cc index 5ff7490..d2d5be7 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -932,7 +932,10 @@ struct StackDumpVisitor : public StackVisitor { os << StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)", reinterpret_cast<intptr_t>(o), PrettyTypeOf(o).c_str()); } else { - os << StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), PrettyTypeOf(o).c_str()); + // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. So + // we get the pretty type before we call IdentityHashCode. 
+ const std::string pretty_type(PrettyTypeOf(o)); + os << StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str()); } } os << "\n"; @@ -1339,7 +1342,6 @@ void Thread::HandleScopeVisitRoots(RootCallback* visitor, void* arg, uint32_t th } mirror::Object* Thread::DecodeJObject(jobject obj) const { - Locks::mutator_lock_->AssertSharedHeld(this); if (obj == nullptr) { return nullptr; } diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 66846b5..88944d7 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -286,7 +286,7 @@ MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self, uint32_t MethodVerifier verifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx, method, method_access_flags, true, allow_soft_failures, - need_precise_constants); + need_precise_constants, true); if (verifier.Verify()) { // Verification completed, however failures may be pending that didn't cause the verification // to hard fail. 
@@ -352,7 +352,8 @@ MethodVerifier::MethodVerifier(Thread* self, const DexFile::CodeItem* code_item, uint32_t dex_method_idx, Handle<mirror::ArtMethod> method, uint32_t method_access_flags, bool can_load_classes, bool allow_soft_failures, - bool need_precise_constants, bool verify_to_dump) + bool need_precise_constants, bool verify_to_dump, + bool allow_thread_suspension) : self_(self), reg_types_(can_load_classes), work_insn_idx_(-1), @@ -377,7 +378,8 @@ MethodVerifier::MethodVerifier(Thread* self, need_precise_constants_(need_precise_constants), has_check_casts_(false), has_virtual_or_interface_invokes_(false), - verify_to_dump_(verify_to_dump) { + verify_to_dump_(verify_to_dump), + allow_thread_suspension_(allow_thread_suspension) { Runtime::Current()->AddMethodVerifier(this); DCHECK(class_def != nullptr); } @@ -396,7 +398,7 @@ void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc, Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(), - false, true, false); + false, true, false, false); verifier.interesting_dex_pc_ = dex_pc; verifier.monitor_enter_dex_pcs_ = monitor_enter_dex_pcs; verifier.FindLocksAtDexPc(); @@ -443,7 +445,7 @@ mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m, Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), method, m->GetAccessFlags(), - true, true, false); + true, true, false, true); return verifier.FindAccessedFieldAtDexPc(dex_pc); } @@ -475,7 +477,7 @@ mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m Handle<mirror::ArtMethod> method(hs.NewHandle(m)); MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), 
m->GetDexMethodIndex(), method, m->GetAccessFlags(), - true, true, false); + true, true, false, true); return verifier.FindInvokedMethodAtDexPc(dex_pc); } @@ -1402,7 +1404,9 @@ bool MethodVerifier::CodeFlowVerifyMethod() { /* Continue until no instructions are marked "changed". */ while (true) { - self_->AllowThreadSuspension(); + if (allow_thread_suspension_) { + self_->AllowThreadSuspension(); + } // Find the first marked one. Use "start_guess" as a way to find one quickly. uint32_t insn_idx = start_guess; for (; insn_idx < insns_size; insn_idx++) { diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index 15a09c5..b83e647 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -207,10 +207,11 @@ class MethodVerifier { const DexFile::CodeItem* code_item, uint32_t method_idx, Handle<mirror::ArtMethod> method, uint32_t access_flags, bool can_load_classes, bool allow_soft_failures, - bool need_precise_constants) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + bool need_precise_constants, bool allow_thread_suspension) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : MethodVerifier(self, dex_file, dex_cache, class_loader, class_def, code_item, method_idx, method, access_flags, can_load_classes, allow_soft_failures, - need_precise_constants, false) {} + need_precise_constants, false, allow_thread_suspension) {} ~MethodVerifier(); @@ -260,7 +261,7 @@ class MethodVerifier { const DexFile::CodeItem* code_item, uint32_t method_idx, Handle<mirror::ArtMethod> method, uint32_t access_flags, bool can_load_classes, bool allow_soft_failures, bool need_precise_constants, - bool verify_to_dump) + bool verify_to_dump, bool allow_thread_suspension) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Adds the given string to the beginning of the last failure message. @@ -729,6 +730,11 @@ class MethodVerifier { // VerifyMethodAndDump. 
const bool verify_to_dump_; + // Whether or not we call AllowThreadSuspension periodically, we want a way to disable this for + // thread dumping checkpoints since we may get thread suspension at an inopportune time due to + // FindLocksAtDexPC, resulting in deadlocks. + const bool allow_thread_suspension_; + DISALLOW_COPY_AND_ASSIGN(MethodVerifier); }; std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs); diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc index 601e321..2eb518c 100644 --- a/sigchainlib/sigchain.cc +++ b/sigchainlib/sigchain.cc @@ -170,12 +170,13 @@ extern "C" int sigaction(int signal, const struct sigaction* new_action, struct // Note that we check that the signal number is in range here. An out of range signal // number should behave exactly as the libc sigaction. if (signal > 0 && signal < _NSIG && user_sigactions[signal].IsClaimed()) { - if (old_action != NULL) { - *old_action = user_sigactions[signal].GetAction(); - } + struct sigaction saved_action = user_sigactions[signal].GetAction(); if (new_action != NULL) { user_sigactions[signal].SetAction(*new_action, false); } + if (old_action != NULL) { + *old_action = saved_action; + } return 0; } diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index d7aede3..6cb08f4 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -12,4 +12,5 @@ FloatIntConstPassing b/18718277 b/18800943 (1) b/18800943 (2) +MoveExc Done! 
diff --git a/test/800-smali/smali/move_exc.smali b/test/800-smali/smali/move_exc.smali new file mode 100644 index 0000000..4ade4bc --- /dev/null +++ b/test/800-smali/smali/move_exc.smali @@ -0,0 +1,29 @@ +.class public LMoveExc; +.super Ljava/lang/Object; + + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 6 +:Label1 + const v1, 15 + const v2, 0 + div-int v0, v1, v2 + +:Label2 + goto :Label4 + +:Label3 + move-exception v3 + throw v3 + +:Label4 + return-void + +.catchall {:Label1 .. :Label2} :Label3 +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index ea25da6..2eda850 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -68,6 +68,7 @@ public class Main { testCases.add(new TestCase("b/18718277", "B18718277", "getInt", null, null, 0)); testCases.add(new TestCase("b/18800943 (1)", "B18800943_1", "n_a", null, new VerifyError(), 0)); testCases.add(new TestCase("b/18800943 (2)", "B18800943_2", "n_a", null, new VerifyError(), 0)); + testCases.add(new TestCase("MoveExc", "MoveExc", "run", null, new ArithmeticException(), null)); } public void runTests() { |