diff options
-rw-r--r-- | courgette/courgette.gyp | 1 | ||||
-rw-r--r-- | courgette/disassembler.h | 4 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.cc | 59 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.h | 55 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_arm.cc | 39 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_arm.h | 16 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86.cc | 9 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86.h | 11 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86_unittest.cc | 23 | ||||
-rw-r--r-- | courgette/typedrva_unittest.cc | 92 |
10 files changed, 276 insertions, 33 deletions
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp index f4d0365..d6d4c15 100644 --- a/courgette/courgette.gyp +++ b/courgette/courgette.gyp @@ -104,6 +104,7 @@ 'ensemble_unittest.cc', 'run_all_unittests.cc', 'streams_unittest.cc', + 'typedrva_unittest.cc', 'versioning_unittest.cc', 'third_party/paged_array_unittest.cc' ], diff --git a/courgette/disassembler.h b/courgette/disassembler.h index 2de67fd..8f6deb1 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -67,6 +67,10 @@ class Disassembler { return *reinterpret_cast<const uint32*>(address); } + static uint16 Read16LittleEndian(const void* address) { + return *reinterpret_cast<const uint16*>(address); + } + // Reduce the length of the image in memory. Does not actually free // (or realloc) any memory. Usually only called via ParseHeader() void ReduceLength(size_t reduced_length); diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc index 91ccd45..1033fd2 100644 --- a/courgette/disassembler_elf_32.cc +++ b/courgette/disassembler_elf_32.cc @@ -10,6 +10,7 @@ #include "base/basictypes.h" #include "base/logging.h" +#include "base/memory/scoped_vector.h" #include "courgette/assembly_program.h" #include "courgette/courgette.h" @@ -203,7 +204,7 @@ RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const { } CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas, - std::vector<size_t>* offsets) { + std::vector<size_t>* offsets) { offsets->clear(); for (std::vector<RVA>::iterator rva = rvas->begin(); @@ -221,24 +222,39 @@ CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas, return true; } +CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) { + for (ScopedVector<TypedRVA>::iterator rva = rvas->begin(); + rva != rvas->end(); + rva++) { + + size_t offset; + + if (!RVAToFileOffset((*rva)->rva(), &offset)) + return false; + + (*rva)->set_offset(offset); + } + + return true; +} + CheckBool 
DisassemblerElf32::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. uint32 file_offset = 0; std::vector<size_t> abs_offsets; - std::vector<size_t> rel_offsets; if (!RVAsToOffsets(&abs32_locations_, &abs_offsets)) return false; - if (!RVAsToOffsets(&rel32_locations_, &rel_offsets)) + if (!RVAsToOffsets(&rel32_locations_)) return false; std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin(); - std::vector<size_t>::iterator current_rel_offset = rel_offsets.begin(); + ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin(); std::vector<size_t>::iterator end_abs_offset = abs_offsets.end(); - std::vector<size_t>::iterator end_rel_offset = rel_offsets.end(); + ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end(); for (int section_id = 0; section_id < SectionHeaderCount(); @@ -261,7 +277,7 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { case SHT_PROGBITS: if (!ParseProgbitsSection(section_header, &current_abs_offset, end_abs_offset, - &current_rel_offset, end_rel_offset, + &current_rel, end_rel, program)) return false; file_offset = section_header->sh_offset + section_header->sh_size; @@ -306,8 +322,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( const Elf32_Shdr *section_header, std::vector<size_t>::iterator* current_abs_offset, std::vector<size_t>::iterator end_abs_offset, - std::vector<size_t>::iterator* current_rel_offset, - std::vector<size_t>::iterator end_rel_offset, + ScopedVector<TypedRVA>::iterator* current_rel, + ScopedVector<TypedRVA>::iterator end_rel, AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. 
@@ -325,9 +341,9 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( file_offset > **current_abs_offset) return false; - while (*current_rel_offset != end_rel_offset && - file_offset > **current_rel_offset) { - (*current_rel_offset)++; + while (*current_rel != end_rel && + file_offset > (**current_rel)->get_offset()) { + (*current_rel)++; } size_t next_relocation = section_end; @@ -339,9 +355,9 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( // Rel offsets are heuristically derived, and might (incorrectly) overlap // an Abs value, or the end of the section, so +3 to make sure there is // room for the full 4 byte value. - if (*current_rel_offset != end_rel_offset && - next_relocation > (**current_rel_offset + 3)) - next_relocation = **current_rel_offset; + if (*current_rel != end_rel && + next_relocation > ((**current_rel)->get_offset() + 3)) + next_relocation = (**current_rel)->get_offset(); if (next_relocation > file_offset) { if (!ParseSimpleRegion(file_offset, next_relocation, program)) @@ -364,20 +380,19 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( continue; } - if (*current_rel_offset != end_rel_offset && - file_offset == **current_rel_offset) { + if (*current_rel != end_rel && + file_offset == (**current_rel)->get_offset()) { - const uint8* p = OffsetToPointer(file_offset); - uint32 relative_target = Read32LittleEndian(p); + uint32 relative_target = (**current_rel)->relative_target(); // This cast is for 64 bit systems, and is only safe because we // are working on 32 bit executables. 
RVA target_rva = (RVA)(origin + (file_offset - origin_offset) + - 4 + relative_target); + relative_target); if (!program->EmitRel32(program->FindOrMakeRel32Label(target_rva))) return false; file_offset += sizeof(RVA); - (*current_rel_offset)++; + (*current_rel)++; continue; } } @@ -482,7 +497,9 @@ CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() { return false; } - std::sort(rel32_locations_.begin(), rel32_locations_.end()); + std::sort(rel32_locations_.begin(), + rel32_locations_.end(), + TypedRVA::IsLessThan); return true; } diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h index 3f63c43..b3f6e59 100644 --- a/courgette/disassembler_elf_32.h +++ b/courgette/disassembler_elf_32.h @@ -6,6 +6,7 @@ #define COURGETTE_DISASSEMBLER_ELF_32_H_ #include "base/basictypes.h" +#include "base/memory/scoped_vector.h" #include "courgette/disassembler.h" #include "courgette/memory_allocator.h" #include "courgette/types_elf.h" @@ -24,8 +25,52 @@ class AssemblyProgram; // architecture's machine code. class DisassemblerElf32 : public Disassembler { public: + // Different instructions encode the target rva differently. This + // class encapsulates this behavior. public for use in unit tests. 
+ class TypedRVA { + public: + explicit TypedRVA(RVA rva) : rva_(rva), offset_(-1) { + } + + virtual ~TypedRVA() { }; + + RVA rva() { + return rva_; + } + + RVA relative_target() { + return relative_target_; + } + + void set_relative_target(RVA relative_target) { + relative_target_ = relative_target; + } + + size_t get_offset() { + return offset_; + } + + void set_offset(size_t offset) { + offset_ = offset; + } + + virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) = 0; + + static bool IsLessThan(TypedRVA *a, TypedRVA *b) { + return a->rva() < b->rva(); + } + + private: + const RVA rva_; + RVA relative_target_; + size_t offset_; + }; + + public: explicit DisassemblerElf32(const void* start, size_t length); + virtual ~DisassemblerElf32() { }; + virtual ExecutableType kind() = 0; virtual e_machine_values ElfEM() = 0; @@ -39,7 +84,7 @@ class DisassemblerElf32 : public Disassembler { // Public for unittests only std::vector<RVA> &Abs32Locations() { return abs32_locations_; } - std::vector<RVA> &Rel32Locations() { return rel32_locations_; } + ScopedVector<TypedRVA> &Rel32Locations() { return rel32_locations_; } protected: @@ -111,6 +156,8 @@ class DisassemblerElf32 : public Disassembler { CheckBool RVAsToOffsets(std::vector<RVA>* rvas /*in*/, std::vector<size_t>* offsets /*out*/); + CheckBool RVAsToOffsets(ScopedVector<TypedRVA>* rvas /*in and out*/); + // Parsing Code used to really implement Disassemble CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; @@ -121,8 +168,8 @@ class DisassemblerElf32 : public Disassembler { const Elf32_Shdr *section_header, std::vector<size_t>::iterator* current_abs_offset, std::vector<size_t>::iterator end_abs_offset, - std::vector<size_t>::iterator* current_rel_offset, - std::vector<size_t>::iterator end_rel_offset, + ScopedVector<TypedRVA>::iterator* current_rel, + ScopedVector<TypedRVA>::iterator end_rel, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseSimpleRegion(size_t start_file_offset, 
size_t end_file_offset, @@ -145,7 +192,7 @@ class DisassemblerElf32 : public Disassembler { const char *default_string_section_; std::vector<RVA> abs32_locations_; - std::vector<RVA> rel32_locations_; + ScopedVector<TypedRVA> rel32_locations_; DISALLOW_COPY_AND_ASSIGN(DisassemblerElf32); }; diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc index f271020..6270c64 100644 --- a/courgette/disassembler_elf_32_arm.cc +++ b/courgette/disassembler_elf_32_arm.cc @@ -17,6 +17,45 @@ namespace courgette { +CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget( + const uint8* op_pointer) { + uint32 temp = 0; + + switch (type_) { + case ARM_OFF24: + // The offset is given by the lower 24-bits of the op, shifted + // left 2 bits, and sign extended. + temp = Read32LittleEndian(op_pointer); + temp = (temp & 0x00FFFFFF) << 2; + if (temp & 0x02000000) + temp |= 0xFC000000; + temp += 8; + break; + case ARM_OFF8: + // The offset is given by lower 8 bits of the op. It is a 9-bit + // offset, shifted right one bit and signed extended. + temp = (Read16LittleEndian(op_pointer) & 0x00FF) << 1; + if (temp & 0x0100) + temp |= 0xFFFFFE00; + temp += 4; // Offset from _next_ PC. + break; + case ARM_OFF11: + // The offset is given by lower 11 bits of the op, and is a + // 12-bit offset, shifted right one bit and sign extended. + temp = (Read16LittleEndian(op_pointer) & 0x07FF) << 1; + if (temp & 0x00000800) + temp |= 0xFFFFF000; + temp += 4; // Offset from _next_ PC. 
+ break; + default: + return false; + } + + set_relative_target(temp); + + return true; +} + DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length) : DisassemblerElf32(start, length) { } diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h index dcbc46e..57e750e 100644 --- a/courgette/disassembler_elf_32_arm.h +++ b/courgette/disassembler_elf_32_arm.h @@ -14,8 +14,24 @@ namespace courgette { class AssemblyProgram; +enum ARM_RVA { + ARM_OFF8, + ARM_OFF11, + ARM_OFF24, +}; + class DisassemblerElf32ARM : public DisassemblerElf32 { public: + class TypedRVAARM : public TypedRVA { + public: + TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { } + + virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) OVERRIDE; + + private: + ARM_RVA type_; + }; + explicit DisassemblerElf32ARM(const void* start, size_t length); virtual ExecutableType kind() { return EXE_ELF_32_ARM; } diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc index 00bb650..bfd8e83 100644 --- a/courgette/disassembler_elf_32_x86.cc +++ b/courgette/disassembler_elf_32_x86.cc @@ -150,9 +150,14 @@ CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( } } if (rel32) { - RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); + RVA rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); + TypedRVAX86* rel32_rva = new TypedRVAX86(rva); - RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); + if (!rel32_rva->ComputeRelativeTarget(rel32)) { + return false; + } + + RVA target_rva = rel32_rva->rva() + rel32_rva->relative_target(); // To be valid, rel32 target must be within image, and within this // section. 
if (IsValidRVA(target_rva)) { diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h index 28de7cf..5e7cdff 100644 --- a/courgette/disassembler_elf_32_x86.h +++ b/courgette/disassembler_elf_32_x86.h @@ -16,6 +16,17 @@ class AssemblyProgram; class DisassemblerElf32X86 : public DisassemblerElf32 { public: + class TypedRVAX86 : public TypedRVA { + public: + explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { + } + + virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) OVERRIDE { + set_relative_target(Read32LittleEndian(op_pointer) + 4); + return true; + } + }; + explicit DisassemblerElf32X86(const void* start, size_t length); virtual ExecutableType kind() { return EXE_ELF_32_X86; } diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc index 2624985..297ffcc 100644 --- a/courgette/disassembler_elf_32_x86_unittest.cc +++ b/courgette/disassembler_elf_32_x86_unittest.cc @@ -48,20 +48,31 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name, // Prove that none of the rel32 RVAs overlap with abs32 RVAs std::set<courgette::RVA> abs(disassembler->Abs32Locations().begin(), disassembler->Abs32Locations().end()); - std::set<courgette::RVA> rel(disassembler->Rel32Locations().begin(), - disassembler->Rel32Locations().end()); - for (std::vector<courgette::RVA>::iterator rel32 = - disassembler->Rel32Locations().begin(); + std::set<courgette::DisassemblerElf32::TypedRVA*> + rel(disassembler->Rel32Locations().begin(), + disassembler->Rel32Locations().end()); + for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator + rel32 = disassembler->Rel32Locations().begin(); rel32 != disassembler->Rel32Locations().end(); rel32++) { - EXPECT_TRUE(abs.find(*rel32) == abs.end()); + EXPECT_TRUE(abs.find((*rel32)->rva()) == abs.end()); } for (std::vector<courgette::RVA>::iterator abs32 = disassembler->Abs32Locations().begin(); abs32 != disassembler->Abs32Locations().end(); abs32++) { - 
EXPECT_TRUE(rel.find(*abs32) == rel.end()); + bool found = false; + for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator + rel32 = disassembler->Rel32Locations().begin(); + rel32 != disassembler->Rel32Locations().end(); + rel32++) { + if (*abs32 == (*rel32)->rva()) { + found = true; + break; + } + } + EXPECT_TRUE(!found); } delete program; } diff --git a/courgette/typedrva_unittest.cc b/courgette/typedrva_unittest.cc new file mode 100644 index 0000000..780c392 --- /dev/null +++ b/courgette/typedrva_unittest.cc @@ -0,0 +1,92 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "courgette/base_test_unittest.h" +#include "courgette/disassembler_elf_32_arm.h" +#include "courgette/disassembler_elf_32_x86.h" + +class TypedRVATest : public BaseTest { + public: + void TestRelativeTargetX86(courgette::RVA word, courgette::RVA expected) + const; + + void TestRelativeTargetARM(courgette::ARM_RVA arm_rva, + courgette::RVA rva, + uint32 op, + courgette::RVA expected) const; +}; + +void TypedRVATest::TestRelativeTargetX86(courgette::RVA word, + courgette::RVA expected) const { + courgette::DisassemblerElf32X86::TypedRVAX86* typed_rva + = new courgette::DisassemblerElf32X86::TypedRVAX86(0); + const uint8* op_pointer = reinterpret_cast<const uint8*>(&word); + + EXPECT_TRUE(typed_rva->ComputeRelativeTarget(op_pointer)); + EXPECT_EQ(typed_rva->relative_target(), expected); +} + +uint32 Read32LittleEndian(const void* address) { + return *reinterpret_cast<const uint32*>(address); +} + +void TypedRVATest::TestRelativeTargetARM(courgette::ARM_RVA arm_rva, + courgette::RVA rva, + uint32 op, + courgette::RVA expected) const { + courgette::DisassemblerElf32ARM::TypedRVAARM* typed_rva + = new courgette::DisassemblerElf32ARM::TypedRVAARM(arm_rva, 0); + uint8* op_pointer = reinterpret_cast<uint8*>(&op); + + 
EXPECT_TRUE(typed_rva->ComputeRelativeTarget(op_pointer)); + EXPECT_EQ(rva + typed_rva->relative_target(), expected); +} + +TEST_F(TypedRVATest, TestX86) { + TestRelativeTargetX86(0x0, 0x4); +} + +// ARM opcodes taken from and tested against the output of +// "arm-linux-gnueabi-objdump -d daisy_3701.98.0/bin/ls" + +TEST_F(TypedRVATest, TestARM_OFF8_PREFETCH) { + TestRelativeTargetARM(courgette::ARM_OFF8, 0x0, 0x0, 0x4); +} + +TEST_F(TypedRVATest, TestARM_OFF8_FORWARDS) { + TestRelativeTargetARM(courgette::ARM_OFF8, 0x2bcc, 0xd00e, 0x2bec); + TestRelativeTargetARM(courgette::ARM_OFF8, 0x3752, 0xd910, 0x3776); +} + +TEST_F(TypedRVATest, TestARM_OFF8_BACKWARDS) { + TestRelativeTargetARM(courgette::ARM_OFF8, 0x3774, 0xd1f6, 0x3764); +} + +TEST_F(TypedRVATest, TestARM_OFF11_PREFETCH) { + TestRelativeTargetARM(courgette::ARM_OFF11, 0x0, 0x0, 0x4); +} + +TEST_F(TypedRVATest, TestARM_OFF11_FORWARDS) { + TestRelativeTargetARM(courgette::ARM_OFF11, 0x2bea, 0xe005, 0x2bf8); +} + +TEST_F(TypedRVATest, TestARM_OFF11_BACKWARDS) { + TestRelativeTargetARM(courgette::ARM_OFF11, 0x2f80, 0xe6cd, 0x2d1e); + TestRelativeTargetARM(courgette::ARM_OFF11, 0x3610, 0xe56a, 0x30e8); +} + +TEST_F(TypedRVATest, TestARM_OFF24_PREFETCH) { + TestRelativeTargetARM(courgette::ARM_OFF24, 0x0, 0x0, 0x8); +} + +TEST_F(TypedRVATest, TestARM_OFF24_FORWARDS) { + TestRelativeTargetARM(courgette::ARM_OFF24, 0x2384, 0x4af3613a, 0xffcda874); + TestRelativeTargetARM(courgette::ARM_OFF24, 0x23bc, 0x6af961b9, 0xffe5aaa8); + TestRelativeTargetARM(courgette::ARM_OFF24, 0x23d4, 0x2b006823, 0x1c468); +} + +TEST_F(TypedRVATest, TestARM_OFF24_BACKWARDS) { + // TODO(paulgazz): find a real-world example of an ARM branch op + // that jumps backwards. +} |