From 58b822d441f5c982e879e536fa3c1cbac8fd339a Mon Sep 17 00:00:00 2001 From: huangs Date: Sat, 12 Mar 2016 12:56:11 -0800 Subject: [Courgette] Clean up Disassembler; fix ELF Memory leaks. Cleaning up code surrounding Disassembler: - Extract AddressTranslator interface to be used across subclasses. - Use FileOffset = size_t by context. - Detailed comments & TODOs in DisassemblerElf32ARM. - Fix DisassemblerElf32ARM memory leaks. - Lots of superficial stylistic changes. Except for AddressTranslator routines and unit tests, shying away from control flow and logic changes. BUG=579206 Review URL: https://codereview.chromium.org/1676683002 Cr-Commit-Position: refs/heads/master@{#380881} --- courgette/disassembler.cc | 20 +- courgette/disassembler.h | 34 +-- courgette/disassembler_elf_32.cc | 333 ++++++++++++-------------- courgette/disassembler_elf_32.h | 127 +++++----- courgette/disassembler_elf_32_arm.cc | 244 +++++++++++-------- courgette/disassembler_elf_32_arm.h | 56 +++-- courgette/disassembler_elf_32_x86.cc | 111 ++++----- courgette/disassembler_elf_32_x86.h | 46 ++-- courgette/disassembler_elf_32_x86_unittest.cc | 89 +++---- courgette/disassembler_win32_x64.cc | 222 +++++++++-------- courgette/disassembler_win32_x64.h | 78 ++---- courgette/disassembler_win32_x64_unittest.cc | 11 +- courgette/disassembler_win32_x86.cc | 214 ++++++++--------- courgette/disassembler_win32_x86.h | 75 ++---- courgette/disassembler_win32_x86_unittest.cc | 11 +- courgette/image_utils.h | 38 ++- courgette/rel32_finder_win32_x86.cc | 30 ++- courgette/rel32_finder_win32_x86.h | 20 +- courgette/rel32_finder_win32_x86_unittest.cc | 3 +- 19 files changed, 882 insertions(+), 880 deletions(-) (limited to 'courgette') diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index b9fce8b..9b58ba0 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -4,10 +4,12 @@ #include "courgette/disassembler.h" +#include "base/logging.h" + namespace courgette { 
Disassembler::Disassembler(const void* start, size_t length) - : failure_reason_("uninitialized") { + : failure_reason_("uninitialized") { start_ = reinterpret_cast(start); length_ = length; end_ = start_ + length_; @@ -15,13 +17,21 @@ Disassembler::Disassembler(const void* start, size_t length) Disassembler::~Disassembler() {}; -const uint8_t* Disassembler::OffsetToPointer(size_t offset) const { - assert(start_ + offset <= end_); - return start_ + offset; +const uint8_t* Disassembler::FileOffsetToPointer(FileOffset file_offset) const { + CHECK_LE(file_offset, static_cast(end_ - start_)); + return start_ + file_offset; +} + +const uint8_t* Disassembler::RVAToPointer(RVA rva) const { + FileOffset file_offset = RVAToFileOffset(rva); + if (file_offset == kNoFileOffset) + return nullptr; + + return FileOffsetToPointer(file_offset); } bool Disassembler::Good() { - failure_reason_ = NULL; + failure_reason_ = nullptr; return true; } diff --git a/courgette/disassembler.h b/courgette/disassembler.h index e833cfa..bc715b0 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -16,33 +16,35 @@ namespace courgette { class AssemblyProgram; -class Disassembler { +class Disassembler : public AddressTranslator { public: virtual ~Disassembler(); - virtual ExecutableType kind() { return EXE_UNKNOWN; } + // AddressTranslator interfaces. + virtual RVA FileOffsetToRVA(FileOffset file_offset) const override = 0; + virtual FileOffset RVAToFileOffset(RVA rva) const override = 0; + const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override; + const uint8_t* RVAToPointer(RVA rva) const override; - // ok() may always be called but returns 'true' only after ParseHeader - // succeeds. - bool ok() const { return failure_reason_ == NULL; } + virtual ExecutableType kind() const = 0; - // Returns 'true' if the buffer appears to be a valid executable of the - // expected type. It is not required that this be called before Disassemble. 
+ // Returns true if the buffer appears to be a valid executable of the expected + // type, and false otherwise. This needs not be called before Disassemble(). virtual bool ParseHeader() = 0; // Disassembles the item passed to the factory method into the output // parameter 'program'. virtual bool Disassemble(AssemblyProgram* program) = 0; - // Returns the length of the source executable. May reduce after ParseHeader. + // ok() may always be called but returns true only after ParseHeader() + // succeeds. + bool ok() const { return failure_reason_ == nullptr; } + + // Returns the length of the image. May reduce after ParseHeader(). size_t length() const { return length_; } const uint8_t* start() const { return start_; } const uint8_t* end() const { return end_; } - // Returns a pointer into the memory copy of the file format. - // FileOffsetToPointer(0) returns a pointer to the start of the file format. - const uint8_t* OffsetToPointer(size_t offset) const; - protected: Disassembler(const void* start, size_t length); @@ -55,16 +57,16 @@ class Disassembler { } // Reduce the length of the image in memory. Does not actually free - // (or realloc) any memory. Usually only called via ParseHeader() + // (or realloc) any memory. Usually only called via ParseHeader(). void ReduceLength(size_t reduced_length); private: const char* failure_reason_; // - // Basic information that is always valid after Construction, though - // ParseHeader may shorten the length if the executable is shorter than - // the total data. + // Basic information that is always valid after construction, although + // ParseHeader() may shorten |length_| if the executable is shorter than the + // total data. // size_t length_; // In current memory. const uint8_t* start_; // In current memory, base for 'file offsets'. 
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc index 84aa971..9ceb8ab 100644 --- a/courgette/disassembler_elf_32.cc +++ b/courgette/disassembler_elf_32.cc @@ -4,39 +4,70 @@ #include "courgette/disassembler_elf_32.h" -#include -#include - #include -#include -#include #include "base/logging.h" -#include "base/memory/scoped_vector.h" - #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/encoded_program.h" namespace courgette { DisassemblerElf32::DisassemblerElf32(const void* start, size_t length) - : Disassembler(start, length), - header_(NULL), - section_header_table_(NULL), - section_header_table_size_(0), - program_header_table_(NULL), - program_header_table_size_(0), - default_string_section_(NULL) { + : Disassembler(start, length), + header_(nullptr), + section_header_table_(nullptr), + section_header_table_size_(0), + program_header_table_(nullptr), + program_header_table_size_(0), + default_string_section_(nullptr) { +} + +RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const { + // File offsets can be 64-bit values, but we are dealing with 32-bit + // executables and so only need to support 32-bit file sizes. + uint32_t offset32 = static_cast(offset); + + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); + // These can appear to have a size in the file, but don't. 
+ if (section_header->sh_type == SHT_NOBITS) + continue; + + Elf32_Off section_begin = section_header->sh_offset; + Elf32_Off section_end = section_begin + section_header->sh_size; + + if (offset32 >= section_begin && offset32 < section_end) { + return section_header->sh_addr + (offset32 - section_begin); + } + } + + return 0; +} + +FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const { + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); + // These can appear to have a size in the file, but don't. + if (section_header->sh_type == SHT_NOBITS) + continue; + Elf32_Addr begin = section_header->sh_addr; + Elf32_Addr end = begin + section_header->sh_size; + + if (rva >= begin && rva < end) + return section_header->sh_offset + (rva - begin); + } + return kNoFileOffset; } bool DisassemblerElf32::ParseHeader() { if (length() < sizeof(Elf32_Ehdr)) return Bad("Too small"); - header_ = (Elf32_Ehdr *)start(); + header_ = reinterpret_cast(start()); - // Have magic for elf header? + // Have magic for ELF header? 
if (header_->e_ident[0] != 0x7f || header_->e_ident[1] != 'E' || header_->e_ident[2] != 'L' || @@ -59,23 +90,25 @@ bool DisassemblerElf32::ParseHeader() { if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr))) return Bad("Out of bounds section header table"); - section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff); + section_header_table_ = reinterpret_cast( + FileOffsetToPointer(header_->e_shoff)); section_header_table_size_ = header_->e_shnum; if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr))) return Bad("Out of bounds program header table"); - program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff); + program_header_table_ = reinterpret_cast( + FileOffsetToPointer(header_->e_phoff)); program_header_table_size_ = header_->e_phnum; if (header_->e_shstrndx >= header_->e_shnum) return Bad("Out of bounds string section index"); - default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx); + default_string_section_ = reinterpret_cast( + SectionBody(static_cast(header_->e_shstrndx))); - if (!UpdateLength()) { + if (!UpdateLength()) return Bad("Out of bounds section or segment"); - } return Good(); } @@ -97,7 +130,6 @@ bool DisassemblerElf32::Disassemble(AssemblyProgram* target) { return false; target->DefaultAssignIndexes(); - return true; } @@ -105,8 +137,9 @@ bool DisassemblerElf32::UpdateLength() { Elf32_Off result = 0; // Find the end of the last section - for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { - const Elf32_Shdr *section_header = SectionHeader(section_id); + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_NOBITS) continue; @@ -119,8 +152,9 @@ bool DisassemblerElf32::UpdateLength() { } // Find the end of the last segment - for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { - const 
Elf32_Phdr *segment_header = ProgramSegmentHeader(i); + for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount(); + ++segment_id) { + const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id); if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1)) return false; @@ -129,25 +163,26 @@ bool DisassemblerElf32::UpdateLength() { result = std::max(result, segment_end); } - Elf32_Off section_table_end = header_->e_shoff + - (header_->e_shnum * sizeof(Elf32_Shdr)); + Elf32_Off section_table_end = + header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr)); result = std::max(result, section_table_end); - Elf32_Off segment_table_end = header_->e_phoff + - (header_->e_phnum * sizeof(Elf32_Phdr)); + Elf32_Off segment_table_end = + header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr)); result = std::max(result, segment_table_end); ReduceLength(result); return true; } -CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const { +CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const { if (rva == kUnassignedRVA) return false; // It's valid if it's contained in any program segment - for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { - const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); + for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount(); + ++segment_id) { + const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id); if (segment_header->p_type != PT_LOAD) continue; @@ -162,114 +197,58 @@ CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const { return false; } -CheckBool DisassemblerElf32::RVAToFileOffset(RVA rva, - size_t* result) const { - for (int i = 0; i < SectionHeaderCount(); i++) { - const Elf32_Shdr *section_header = SectionHeader(i); - // These can appear to have a size in the file, but don't. 
- if (section_header->sh_type == SHT_NOBITS) - continue; - Elf32_Addr begin = section_header->sh_addr; - Elf32_Addr end = begin + section_header->sh_size; - - if (rva >= begin && rva < end) { - *result = section_header->sh_offset + (rva - begin); - return true; - } - } - return false; -} - -RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const { - // File offsets can be 64 bit values, but we are dealing with 32 - // bit executables and so only need to support 32bit file sizes. - uint32_t offset32 = (uint32_t)offset; - - for (int i = 0; i < SectionHeaderCount(); i++) { - - const Elf32_Shdr *section_header = SectionHeader(i); - - // These can appear to have a size in the file, but don't. - if (section_header->sh_type == SHT_NOBITS) - continue; - - Elf32_Off section_begin = section_header->sh_offset; - Elf32_Off section_end = section_begin + section_header->sh_size; - - if (offset32 >= section_begin && offset32 < section_end) { - return section_header->sh_addr + (offset32 - section_begin); - } - } - - return 0; -} - -CheckBool DisassemblerElf32::RVAsToOffsets(std::vector* rvas, - std::vector* offsets) { - offsets->clear(); - - for (std::vector::iterator rva = rvas->begin(); - rva != rvas->end(); - rva++) { - - size_t offset; - - if (!RVAToFileOffset(*rva, &offset)) +CheckBool DisassemblerElf32::RVAsToFileOffsets( + const std::vector& rvas, + std::vector* file_offsets) { + file_offsets->clear(); + for (RVA rva : rvas) { + FileOffset file_offset = RVAToFileOffset(rva); + if (file_offset == kNoFileOffset) return false; - - offsets->push_back(offset); + file_offsets->push_back(file_offset); } - return true; } -CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector* rvas) { - for (ScopedVector::iterator rva = rvas->begin(); - rva != rvas->end(); - rva++) { - - size_t offset; - - if (!RVAToFileOffset((*rva)->rva(), &offset)) +CheckBool DisassemblerElf32::RVAsToFileOffsets( + ScopedVector* typed_rvas) { + for (TypedRVA* typed_rva : *typed_rvas) { + FileOffset 
file_offset = RVAToFileOffset(typed_rva->rva()); + if (file_offset == kNoFileOffset) return false; - - (*rva)->set_offset(offset); + typed_rva->set_file_offset(file_offset); } - return true; } CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. - uint32_t file_offset = 0; + FileOffset file_offset = 0; - std::vector abs_offsets; + std::vector abs_offsets; - if (!RVAsToOffsets(&abs32_locations_, &abs_offsets)) + if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets)) return false; - if (!RVAsToOffsets(&rel32_locations_)) + if (!RVAsToFileOffsets(&rel32_locations_)) return false; - std::vector::iterator current_abs_offset = abs_offsets.begin(); + std::vector::iterator current_abs_offset = abs_offsets.begin(); ScopedVector::iterator current_rel = rel32_locations_.begin(); - std::vector::iterator end_abs_offset = abs_offsets.end(); + std::vector::iterator end_abs_offset = abs_offsets.end(); ScopedVector::iterator end_rel = rel32_locations_.end(); - for (int section_id = 0; - section_id < SectionHeaderCount(); - section_id++) { - - const Elf32_Shdr *section_header = SectionHeader(section_id); + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_NOBITS) continue; - if (!ParseSimpleRegion(file_offset, - section_header->sh_offset, - program)) + if (!ParseSimpleRegion(file_offset, section_header->sh_offset, program)) return false; + file_offset = section_header->sh_offset; switch (section_header->sh_type) { @@ -280,10 +259,13 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { break; case SHT_PROGBITS: if (!ParseProgbitsSection(section_header, - &current_abs_offset, end_abs_offset, - &current_rel, end_rel, - program)) + &current_abs_offset, + end_abs_offset, + &current_rel, + end_rel, + program)) { return false; + } file_offset = section_header->sh_offset + 
section_header->sh_size; break; case SHT_INIT_ARRAY: @@ -292,28 +274,27 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { while (current_abs_offset != end_abs_offset && *current_abs_offset >= section_header->sh_offset && *current_abs_offset < - (section_header->sh_offset + section_header->sh_size)) { + section_header->sh_offset + section_header->sh_size) { // Skip any abs_offsets appear in the unsupported INIT_ARRAY section - VLOG(1) << "Skipping relocation entry for unsupported section: " << - section_header->sh_type; - current_abs_offset++; + VLOG(1) << "Skipping relocation entry for unsupported section: " + << section_header->sh_type; + ++current_abs_offset; } break; default: if (current_abs_offset != end_abs_offset && - *current_abs_offset >= section_header->sh_offset && - *current_abs_offset < - (section_header->sh_offset + section_header->sh_size)) - VLOG(1) << "Relocation address in unrecognized ELF section: " << \ - section_header->sh_type; - break; + *current_abs_offset >= section_header->sh_offset && + *current_abs_offset < + section_header->sh_offset + section_header->sh_size) { + VLOG(1) << "Relocation address in unrecognized ELF section: " + << section_header->sh_type; + } + break; } } // Rest of the file past the last section - if (!ParseSimpleRegion(file_offset, - length(), - program)) + if (!ParseSimpleRegion(file_offset, length(), program)) return false; // Make certain we consume all of the relocations as expected @@ -321,34 +302,32 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { } CheckBool DisassemblerElf32::ParseProgbitsSection( - const Elf32_Shdr *section_header, - std::vector::iterator* current_abs_offset, - std::vector::iterator end_abs_offset, + const Elf32_Shdr* section_header, + std::vector::iterator* current_abs_offset, + std::vector::iterator end_abs_offset, ScopedVector::iterator* current_rel, ScopedVector::iterator end_rel, AssemblyProgram* program) { - // Walk all the bytes in the file, 
whether or not in a section. - size_t file_offset = section_header->sh_offset; - size_t section_end = section_header->sh_offset + section_header->sh_size; + FileOffset file_offset = section_header->sh_offset; + FileOffset section_end = section_header->sh_offset + section_header->sh_size; Elf32_Addr origin = section_header->sh_addr; - size_t origin_offset = section_header->sh_offset; + FileOffset origin_offset = section_header->sh_offset; if (!program->EmitOriginInstruction(origin)) return false; while (file_offset < section_end) { - if (*current_abs_offset != end_abs_offset && file_offset > **current_abs_offset) return false; while (*current_rel != end_rel && - file_offset > (**current_rel)->get_offset()) { - (*current_rel)++; + file_offset > (**current_rel)->file_offset()) { + ++(*current_rel); } - size_t next_relocation = section_end; + FileOffset next_relocation = section_end; if (*current_abs_offset != end_abs_offset && next_relocation > **current_abs_offset) @@ -358,8 +337,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( // an Abs value, or the end of the section, so +3 to make sure there is // room for the full 4 byte value. 
if (*current_rel != end_rel && - next_relocation > ((**current_rel)->get_offset() + 3)) - next_relocation = (**current_rel)->get_offset(); + next_relocation > ((**current_rel)->file_offset() + 3)) + next_relocation = (**current_rel)->file_offset(); if (next_relocation > file_offset) { if (!ParseSimpleRegion(file_offset, next_relocation, program)) @@ -371,28 +350,28 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( if (*current_abs_offset != end_abs_offset && file_offset == **current_abs_offset) { - const uint8_t* p = OffsetToPointer(file_offset); + const uint8_t* p = FileOffsetToPointer(file_offset); RVA target_rva = Read32LittleEndian(p); if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva))) return false; file_offset += sizeof(RVA); - (*current_abs_offset)++; + ++(*current_abs_offset); continue; } if (*current_rel != end_rel && - file_offset == (**current_rel)->get_offset()) { + file_offset == (**current_rel)->file_offset()) { uint32_t relative_target = (**current_rel)->relative_target(); // This cast is for 64 bit systems, and is only safe because we // are working on 32 bit executables. RVA target_rva = (RVA)(origin + (file_offset - origin_offset) + relative_target); - if (! 
(**current_rel)->EmitInstruction(program, target_rva)) + if (!(**current_rel)->EmitInstruction(program, target_rva)) return false; file_offset += (**current_rel)->op_size(); - (*current_rel)++; + ++(*current_rel); continue; } } @@ -401,17 +380,19 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( return ParseSimpleRegion(file_offset, section_end, program); } -CheckBool DisassemblerElf32::ParseSimpleRegion( - size_t start_file_offset, - size_t end_file_offset, - AssemblyProgram* program) { +CheckBool DisassemblerElf32::ParseSimpleRegion(FileOffset start_file_offset, + FileOffset end_file_offset, + AssemblyProgram* program) { // Callers don't guarantee start < end - if (start_file_offset >= end_file_offset) return true; + if (start_file_offset >= end_file_offset) + return true; const size_t len = end_file_offset - start_file_offset; - if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len)) + if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), + len)) { return false; + } return true; } @@ -420,12 +401,13 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { abs32_locations_.clear(); // Loop through sections for relocation sections - for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { - const Elf32_Shdr *section_header = SectionHeader(section_id); + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_REL) { - - Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id); + const Elf32_Rel* relocs_table = + reinterpret_cast(SectionBody(section_id)); int relocs_table_count = section_header->sh_size / section_header->sh_entsize; @@ -433,7 +415,7 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { // Elf32_Word relocation_section_id = section_header->sh_info; // Loop through relocation objects in the relocation section - for (int rel_id = 0; rel_id < relocs_table_count; 
rel_id++) { + for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) { RVA rva; // Quite a few of these conversions fail, and we simply skip @@ -451,23 +433,18 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { } CheckBool DisassemblerElf32::CheckSection(RVA rva) { - size_t offset; - - if (!RVAToFileOffset(rva, &offset)) { + FileOffset file_offset = RVAToFileOffset(rva); + if (file_offset == kNoFileOffset) return false; - } - - for (int section_id = 0; - section_id < SectionHeaderCount(); - section_id++) { - const Elf32_Shdr *section_header = SectionHeader(section_id); + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); - if (offset >= section_header->sh_offset && - offset < (section_header->sh_offset + section_header->sh_size)) { + if (file_offset >= section_header->sh_offset && + file_offset < (section_header->sh_offset + section_header->sh_size)) { switch (section_header->sh_type) { - case SHT_REL: - // Fall-through + case SHT_REL: // Falls through. case SHT_PROGBITS: return true; } @@ -478,16 +455,14 @@ CheckBool DisassemblerElf32::CheckSection(RVA rva) { } CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() { - rel32_locations_.clear(); // Loop through sections for relocation sections - for (int section_id = 0; - section_id < SectionHeaderCount(); - section_id++) { - - const Elf32_Shdr *section_header = SectionHeader(section_id); + for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); + ++section_id) { + const Elf32_Shdr* section_header = SectionHeader(section_id); + // TODO(huangs): Add better checks to skip non-code sections. // Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0. 
if (section_header->sh_type != SHT_PROGBITS || section_header->sh_addr == 0) diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h index 8483ce3..e9d00ca 100644 --- a/courgette/disassembler_elf_32.h +++ b/courgette/disassembler_elf_32.h @@ -8,10 +8,12 @@ #include #include +#include + #include "base/macros.h" #include "base/memory/scoped_vector.h" -#include "courgette/assembly_program.h" #include "courgette/disassembler.h" +#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_elf.h" @@ -19,43 +21,33 @@ namespace courgette { class AssemblyProgram; -// A courgette disassembler for 32-bit ELF files. This class is only a -// partial implementation. Subclasses implement the -// architecture-specific parts of processing 32-bit ELF files. Specifically, -// RelToRVA processes entries in ELF relocation table, -// ParseRelocationSection verifies the organization of the ELF -// relocation table, and ParseRel32RelocsFromSection finds branch -// targets by looking for relative jump/call opcodes in the particular -// architecture's machine code. +// A Courgette disassembler for 32-bit ELF files. This is only a partial +// implementation that admits subclasses for the architecture-specific parts of +// 32-bit ELF file processing. Specifically: +// - RelToRVA() processes entries in ELF relocation table. +// - ParseRelocationSection() verifies the organization of the ELF relocation +// table. +// - ParseRel32RelocsFromSection() finds branch targets by looking for relative +// branch/call opcodes in the particular architecture's machine code. class DisassemblerElf32 : public Disassembler { public: // Different instructions encode the target rva differently. This // class encapsulates this behavior. public for use in unit tests. 
class TypedRVA { public: - explicit TypedRVA(RVA rva) : rva_(rva), offset_(static_cast(-1)) { - } + explicit TypedRVA(RVA rva) : rva_(rva) { } - virtual ~TypedRVA() { }; + virtual ~TypedRVA() { } - RVA rva() { - return rva_; - } - - RVA relative_target() { - return relative_target_; - } + RVA rva() const { return rva_; } + RVA relative_target() const { return relative_target_; } + FileOffset file_offset() const { return file_offset_; } void set_relative_target(RVA relative_target) { relative_target_ = relative_target; } - - size_t get_offset() { - return offset_; - } - - void set_offset(size_t offset) { - offset_ = offset; + void set_file_offset(FileOffset file_offset) { + file_offset_ = file_offset; } // Computes the relative jump's offset from the op in p. @@ -65,33 +57,33 @@ class DisassemblerElf32 : public Disassembler { virtual CheckBool EmitInstruction(AssemblyProgram* program, RVA target_rva) = 0; + // Returns the size of the instruction containing the RVA. virtual uint16_t op_size() const = 0; - static bool IsLessThan(TypedRVA *a, TypedRVA *b) { + // Comparator for sorting, which assumes uniqueness of RVAs. + static bool IsLessThan(TypedRVA* a, TypedRVA* b) { return a->rva() < b->rva(); } private: const RVA rva_; - RVA relative_target_; - size_t offset_; + RVA relative_target_ = kNoRVA; + FileOffset file_offset_ = kNoFileOffset; }; public: - explicit DisassemblerElf32(const void* start, size_t length); - - virtual ~DisassemblerElf32() { }; + DisassemblerElf32(const void* start, size_t length); - virtual ExecutableType kind() = 0; + ~DisassemblerElf32() override { } - virtual e_machine_values ElfEM() = 0; + // Disassembler interfaces. 
+ RVA FileOffsetToRVA(FileOffset file_offset) const override; + FileOffset RVAToFileOffset(RVA rva) const override; + virtual ExecutableType kind() const override = 0; + bool ParseHeader() override; + bool Disassemble(AssemblyProgram* target) override; - // Returns 'true' if the buffer appears to point to a valid ELF executable - // for 32 bit. If ParseHeader() succeeds, other member - // functions may be called. - virtual bool ParseHeader(); - - virtual bool Disassemble(AssemblyProgram* target); + virtual e_machine_values ElfEM() const = 0; // Public for unittests only std::vector &Abs32Locations() { return abs32_locations_; } @@ -107,13 +99,13 @@ class DisassemblerElf32 : public Disassembler { return section_header_table_size_; } - const Elf32_Shdr *SectionHeader(int id) const { + const Elf32_Shdr* SectionHeader(Elf32_Half id) const { assert(id >= 0 && id < SectionHeaderCount()); return section_header_table_ + id; } - const uint8_t* SectionBody(int id) const { - return OffsetToPointer(SectionHeader(id)->sh_offset); + const uint8_t* SectionBody(Elf32_Half id) const { + return FileOffsetToPointer(SectionHeader(id)->sh_offset); } // Misc Segment Helpers @@ -122,61 +114,62 @@ class DisassemblerElf32 : public Disassembler { return program_header_table_size_; } - const Elf32_Phdr *ProgramSegmentHeader(int id) const { + const Elf32_Phdr* ProgramSegmentHeader(Elf32_Half id) const { assert(id >= 0 && id < ProgramSegmentHeaderCount()); return program_header_table_ + id; } // Misc address space helpers - CheckBool IsValidRVA(RVA rva) const WARN_UNUSED_RESULT; + CheckBool IsValidTargetRVA(RVA rva) const WARN_UNUSED_RESULT; - // Convert an ELF relocation struction into an RVA + // Converts an ELF relocation instruction into an RVA. virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) const WARN_UNUSED_RESULT = 0; - // Returns kNoOffset if there is no file offset corresponding to 'rva'. 
- CheckBool RVAToFileOffset(RVA rva, size_t* result) const WARN_UNUSED_RESULT; + CheckBool RVAsToFileOffsets(const std::vector& rvas, + std::vector* file_offsets); - RVA FileOffsetToRVA(size_t offset) const WARN_UNUSED_RESULT; + CheckBool RVAsToFileOffsets(ScopedVector* typed_rvas); - CheckBool RVAsToOffsets(std::vector* rvas /*in*/, - std::vector* offsets /*out*/); + // Parsing code for Disassemble(). - CheckBool RVAsToOffsets(ScopedVector* rvas /*in and out*/); + virtual CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, + AssemblyProgram* program) + WARN_UNUSED_RESULT = 0; - // Parsing Code used to really implement Disassemble + virtual CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) + WARN_UNUSED_RESULT = 0; CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; - virtual CheckBool ParseRelocationSection( - const Elf32_Shdr *section_header, - AssemblyProgram* program) WARN_UNUSED_RESULT = 0; + CheckBool ParseProgbitsSection( - const Elf32_Shdr *section_header, - std::vector::iterator* current_abs_offset, - std::vector::iterator end_abs_offset, + const Elf32_Shdr* section_header, + std::vector::iterator* current_abs_offset, + std::vector::iterator end_abs_offset, ScopedVector::iterator* current_rel, ScopedVector::iterator end_rel, AssemblyProgram* program) WARN_UNUSED_RESULT; - CheckBool ParseSimpleRegion(size_t start_file_offset, - size_t end_file_offset, + + CheckBool ParseSimpleRegion(FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseAbs32Relocs() WARN_UNUSED_RESULT; + CheckBool CheckSection(RVA rva) WARN_UNUSED_RESULT; + CheckBool ParseRel32RelocsFromSections() WARN_UNUSED_RESULT; - virtual CheckBool ParseRel32RelocsFromSection( - const Elf32_Shdr* section) WARN_UNUSED_RESULT = 0; - Elf32_Ehdr *header_; - Elf32_Shdr *section_header_table_; + const Elf32_Ehdr* header_; + const Elf32_Shdr* section_header_table_; Elf32_Half 
section_header_table_size_; - Elf32_Phdr *program_header_table_; + const Elf32_Phdr* program_header_table_; Elf32_Half program_header_table_size_; // Section header for default - const char *default_string_section_; + const char* default_string_section_; std::vector abs32_locations_; ScopedVector rel32_locations_; diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc index f6490d9..39172f4 100644 --- a/courgette/disassembler_elf_32_arm.cc +++ b/courgette/disassembler_elf_32_arm.cc @@ -4,18 +4,12 @@ #include "courgette/disassembler_elf_32_arm.h" -#include -#include - -#include -#include #include #include "base/logging.h" - +#include "base/memory/scoped_ptr.h" #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/encoded_program.h" namespace courgette { @@ -24,31 +18,34 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, RVA rva, uint16_t* c_op, uint32_t* addr) { - // This method takes an ARM or thumb opcode, extracts the relative - // target address from it (addr), and creates a corresponding - // Courgette opcode (c_op). - // - // Details on ARM the opcodes, and how the relative targets are - // computed were taken from the "ARM Architecture Reference Manual", - // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12. - // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes. + // Notation for bit ranges in comments: + // - Listing bits from highest to lowest. + // - A-Z or (j1), (j2), etc.: single bit in source. + // - a-z: multiple, consecutive bits in source. switch (type) { case ARM_OFF8: { - // The offset is given by lower 8 bits of the op. It is a 9-bit - // offset, shifted right one bit and signed extended. + // Encoding T1. + // The offset is given by lower 8 bits of the op. It is a 9-bit offset, + // shifted right 1 bit, and signed extended. 
+ // arm_op = aaaaaaaa Snnnnnnn + // *addr := SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100 + // *c_op := 00010000 aaaaaaaa uint32_t temp = (arm_op & 0x00FF) << 1; if (temp & 0x0100) temp |= 0xFFFFFE00; temp += 4; // Offset from _next_ PC. - fflush(stdout); (*addr) = temp; (*c_op) = static_cast(arm_op >> 8) | 0x1000; break; } case ARM_OFF11: { - // The offset is given by lower 11 bits of the op, and is a - // 12-bit offset, shifted right one bit and sign extended. + // Encoding T2. + // The offset is given by lower 11 bits of the op, and is a 12-bit offset, + // shifted right 1 bit, and sign extended. + // arm_op = aaaaaSnn nnnnnnnn + // *addr := SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100 + // *c_op := 00100000 000aaaaa uint32_t temp = (arm_op & 0x07FF) << 1; if (temp & 0x00000800) temp |= 0xFFFFF000; @@ -61,6 +58,9 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, case ARM_OFF24: { // The offset is given by the lower 24-bits of the op, shifted // left 2 bits, and sign extended. + // arm_op = aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn + // *addr := SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000 + // *c_op := 00110000 aaaaaaaa uint32_t temp = (arm_op & 0x00FFFFFF) << 2; if (temp & 0x02000000) temp |= 0xFC000000; @@ -71,6 +71,18 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, break; } case ARM_OFF25: { + // Encoding T4. + // arm_op = aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn + // where CD is in {01, 10, 11} + // i1 := ~(j1 ^ S) + // i2 := ~(j2 ^ S) + // If CD == 10: + // pppp := (rva % 4 == 0) ? 0100 : 0010 + // Else: + // pppp := 0100 + // *addr := SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp + // *c_op := 0100pppp aaaaaBCD + // TODO(huangs): aaaaa = 11110 and B = 1 always? Investigate and fix. 
uint32_t temp = 0; temp |= (arm_op & 0x000007FF) << 1; // imm11 temp |= (arm_op & 0x03FF0000) >> 4; // imm10 @@ -78,8 +90,8 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, uint32_t S = (arm_op & (1 << 26)) >> 26; uint32_t j2 = (arm_op & (1 << 11)) >> 11; uint32_t j1 = (arm_op & (1 << 13)) >> 13; - bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; - bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; + bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; // D + bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; // C uint32_t i2 = ~(j2 ^ S) & 1; uint32_t i1 = ~(j1 ^ S) & 1; @@ -91,7 +103,7 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, temp |= 0xFE000000; uint32_t prefetch; if (toARM) { - // Align PC on 4-byte boundary + // Align PC on 4-byte boundary. uint32_t align4byte = (rva % 4) ? 2 : 4; prefetch = align4byte; } else { @@ -101,20 +113,25 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, (*addr) = temp; uint32_t temp2 = 0x4000; - temp2 |= (arm_op & (1 << 12)) >> 12; - temp2 |= (arm_op & (1 << 14)) >> 13; - temp2 |= (arm_op & (1 << 15)) >> 13; - temp2 |= (arm_op & 0xF8000000) >> 24; + temp2 |= (arm_op & (1 << 12)) >> 12; // .......D + temp2 |= (arm_op & (1 << 14)) >> 13; // ......C. + temp2 |= (arm_op & (1 << 15)) >> 13; // .....B.. + temp2 |= (arm_op & 0xF8000000) >> 24; // aaaaa... temp2 |= (prefetch & 0x0000000F) << 8; (*c_op) = static_cast(temp2); break; } case ARM_OFF21: { + // Encoding T3. + // arm_op = 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn + // *addr := SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100 + // *c_op := 01010000 0000cccc uint32_t temp = 0; temp |= (arm_op & 0x000007FF) << 1; // imm11 temp |= (arm_op & 0x003F0000) >> 4; // imm6 uint32_t S = (arm_op & (1 << 26)) >> 26; + // TODO(huangs): Check with docs: Perhaps j1, j2 should swap? 
uint32_t j2 = (arm_op & (1 << 11)) >> 11; uint32_t j1 = (arm_op & (1 << 13)) >> 13; @@ -140,20 +157,31 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, uint16_t c_op, uint32_t addr, uint32_t* arm_op) { - // Reverses the process in the compress() method. Takes the - // Courgette op and relative address and reconstructs the original - // ARM or thumb op. switch (type) { case ARM_OFF8: + // addr = SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100 + // c_op = 00010000 aaaaaaaa + // *arm_op := aaaaaaaa Snnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF); break; case ARM_OFF11: + // addr = SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100 + // c_op = 00100000 000aaaaa + // *arm_op := aaaaaSnn nnnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF); break; case ARM_OFF24: + // addr = SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000 + // c_op = 00110000 aaaaaaaa + // *arm_op := aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF); break; case ARM_OFF25: { + // addr = SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp + // c_op = 0100pppp aaaaaBCD + // j1 := ~i1 ^ S + // j2 := ~i2 ^ S + // *arm_op := aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn uint32_t temp = 0; temp |= (c_op & (1 << 0)) << 12; temp |= (c_op & (1 << 1)) << 13; @@ -183,6 +211,9 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, break; } case ARM_OFF21: { + // addr = SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100 + // c_op = 01010000 0000cccc + // *arm_op := 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn uint32_t temp = 0xF0008000; temp |= (c_op & (0x03C00000 >> 22)) << 22; @@ -230,24 +261,28 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget( const uint8_t* op_pointer) { arm_op_ = op_pointer; switch (type_) { - case ARM_OFF8: - // Fall through + case ARM_OFF8: // Falls through. 
case ARM_OFF11: { RVA relative_target; - CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(), - &c_op_, &relative_target); + CheckBool ret = Compress(type_, + Read16LittleEndian(op_pointer), + rva(), + &c_op_, + &relative_target); set_relative_target(relative_target); return ret; } case ARM_OFF24: { RVA relative_target; - CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(), - &c_op_, &relative_target); + CheckBool ret = Compress(type_, + Read32LittleEndian(op_pointer), + rva(), + &c_op_, + &relative_target); set_relative_target(relative_target); return ret; } - case ARM_OFF25: - // Fall through + case ARM_OFF25: // Falls through. case ARM_OFF21: { // A thumb-2 op is 32 bits stored as two 16-bit words uint32_t pval = (Read16LittleEndian(op_pointer) << 16) | @@ -272,26 +307,24 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction( } DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length) - : DisassemblerElf32(start, length) { + : DisassemblerElf32(start, length) { } -// Convert an ELF relocation struction into an RVA +// Convert an ELF relocation struction into an RVA. CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const { - - // The rightmost byte of r_info is the type... + // The rightmost byte of r_info is the type. elf32_rel_arm_type_values type = - (elf32_rel_arm_type_values)(unsigned char)rel.r_info; + static_cast(rel.r_info & 0xFF); - // The other 3 bytes of r_info are the symbol + // The other 3 bytes of r_info are the symbol. uint32_t symbol = rel.r_info >> 8; - switch(type) - { + switch (type) { case R_ARM_RELATIVE: if (symbol != 0) return false; - // This is a basic ABS32 relocation address + // This is a basic ABS32 relocation address. 
*result = rel.r_offset; return true; @@ -301,32 +334,33 @@ CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const { } CheckBool DisassemblerElf32ARM::ParseRelocationSection( - const Elf32_Shdr *section_header, - AssemblyProgram* program) { - // This method compresses a contiguous stretch of R_ARM_RELATIVE - // entries in the relocation table with a Courgette relocation table - // instruction. It skips any entries at the beginning that appear - // in a section that Courgette doesn't support, e.g. INIT. + const Elf32_Shdr* section_header, + AssemblyProgram* program) { + // This method compresses a contiguous stretch of R_ARM_RELATIVE entries in + // the relocation table with a Courgette relocation table instruction. + // It skips any entries at the beginning that appear in a section that + // Courgette doesn't support, e.g. INIT. + // // Specifically, the entries should be // (1) In the same relocation table // (2) Are consecutive // (3) Are sorted in memory address order // - // Happily, this is normally the case, but it's not required by spec - // so we check, and just don't do it if we don't match up. + // Happily, this is normally the case, but it's not required by spec so we + // check, and just don't do it if we don't match up. // - // The expectation is that one relocation section will contain - // all of our R_ARM_RELATIVE entries in the expected order followed - // by assorted other entries we can't use special handling for. + // The expectation is that one relocation section will contain all of our + // R_ARM_RELATIVE entries in the expected order followed by assorted other + // entries we can't use special handling for. bool match = true; - // Walk all the bytes in the section, matching relocation table or not - size_t file_offset = section_header->sh_offset; - size_t section_end = section_header->sh_offset + section_header->sh_size; + // Walk all the bytes in the section, matching relocation table or not. 
+ FileOffset file_offset = section_header->sh_offset; + FileOffset section_end = section_header->sh_offset + section_header->sh_size; - Elf32_Rel *section_relocs_iter = - (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); + const Elf32_Rel* section_relocs_iter = reinterpret_cast( + FileOffsetToPointer(section_header->sh_offset)); uint32_t section_relocs_count = section_header->sh_size / section_header->sh_entsize; @@ -337,13 +371,15 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( if (!abs32_locations_.empty()) { std::vector::iterator reloc_iter = abs32_locations_.begin(); - for (uint32_t i = 0; i < section_relocs_count; i++) { + for (uint32_t i = 0; i < section_relocs_count; ++i) { if (section_relocs_iter->r_offset == *reloc_iter) break; - if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel), - program)) + if (!ParseSimpleRegion(file_offset, + file_offset + sizeof(Elf32_Rel), + program)) { return false; + } file_offset += sizeof(Elf32_Rel); ++section_relocs_iter; @@ -351,11 +387,12 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( while (match && (reloc_iter != abs32_locations_.end())) { if (section_relocs_iter->r_info != R_ARM_RELATIVE || - section_relocs_iter->r_offset != *reloc_iter) + section_relocs_iter->r_offset != *reloc_iter) { match = false; + } - section_relocs_iter++; - reloc_iter++; + ++section_relocs_iter; + ++reloc_iter; file_offset += sizeof(Elf32_Rel); } @@ -369,118 +406,119 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( return ParseSimpleRegion(file_offset, section_end, program); } +// TODO(huangs): Detect and avoid overlap with abs32 addresses. 
CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection( const Elf32_Shdr* section_header) { - uint32_t start_file_offset = section_header->sh_offset; - uint32_t end_file_offset = start_file_offset + section_header->sh_size; + FileOffset start_file_offset = section_header->sh_offset; + FileOffset end_file_offset = start_file_offset + section_header->sh_size; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. + // subtract |pointer_to_rva|. const uint8_t* const adjust_pointer_to_rva = start_pointer - section_header->sh_addr; // Find the rel32 relocations. const uint8_t* p = start_pointer; - bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it + bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it while (p < end_pointer) { // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? 
- - TypedRVAARM* rel32_rva = NULL; + scoped_ptr rel32_rva; RVA target_rva = 0; bool found = false; // 16-bit thumb ops - if (!found && (p + 3) <= end_pointer) { + if (!found && p + 3 <= end_pointer) { uint16_t pval = Read16LittleEndian(p); if ((pval & 0xF000) == 0xD000) { RVA rva = static_cast(p - adjust_pointer_to_rva); - rel32_rva = new TypedRVAARM(ARM_OFF8, rva); - if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { + rel32_rva.reset(new TypedRVAARM(ARM_OFF8, rva)); + if (!rel32_rva->ComputeRelativeTarget(p)) return false; - } + target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } else if ((pval & 0xF800) == 0xE000) { RVA rva = static_cast(p - adjust_pointer_to_rva); - rel32_rva = new TypedRVAARM(ARM_OFF11, rva); - if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { + rel32_rva.reset(new TypedRVAARM(ARM_OFF11, rva)); + if (!rel32_rva->ComputeRelativeTarget(p)) return false; - } + target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } - // thumb-2 ops comprised of two 16-bit words - if (!found && (p + 5) <= end_pointer) { + // thumb-2 ops comprised of two 16-bit words. + if (!found && p + 5 <= end_pointer) { // This is really two 16-bit words, not one 32-bit word. uint32_t pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2); if ((pval & 0xF8008000) == 0xF0008000) { // Covers thumb-2's 32-bit conditional/unconditional branches - - if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) { + if ((pval & (1 << 14)) || (pval & (1 << 12))) { // A branch, with link, or with link and exchange. 
RVA rva = static_cast(p - adjust_pointer_to_rva); - rel32_rva = new TypedRVAARM(ARM_OFF25, rva); - if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { + rel32_rva.reset(new TypedRVAARM(ARM_OFF25, rva)); + if (!rel32_rva->ComputeRelativeTarget(p)) return false; - } + target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; + } else { // TODO(paulgazz) make sure cond is not 111 // A conditional branch instruction RVA rva = static_cast(p - adjust_pointer_to_rva); - rel32_rva = new TypedRVAARM(ARM_OFF21, rva); - if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { + rel32_rva.reset(new TypedRVAARM(ARM_OFF21, rva)); + if (!rel32_rva->ComputeRelativeTarget(p)) return false; - } + target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } } - // 32-bit ARM ops + // 32-bit ARM ops. if (!found && on_32bit && (p + 5) <= end_pointer) { uint32_t pval = Read32LittleEndian(p); if ((pval & 0x0E000000) == 0x0A000000) { // Covers both 0x0A 0x0B ARM relative branches RVA rva = static_cast(p - adjust_pointer_to_rva); - rel32_rva = new TypedRVAARM(ARM_OFF24, rva); - if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { + rel32_rva.reset(new TypedRVAARM(ARM_OFF24, rva)); + if (!rel32_rva->ComputeRelativeTarget(p)) return false; - } + target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } - if (found && IsValidRVA(target_rva)) { - rel32_locations_.push_back(rel32_rva); + if (found && IsValidTargetRVA(target_rva)) { + uint16_t op_size = rel32_rva->op_size(); + rel32_locations_.push_back(rel32_rva.release()); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; #endif - p += rel32_rva->op_size(); + p += op_size; - // A tricky way to update the on_32bit flag. Here is the truth table: + // A tricky way to update the on_32bit flag. 
Here is the truth table: // on_32bit | on_32bit size is 4 // ---------+--------------------- // 1 | 0 0 // 0 | 0 1 // 0 | 1 0 // 1 | 1 1 - on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0; + on_32bit = (~(on_32bit ^ (op_size == 4))) != 0; } else { // Move 2 bytes at a time, but track 32-bit boundaries p += 2; diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h index 17ebb25..5dc6897 100644 --- a/courgette/disassembler_elf_32_arm.h +++ b/courgette/disassembler_elf_32_arm.h @@ -8,9 +8,10 @@ #include #include +#include + #include "base/macros.h" #include "courgette/disassembler_elf_32.h" -#include "courgette/memory_allocator.h" #include "courgette/types_elf.h" namespace courgette { @@ -30,51 +31,60 @@ class DisassemblerElf32ARM : public DisassemblerElf32 { class TypedRVAARM : public TypedRVA { public: TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { } + ~TypedRVAARM() override { } - uint16_t c_op() const { return c_op_; } - - virtual CheckBool ComputeRelativeTarget(const uint8_t* op_pointer); + // TypedRVA interfaces. + CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override; + CheckBool EmitInstruction(AssemblyProgram* program, + RVA target_rva) override; + uint16_t op_size() const override; - virtual CheckBool EmitInstruction(AssemblyProgram* program, - RVA target_rva); - - virtual uint16_t op_size() const; + uint16_t c_op() const { return c_op_; } private: ARM_RVA type_; - - uint16_t c_op_; // set by ComputeRelativeTarget() + uint16_t c_op_; // Set by ComputeRelativeTarget(). const uint8_t* arm_op_; }; - explicit DisassemblerElf32ARM(const void* start, size_t length); + DisassemblerElf32ARM(const void* start, size_t length); - virtual ExecutableType kind() { return EXE_ELF_32_ARM; } + ~DisassemblerElf32ARM() override { } - virtual e_machine_values ElfEM() { return EM_ARM; } + // DisassemblerElf32 interfaces. 
+ ExecutableType kind() const override { return EXE_ELF_32_ARM; } + e_machine_values ElfEM() const override { return EM_ARM; } + // Takes an ARM or thumb opcode |arm_op| of specified |type| and located at + // |rva|, extracts the instruction-relative target RVA into |*addr| and + // encodes the corresponding Courgette opcode as |*c_op|. + // + // Details on ARM opcodes, and target RVA extraction are taken from + // "ARM Architecture Reference Manual", section A4.1.5 and + // "Thumb-2 supplement", section 4.6.12. + // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes. static CheckBool Compress(ARM_RVA type, uint32_t arm_op, RVA rva, uint16_t* c_op /* out */, uint32_t* addr /* out */); + // Inverse for Compress(). Takes Courgette op |c_op| and relative address + // |addr| to reconstruct the original ARM or thumb op |*arm_op|. static CheckBool Decompress(ARM_RVA type, uint16_t c_op, uint32_t addr, uint32_t* arm_op /* out */); protected: - - virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) - const WARN_UNUSED_RESULT; - - virtual CheckBool ParseRelocationSection( - const Elf32_Shdr *section_header, - AssemblyProgram* program) WARN_UNUSED_RESULT; - - virtual CheckBool ParseRel32RelocsFromSection( - const Elf32_Shdr* section) WARN_UNUSED_RESULT; + // DisassemblerElf32 interfaces. 
+ CheckBool RelToRVA(Elf32_Rel rel, + RVA* result) const override WARN_UNUSED_RESULT; + CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, + AssemblyProgram* program) + override WARN_UNUSED_RESULT; + CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) + override WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS std::map rel32_target_rvas_; diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc index 98084c1..45f7cf6 100644 --- a/courgette/disassembler_elf_32_x86.cc +++ b/courgette/disassembler_elf_32_x86.cc @@ -4,37 +4,45 @@ #include "courgette/disassembler_elf_32_x86.h" -#include -#include - -#include -#include #include #include "base/logging.h" - +#include "base/memory/scoped_ptr.h" #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/encoded_program.h" namespace courgette { +CheckBool DisassemblerElf32X86::TypedRVAX86::ComputeRelativeTarget( + const uint8_t* op_pointer) { + set_relative_target(Read32LittleEndian(op_pointer) + 4); + return true; +} + +CheckBool DisassemblerElf32X86::TypedRVAX86::EmitInstruction( + AssemblyProgram* program, + RVA target_rva) { + return program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); +} + +uint16_t DisassemblerElf32X86::TypedRVAX86::op_size() const { + return 4; +} + DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length) - : DisassemblerElf32(start, length) { + : DisassemblerElf32(start, length) { } -// Convert an ELF relocation struction into an RVA +// Convert an ELF relocation struction into an RVA. CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { - - // The rightmost byte of r_info is the type... + // The rightmost byte of r_info is the type. elf32_rel_386_type_values type = - (elf32_rel_386_type_values)(unsigned char)rel.r_info; + static_cast(rel.r_info & 0xFF); - // The other 3 bytes of r_info are the symbol + // The other 3 bytes of r_info are the symbol. 
uint32_t symbol = rel.r_info >> 8; - switch(type) - { + switch (type) { case R_386_NONE: case R_386_32: case R_386_PC32: @@ -49,7 +57,7 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { if (symbol != 0) return false; - // This is a basic ABS32 relocation address + // This is a basic ABS32 relocation address. *result = rel.r_offset; return true; @@ -63,32 +71,31 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { } CheckBool DisassemblerElf32X86::ParseRelocationSection( - const Elf32_Shdr *section_header, - AssemblyProgram* program) { - // We can reproduce the R_386_RELATIVE entries in one of the relocation - // table based on other information in the patch, given these - // conditions.... + const Elf32_Shdr* section_header, + AssemblyProgram* program) { + // We can reproduce the R_386_RELATIVE entries in one of the relocation table + // based on other information in the patch, given these conditions: // // All R_386_RELATIVE entries are: // 1) In the same relocation table // 2) Are consecutive // 3) Are sorted in memory address order // - // Happily, this is normally the case, but it's not required by spec - // so we check, and just don't do it if we don't match up. + // Happily, this is normally the case, but it's not required by spec, so we + // check, and just don't do it if we don't match up. - // The expectation is that one relocation section will contain - // all of our R_386_RELATIVE entries in the expected order followed - // by assorted other entries we can't use special handling for. + // The expectation is that one relocation section will contain all of our + // R_386_RELATIVE entries in the expected order followed by assorted other + // entries we can't use special handling for. 
bool match = true; - // Walk all the bytes in the section, matching relocation table or not - size_t file_offset = section_header->sh_offset; - size_t section_end = section_header->sh_offset + section_header->sh_size; + // Walk all the bytes in the section, matching relocation table or not. + FileOffset file_offset = section_header->sh_offset; + FileOffset section_end = file_offset + section_header->sh_size; - Elf32_Rel *section_relocs_iter = - (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); + const Elf32_Rel* section_relocs_iter = reinterpret_cast( + FileOffsetToPointer(section_header->sh_offset)); uint32_t section_relocs_count = section_header->sh_size / section_header->sh_entsize; @@ -101,16 +108,17 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection( std::vector::iterator reloc_iter = abs32_locations_.begin(); - while (match && (reloc_iter != abs32_locations_.end())) { + while (match && (reloc_iter != abs32_locations_.end())) { if (section_relocs_iter->r_info != R_386_RELATIVE || - section_relocs_iter->r_offset != *reloc_iter) + section_relocs_iter->r_offset != *reloc_iter) { match = false; - section_relocs_iter++; - reloc_iter++; + } + ++section_relocs_iter; + ++reloc_iter; } if (match) { - // Skip over relocation tables + // Skip over relocation tables. if (!program->EmitElfRelocationInstruction()) return false; file_offset += sizeof(Elf32_Rel) * abs32_locations_.size(); @@ -119,28 +127,27 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection( return ParseSimpleRegion(file_offset, section_end, program); } +// TODO(huangs): Detect and avoid overlap with abs32 addresses. 
CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( const Elf32_Shdr* section_header) { - uint32_t start_file_offset = section_header->sh_offset; - uint32_t end_file_offset = start_file_offset + section_header->sh_size; + FileOffset start_file_offset = section_header->sh_offset; + FileOffset end_file_offset = start_file_offset + section_header->sh_size; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. + // subtract |pointer_to_rva|. const uint8_t* const adjust_pointer_to_rva = start_pointer - section_header->sh_addr; // Find the rel32 relocations. const uint8_t* p = start_pointer; while (p < end_pointer) { - //RVA current_rva = static_cast(p - adjust_pointer_to_rva); - // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? 
- const uint8_t* rel32 = NULL; + const uint8_t* rel32 = nullptr; if (p + 5 <= end_pointer) { if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 @@ -148,32 +155,26 @@ CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( } } if (p + 6 <= end_pointer) { - if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form + if (*p == 0x0F && (p[1] & 0xF0) == 0x80) { // Jcc long form if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely rel32 = p + 2; } } if (rel32) { RVA rva = static_cast(rel32 - adjust_pointer_to_rva); - TypedRVAX86* rel32_rva = new TypedRVAX86(rva); + scoped_ptr rel32_rva(new TypedRVAX86(rva)); - if (!rel32_rva->ComputeRelativeTarget(rel32)) { - delete rel32_rva; + if (!rel32_rva->ComputeRelativeTarget(rel32)) return false; - } RVA target_rva = rel32_rva->rva() + rel32_rva->relative_target(); - // To be valid, rel32 target must be within image, and within this - // section. - if (IsValidRVA(target_rva)) { - rel32_locations_.push_back(rel32_rva); + if (IsValidTargetRVA(target_rva)) { + rel32_locations_.push_back(rel32_rva.release()); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; #endif p = rel32 + 4; continue; - } else { - delete rel32_rva; } } p += 1; diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h index 5c87d4c..63be755 100644 --- a/courgette/disassembler_elf_32_x86.h +++ b/courgette/disassembler_elf_32_x86.h @@ -8,9 +8,10 @@ #include #include +#include + #include "base/macros.h" #include "courgette/disassembler_elf_32.h" -#include "courgette/memory_allocator.h" #include "courgette/types_elf.h" namespace courgette { @@ -21,38 +22,33 @@ class DisassemblerElf32X86 : public DisassemblerElf32 { public: class TypedRVAX86 : public TypedRVA { public: - explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { - } - - CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override { - set_relative_target(Read32LittleEndian(op_pointer) + 4); - return true; - } + explicit TypedRVAX86(RVA rva) : 
TypedRVA(rva) { } + ~TypedRVAX86() override { } + // TypedRVA interfaces. + CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override; CheckBool EmitInstruction(AssemblyProgram* program, - RVA target_rva) override { - return program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); - } - - uint16_t op_size() const override { return 4; } + RVA target_rva) override; + uint16_t op_size() const override; }; - explicit DisassemblerElf32X86(const void* start, size_t length); + DisassemblerElf32X86(const void* start, size_t length); - virtual ExecutableType kind() { return EXE_ELF_32_X86; } + ~DisassemblerElf32X86() override { } - virtual e_machine_values ElfEM() { return EM_386; } + // DisassemblerElf32 interfaces. + ExecutableType kind() const override { return EXE_ELF_32_X86; } + e_machine_values ElfEM() const override { return EM_386; } protected: - virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) - const WARN_UNUSED_RESULT; - - virtual CheckBool ParseRelocationSection( - const Elf32_Shdr *section_header, - AssemblyProgram* program) WARN_UNUSED_RESULT; - - virtual CheckBool ParseRel32RelocsFromSection( - const Elf32_Shdr* section) WARN_UNUSED_RESULT; + // DisassemblerElf32 interfaces. + CheckBool RelToRVA(Elf32_Rel rel, + RVA* result) const override WARN_UNUSED_RESULT; + CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, + AssemblyProgram* program) + override WARN_UNUSED_RESULT; + CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) + override WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS std::map rel32_target_rvas_; diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc index 3ce6a63..c15b8df 100644 --- a/courgette/disassembler_elf_32_x86_unittest.cc +++ b/courgette/disassembler_elf_32_x86_unittest.cc @@ -2,16 +2,25 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
+#include "courgette/disassembler_elf_32_x86.h" + #include #include +#include +#include + +#include "base/memory/scoped_ptr.h" #include "courgette/assembly_program.h" #include "courgette/base_test_unittest.h" -#include "courgette/disassembler_elf_32_x86.h" +#include "courgette/image_utils.h" + +namespace courgette { + +namespace { class DisassemblerElf32X86Test : public BaseTest { public: - void TestExe(const char* file_name, size_t expected_abs_count, size_t expected_rel_count) const; @@ -20,10 +29,11 @@ class DisassemblerElf32X86Test : public BaseTest { void DisassemblerElf32X86Test::TestExe(const char* file_name, size_t expected_abs_count, size_t expected_rel_count) const { + using TypedRVA = DisassemblerElf32::TypedRVA; std::string file1 = FileContents(file_name); - scoped_ptr disassembler( - new courgette::DisassemblerElf32X86(file1.c_str(), file1.length())); + scoped_ptr disassembler( + new DisassemblerElf32X86(file1.c_str(), file1.length())); bool can_parse_header = disassembler->ParseHeader(); EXPECT_TRUE(can_parse_header); @@ -33,7 +43,7 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name, // real file, since trailing debug info is not included EXPECT_EQ(file1.length(), disassembler->length()); - const uint8_t* offset_p = disassembler->OffsetToPointer(0); + const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); EXPECT_EQ(reinterpret_cast(file1.c_str()), reinterpret_cast(offset_p)); EXPECT_EQ(0x7F, offset_p[0]); @@ -41,46 +51,45 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name, EXPECT_EQ('L', offset_p[2]); EXPECT_EQ('F', offset_p[3]); - courgette::AssemblyProgram* program = - new courgette::AssemblyProgram(courgette::EXE_ELF_32_X86); - - EXPECT_TRUE(disassembler->Disassemble(program)); - - EXPECT_EQ(disassembler->Abs32Locations().size(), expected_abs_count); - EXPECT_EQ(disassembler->Rel32Locations().size(), expected_rel_count); - - // Prove that none of the rel32 RVAs overlap with abs32 RVAs - std::set 
abs(disassembler->Abs32Locations().begin(), - disassembler->Abs32Locations().end()); - std::set - rel(disassembler->Rel32Locations().begin(), - disassembler->Rel32Locations().end()); - for (std::vector::iterator - rel32 = disassembler->Rel32Locations().begin(); - rel32 != disassembler->Rel32Locations().end(); - rel32++) { - EXPECT_TRUE(abs.find((*rel32)->rva()) == abs.end()); - } + scoped_ptr program(new AssemblyProgram(EXE_ELF_32_X86)); - for (std::vector::iterator abs32 = - disassembler->Abs32Locations().begin(); - abs32 != disassembler->Abs32Locations().end(); - abs32++) { - bool found = false; - for (std::vector::iterator - rel32 = disassembler->Rel32Locations().begin(); - rel32 != disassembler->Rel32Locations().end(); - rel32++) { - if (*abs32 == (*rel32)->rva()) { - found = true; - break; - } + EXPECT_TRUE(disassembler->Disassemble(program.get())); + + const std::vector& abs32_list = disassembler->Abs32Locations(); + + // Flatten the list typed rel32 to a list of rel32 RVAs. + std::vector rel32_list; + rel32_list.reserve(disassembler->Rel32Locations().size()); + for (TypedRVA* typed_rel32 : disassembler->Rel32Locations()) + rel32_list.push_back(typed_rel32->rva()); + + EXPECT_EQ(expected_abs_count, abs32_list.size()); + EXPECT_EQ(expected_rel_count, rel32_list.size()); + + EXPECT_TRUE(std::is_sorted(abs32_list.begin(), abs32_list.end())); + EXPECT_TRUE(std::is_sorted(rel32_list.begin(), rel32_list.end())); + + // Verify that rel32 RVAs do not overlap with abs32 RVAs. + // TODO(huangs): Fix this to account for RVA's 4-byte width. 
+ bool found_match = false; + std::vector::const_iterator abs32_it = abs32_list.begin(); + std::vector::const_iterator rel32_it = rel32_list.begin(); + while (abs32_it != abs32_list.end() && rel32_it != rel32_list.end()) { + if (*abs32_it < *rel32_it) { + ++abs32_it; + } else if (*abs32_it > *rel32_it) { + ++rel32_it; + } else { + found_match = true; } - EXPECT_TRUE(!found); } - delete program; + EXPECT_FALSE(found_match); } +} // namespace + TEST_F(DisassemblerElf32X86Test, All) { TestExe("elf-32-1", 200, 3442); } + +} // namespace courgette diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc index 74b0fe4..819b7f2 100644 --- a/courgette/disassembler_win32_x64.cc +++ b/courgette/disassembler_win32_x64.cc @@ -8,37 +8,73 @@ #include #include -#include -#include +#include #include "base/logging.h" #include "base/numerics/safe_conversions.h" - #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/encoded_program.h" namespace courgette { DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) - : Disassembler(start, length), - incomplete_disassembly_(false), - is_PE32_plus_(false), - optional_header_(NULL), - size_of_optional_header_(0), - offset_of_data_directories_(0), - machine_type_(0), - number_of_sections_(0), - sections_(NULL), - has_text_section_(false), - size_of_code_(0), - size_of_initialized_data_(0), - size_of_uninitialized_data_(0), - base_of_code_(0), - base_of_data_(0), - image_base_(0), - size_of_image_(0), - number_of_data_directories_(0) { + : Disassembler(start, length), + incomplete_disassembly_(false), + is_PE32_plus_(false), + optional_header_(nullptr), + size_of_optional_header_(0), + offset_of_data_directories_(0), + machine_type_(0), + number_of_sections_(0), + sections_(nullptr), + has_text_section_(false), + size_of_code_(0), + size_of_initialized_data_(0), + size_of_uninitialized_data_(0), + base_of_code_(0), + base_of_data_(0), + image_base_(0), 
+ size_of_image_(0), + number_of_data_directories_(0) { +} + +FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { + const Section* section = RVAToSection(rva); + if (section != nullptr) { + FileOffset offset_in_section = rva - section->virtual_address; + // Need this extra check, since an |rva| may be valid for a section, but is + // non-existent in an image (e.g. uninit data). + if (offset_in_section >= section->size_of_raw_data) + return kNoFileOffset; + + return static_cast<FileOffset>(section->file_offset_of_raw_data + + offset_in_section); + } + + // Small RVA values point into the file header in the loaded image. + // RVA 0 is the module load address which Windows uses as the module handle. + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the + // DOS header. + if (rva == 0 || rva == 2) + return static_cast<FileOffset>(rva); + + NOTREACHED(); + return kNoFileOffset; +} + +RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { + for (int i = 0; i < number_of_sections_; ++i) { + const Section* section = &sections_[i]; + if (file_offset >= section->file_offset_of_raw_data) { + FileOffset offset_in_section = + file_offset - section->file_offset_of_raw_data; + if (offset_in_section < section->size_of_raw_data) + return static_cast<RVA>(section->virtual_address + offset_in_section); + } + } + + NOTREACHED(); + return kNoRVA; } // ParseHeader attempts to match up the buffer with the Windows data @@ -57,18 +93,19 @@ bool DisassemblerWin32X64::ParseHeader() { return Bad("Not MZ"); // offset from DOS header to PE header is stored in DOS header. 
- uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); + FileOffset file_offset = static_cast<FileOffset>( + ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); - if (offset >= length()) + if (file_offset >= length()) return Bad("Bad offset to PE header"); - const uint8_t* const pe_header = OffsetToPointer(offset); + const uint8_t* const pe_header = FileOffsetToPointer(file_offset); const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; if (pe_header <= start() || pe_header >= end() - kMinPEHeaderSize) - return Bad("Bad offset to PE header"); + return Bad("Bad file offset to PE header"); - if (offset % 8 != 0) + if (file_offset % 8 != 0) return Bad("Misaligned PE header"); // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. @@ -169,7 +206,7 @@ bool DisassemblerWin32X64::ParseHeader() { size_of_optional_header_); size_t detected_length = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; // TODO(sra): consider using the 'characteristics' field of the section @@ -267,7 +304,7 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector *relocs) { RVA rva = page_rva + offset; // TODO(sebmarchand): Skip the relocs that live outside of the image. See // the version of this function in disassembler_win32_x86.cc. - if (type == 10) { // IMAGE_REL_BASED_DIR64 + if (type == 10) { // IMAGE_REL_BASED_DIR64 relocs->push_back(rva); } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE // Ignore, used as padding. 
@@ -287,48 +324,19 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector *relocs) { } const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { - for (int i = 0; i < number_of_sections_; i++) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; - uint32_t offset = rva - section->virtual_address; - if (offset < section->virtual_size) { - return section; - } - } - return NULL; -} - -int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { - const Section* section = RVAToSection(rva); - if (section) { - uint32_t offset = rva - section->virtual_address; - if (offset < section->size_of_raw_data) { - return section->file_offset_of_raw_data + offset; - } else { - return kNoOffset; // In section but not in file (e.g. uninit data). + if (rva >= section->virtual_address) { + FileOffset offset_in_section = rva - section->virtual_address; + if (offset_in_section < section->virtual_size) + return section; } } - - // Small RVA values point into the file header in the loaded image. - // RVA 0 is the module load address which Windows uses as the module handle. - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the - // DOS header. - if (rva == 0 || rva == 2) - return rva; - - NOTREACHED(); - return kNoOffset; -} - -const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const { - int file_offset = RVAToFileOffset(rva); - if (file_offset == kNoOffset) - return NULL; - else - return OffsetToPointer(file_offset); + return nullptr; } std::string DisassemblerWin32X64::SectionName(const Section* section) { - if (section == NULL) + if (section == nullptr) return ""; char name[9]; memcpy(name, section->name, 8); @@ -338,24 +346,25 @@ std::string DisassemblerWin32X64::SectionName(const Section* section) { CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. 
- uint32_t file_offset = 0; + FileOffset file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == NULL) { - // No more sections. There should not be extra stuff following last + if (section == nullptr) { + // No more sections. There should not be extra stuff following last // section. // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); break; } if (file_offset < section->file_offset_of_raw_data) { - uint32_t section_start_offset = section->file_offset_of_raw_data; - if(!ParseNonSectionFileRegion(file_offset, section_start_offset, - program)) + FileOffset section_start_offset = section->file_offset_of_raw_data; + if (!ParseNonSectionFileRegion(file_offset, section_start_offset, + program)) { return false; + } file_offset = section_start_offset; } - uint32_t end = file_offset + section->size_of_raw_data; + FileOffset end = file_offset + section->size_of_raw_data; if (!ParseFileRegion(section, file_offset, end, program)) return false; file_offset = end; @@ -375,7 +384,7 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() { return false; #if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { + for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a reference to some address. 
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); @@ -386,10 +395,10 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() { } void DisassemblerWin32X64::ParseRel32RelocsFromSections() { - uint32_t file_offset = 0; + FileOffset file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == NULL) + if (section == nullptr) break; if (file_offset < section->file_offset_of_raw_data) file_offset = section->file_offset_of_raw_data; @@ -411,11 +420,11 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() { std::map::iterator rel32_iter = rel32_target_rvas_.begin(); while (abs32_iter != abs32_target_rvas_.end() && rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) + if (abs32_iter->first < rel32_iter->first) { ++abs32_iter; - else if (rel32_iter->first < abs32_iter->first) + } else if (rel32_iter->first < abs32_iter->first) { ++rel32_iter; - else { + } else { ++common; ++abs32_iter; ++rel32_iter; @@ -431,18 +440,18 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { if (!isCode) return; - uint32_t start_file_offset = section->file_offset_of_raw_data; - uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; + FileOffset start_file_offset = section->file_offset_of_raw_data; + FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; RVA relocs_start_rva = base_relocation_table().address_; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. + // subtract |pointer_to_rva|. 
const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; std::vector::iterator abs32_pos = abs32_locations_.begin(); @@ -459,13 +468,10 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { } } - //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) - // ++abs32_pos; - // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? - const uint8_t* rel32 = NULL; + const uint8_t* rel32 = nullptr; bool is_rip_relative = false; if (p + 5 <= end_pointer) { @@ -516,7 +522,7 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); // To be valid, rel32 target must be within image, and within this // section. - if (IsValidRVA(target_rva) && + if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA. (is_rip_relative || (start_rva <= target_rva && target_rva < end_rva))) { rel32_locations_.push_back(rel32_rva); @@ -532,14 +538,14 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { } CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) { if (incomplete_disassembly_) return true; if (end_file_offset > start_file_offset) { - if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), + if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), end_file_offset - start_file_offset)) { return false; } @@ -549,13 +555,13 @@ CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( } CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) { RVA relocs_start_rva 
= base_relocation_table().address_; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; @@ -664,7 +670,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, size_t count = p->second.size(); std::cout << std::dec << p->first << ": " << count; if (count <= 2) { - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < count; ++i) std::cout << " " << DescribeRVA(p->second[i]); } std::cout << std::endl; @@ -676,7 +682,6 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, } #endif // COURGETTE_HISTOGRAM_TARGETS - // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except // that during development I'm finding I need to call it when compiled in // Release mode. Hence: @@ -695,12 +700,12 @@ std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { } const Section* DisassemblerWin32X64::FindNextSection( - uint32_t fileOffset) const { + FileOffset file_offset) const { const Section* best = 0; - for (int i = 0; i < number_of_sections_; i++) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; if (section->size_of_raw_data > 0) { // i.e. has data in file. 
- if (fileOffset <= section->file_offset_of_raw_data) { + if (file_offset <= section->file_offset_of_raw_data) { if (best == 0 || section->file_offset_of_raw_data < best->file_offset_of_raw_data) { best = section; @@ -711,26 +716,15 @@ const Section* DisassemblerWin32X64::FindNextSection( return best; } -RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const { - for (int i = 0; i < number_of_sections_; i++) { - const Section* section = §ions_[i]; - uint32_t offset = file_offset - section->file_offset_of_raw_data; - if (offset < section->size_of_raw_data) { - return section->virtual_address + offset; - } - } - return 0; -} - bool DisassemblerWin32X64::ReadDataDirectory( int index, ImageDataDirectory* directory) { if (index < number_of_data_directories_) { - size_t offset = index * 8 + offset_of_data_directories_; - if (offset >= size_of_optional_header_) + FileOffset file_offset = index * 8 + offset_of_data_directories_; + if (file_offset >= size_of_optional_header_) return Bad("number of data directories inconsistent"); - const uint8_t* data_directory = optional_header_ + offset; + const uint8_t* data_directory = optional_header_ + file_offset; if (data_directory < start() || data_directory + 8 >= end()) return Bad("data directory outside image"); diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h index 23aee66..20cfc7e 100644 --- a/courgette/disassembler_win32_x64.h +++ b/courgette/disassembler_win32_x64.h @@ -8,15 +8,16 @@ #include #include +#include +#include +#include + #include "base/macros.h" #include "courgette/disassembler.h" +#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_win_pe.h" -#ifdef COURGETTE_HISTOGRAM_TARGETS -#include -#endif - namespace courgette { class AssemblyProgram; @@ -25,19 +26,14 @@ class DisassemblerWin32X64 : public Disassembler { public: explicit DisassemblerWin32X64(const void* start, size_t length); - virtual ExecutableType kind() { 
return EXE_WIN_32_X64; } - - // Returns 'true' if the buffer appears to point to a Windows 32 bit - // executable, 'false' otherwise. If ParseHeader() succeeds, other member - // functions may be called. - virtual bool ParseHeader(); + // Disassembler interfaces. + RVA FileOffsetToRVA(FileOffset file_offset) const override; + FileOffset RVAToFileOffset(RVA rva) const override; + ExecutableType kind() const override { return EXE_WIN_32_X64; } + bool ParseHeader() override; + bool Disassemble(AssemblyProgram* target) override; - virtual bool Disassemble(AssemblyProgram* target); - - // // Exposed for test purposes - // - bool has_text_section() const { return has_text_section_; } uint32_t size_of_code() const { return size_of_code_; } bool is_32bit() const { return !is_PE32_plus_; } @@ -47,17 +43,9 @@ class DisassemblerWin32X64 : public Disassembler { // that are listed in the base relocation table. bool ParseRelocs(std::vector *addresses); - // Returns Section containing the relative virtual address, or NULL if none. + // Returns Section containing the relative virtual address, or null if none. const Section* RVAToSection(RVA rva) const; - static const int kNoOffset = -1; - // Returns kNoOffset if there is no file offset corresponding to 'rva'. - int RVAToFileOffset(RVA rva) const; - - // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL - // is returned if there is no file offset corresponding to 'rva'. 
- const uint8_t* RVAToPointer(RVA rva) const; - static std::string SectionName(const Section* section); protected: @@ -66,62 +54,46 @@ class DisassemblerWin32X64 : public Disassembler { void ParseRel32RelocsFromSections(); void ParseRel32RelocsFromSection(const Section* section); - CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset, - uint32_t end_file_offset, + CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseFileRegion(const Section* section, - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS void HistogramTargets(const char* kind, const std::map& map); #endif - // Most addresses are represented as 32-bit RVAs. The one address we can't - // do this with is the image base address. 'image_base' is valid only for - // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. + // Most addresses are represented as 32-bit RVAs. The one address we can't + // do this with is the image base address. uint64_t image_base() const { return image_base_; } const ImageDataDirectory& base_relocation_table() const { return base_relocation_table_; } - // Subsumes rva != kUnassignedRVA. - bool IsValidRVA(RVA rva) const { return rva < size_of_image_; } - - // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. + // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. std::string DescribeRVA(RVA rva) const; - // Finds the first section at file_offset or above. Does not return sections + // Finds the first section at file_offset or above. Does not return sections // that have no raw bytes in the file. - const Section* FindNextSection(uint32_t file_offset) const; - - // There are 2 'coordinate systems' for reasoning about executables. 
- // FileOffset - the the offset within a single .EXE or .DLL *file*. - // RVA - relative virtual address (offset within *loaded image*) - // FileOffsetToRVA and RVAToFileOffset convert between these representations. - - RVA FileOffsetToRVA(uint32_t offset) const; + const Section* FindNextSection(FileOffset file_offset) const; private: - bool ReadDataDirectory(int index, ImageDataDirectory* dir); - bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits + bool incomplete_disassembly_; // true if can omit "uninteresting" bits. std::vector abs32_locations_; std::vector rel32_locations_; // - // Fields that are always valid. + // Information that is valid after ParseHeader() succeeds. // - - // - // Information that is valid after successful ParseHeader. - // - bool is_PE32_plus_; // PE32_plus is for 64 bit executables. + bool is_PE32_plus_; // PE32_plus is for 64 bit executables. // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. const uint8_t* optional_header_; @@ -158,9 +130,9 @@ class DisassemblerWin32X64 : public Disassembler { std::map rel32_target_rvas_; #endif - DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X64); }; } // namespace courgette + #endif // COURGETTE_DISASSEMBLER_WIN32_X64_H_ diff --git a/courgette/disassembler_win32_x64_unittest.cc b/courgette/disassembler_win32_x64_unittest.cc index 8f732b3..1121c10 100644 --- a/courgette/disassembler_win32_x64_unittest.cc +++ b/courgette/disassembler_win32_x64_unittest.cc @@ -6,6 +6,9 @@ #include +#include +#include + #include "base/memory/scoped_ptr.h" #include "base/stl_util.h" #include "courgette/base_test_unittest.h" @@ -37,16 +40,16 @@ void DisassemblerWin32X64Test::TestExe() const { disassembler->RVAToSection(0x00401234 - 0x00400000)), std::string(".text")); - EXPECT_EQ(0, disassembler->RVAToFileOffset(0)); - EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096)); - EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000)); + EXPECT_EQ(0U, disassembler->RVAToFileOffset(0)); + 
EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096)); + EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000)); std::vector relocs; bool can_parse_relocs = disassembler->ParseRelocs(&relocs); EXPECT_TRUE(can_parse_relocs); EXPECT_TRUE(base::STLIsSorted(relocs)); - const uint8_t* offset_p = disassembler->OffsetToPointer(0); + const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); EXPECT_EQ(reinterpret_cast(file1.c_str()), reinterpret_cast(offset_p)); EXPECT_EQ('M', offset_p[0]); diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc index aed26c7..07bdfbc 100644 --- a/courgette/disassembler_win32_x86.cc +++ b/courgette/disassembler_win32_x86.cc @@ -8,37 +8,73 @@ #include #include -#include -#include +#include #include "base/logging.h" - #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/encoded_program.h" #include "courgette/rel32_finder_win32_x86.h" namespace courgette { DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length) - : Disassembler(start, length), - incomplete_disassembly_(false), - is_PE32_plus_(false), - optional_header_(NULL), - size_of_optional_header_(0), - offset_of_data_directories_(0), - machine_type_(0), - number_of_sections_(0), - sections_(NULL), - has_text_section_(false), - size_of_code_(0), - size_of_initialized_data_(0), - size_of_uninitialized_data_(0), - base_of_code_(0), - base_of_data_(0), - image_base_(0), - size_of_image_(0), - number_of_data_directories_(0) { + : Disassembler(start, length), + incomplete_disassembly_(false), + is_PE32_plus_(false), + optional_header_(nullptr), + size_of_optional_header_(0), + offset_of_data_directories_(0), + machine_type_(0), + number_of_sections_(0), + sections_(nullptr), + has_text_section_(false), + size_of_code_(0), + size_of_initialized_data_(0), + size_of_uninitialized_data_(0), + base_of_code_(0), + base_of_data_(0), + image_base_(0), + size_of_image_(0), + 
number_of_data_directories_(0) { +} + +FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { + const Section* section = RVAToSection(rva); + if (section != nullptr) { + FileOffset offset_in_section = rva - section->virtual_address; + // Need this extra check, since an |rva| may be valid for a section, but is + // non-existent in an image (e.g. uninit data). + if (offset_in_section >= section->size_of_raw_data) + return kNoFileOffset; + + return static_cast<FileOffset>(section->file_offset_of_raw_data + + offset_in_section); + } + + // Small RVA values point into the file header in the loaded image. + // RVA 0 is the module load address which Windows uses as the module handle. + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the + // DOS header. + if (rva == 0 || rva == 2) + return static_cast<FileOffset>(rva); + + NOTREACHED(); + return kNoFileOffset; +} + +RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const { + for (int i = 0; i < number_of_sections_; ++i) { + const Section* section = &sections_[i]; + if (file_offset >= section->file_offset_of_raw_data) { + FileOffset offset_in_section = + file_offset - section->file_offset_of_raw_data; + if (offset_in_section < section->size_of_raw_data) + return static_cast<RVA>(section->virtual_address + offset_in_section); + } + } + + NOTREACHED(); + return kNoRVA; } // ParseHeader attempts to match up the buffer with the Windows data @@ -57,18 +93,19 @@ bool DisassemblerWin32X86::ParseHeader() { return Bad("Not MZ"); // offset from DOS header to PE header is stored in DOS header. 
- uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); + FileOffset file_offset = static_cast<FileOffset>( + ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); - if (offset >= length()) + if (file_offset >= length()) return Bad("Bad offset to PE header"); - const uint8_t* const pe_header = OffsetToPointer(offset); + const uint8_t* const pe_header = FileOffsetToPointer(file_offset); const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; if (pe_header <= start() || pe_header >= end() - kMinPEHeaderSize) - return Bad("Bad offset to PE header"); + return Bad("Bad file offset to PE header"); - if (offset % 8 != 0) + if (file_offset % 8 != 0) return Bad("Misaligned PE header"); // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. @@ -169,7 +206,7 @@ bool DisassemblerWin32X86::ParseHeader() { size_of_optional_header_); size_t detected_length = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; // TODO(sra): consider using the 'characteristics' field of the section @@ -293,48 +330,19 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector *relocs) { } const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const { - for (int i = 0; i < number_of_sections_; i++) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; - uint32_t offset = rva - section->virtual_address; - if (offset < section->virtual_size) { - return section; - } - } - return NULL; -} - -int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { - const Section* section = RVAToSection(rva); - if (section) { - uint32_t offset = rva - section->virtual_address; - if (offset < section->size_of_raw_data) { - return section->file_offset_of_raw_data + offset; - } else { - return kNoOffset; // In section but not in file (e.g. uninit data). 
+ if (rva >= section->virtual_address) { + FileOffset offset_in_section = rva - section->virtual_address; + if (offset_in_section < section->virtual_size) + return section; } } - - // Small RVA values point into the file header in the loaded image. - // RVA 0 is the module load address which Windows uses as the module handle. - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the - // DOS header. - if (rva == 0 || rva == 2) - return rva; - - NOTREACHED(); - return kNoOffset; -} - -const uint8_t* DisassemblerWin32X86::RVAToPointer(RVA rva) const { - int file_offset = RVAToFileOffset(rva); - if (file_offset == kNoOffset) - return NULL; - else - return OffsetToPointer(file_offset); + return nullptr; } std::string DisassemblerWin32X86::SectionName(const Section* section) { - if (section == NULL) + if (section == nullptr) return ""; char name[9]; memcpy(name, section->name, 8); @@ -344,24 +352,25 @@ std::string DisassemblerWin32X86::SectionName(const Section* section) { CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. - uint32_t file_offset = 0; + FileOffset file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == NULL) { - // No more sections. There should not be extra stuff following last + if (section == nullptr) { + // No more sections. There should not be extra stuff following last // section. 
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program); break; } if (file_offset < section->file_offset_of_raw_data) { - uint32_t section_start_offset = section->file_offset_of_raw_data; - if(!ParseNonSectionFileRegion(file_offset, section_start_offset, - program)) + FileOffset section_start_offset = section->file_offset_of_raw_data; + if (!ParseNonSectionFileRegion(file_offset, section_start_offset, + program)) { return false; + } file_offset = section_start_offset; } - uint32_t end = file_offset + section->size_of_raw_data; + FileOffset end = file_offset + section->size_of_raw_data; if (!ParseFileRegion(section, file_offset, end, program)) return false; file_offset = end; @@ -381,7 +390,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { return false; #if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { + for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a reference to some address. 
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); @@ -392,10 +401,10 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { } void DisassemblerWin32X86::ParseRel32RelocsFromSections() { - uint32_t file_offset = 0; + FileOffset file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == NULL) + if (section == nullptr) break; if (file_offset < section->file_offset_of_raw_data) file_offset = section->file_offset_of_raw_data; @@ -417,11 +426,11 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSections() { std::map::iterator rel32_iter = rel32_target_rvas_.begin(); while (abs32_iter != abs32_target_rvas_.end() && rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) + if (abs32_iter->first < rel32_iter->first) { ++abs32_iter; - else if (rel32_iter->first < abs32_iter->first) + } else if (rel32_iter->first < abs32_iter->first) { ++rel32_iter; - else { + } else { ++common; ++abs32_iter; ++rel32_iter; @@ -437,19 +446,18 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { if (!isCode) return; - uint32_t start_file_offset = section->file_offset_of_raw_data; - uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; + FileOffset start_file_offset = section->file_offset_of_raw_data; + FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; Rel32FinderWin32X86_Basic finder( base_relocation_table().address_, - base_relocation_table().address_ + base_relocation_table().size_, - size_of_image_); + base_relocation_table().address_ + 
base_relocation_table().size_); finder.Find(start_pointer, end_pointer, start_rva, end_rva, abs32_locations_); finder.SwapRel32Locations(&rel32_locations_); @@ -460,14 +468,14 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { } CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) { if (incomplete_disassembly_) return true; if (end_file_offset > start_file_offset) { - if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), + if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), end_file_offset - start_file_offset)) { return false; } @@ -477,13 +485,13 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( } CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section, - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) { RVA relocs_start_rva = base_relocation_table().address_; - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; @@ -592,7 +600,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, size_t count = p->second.size(); std::cout << std::dec << p->first << ": " << count; if (count <= 2) { - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < count; ++i) std::cout << " " << DescribeRVA(p->second[i]); } std::cout << std::endl; @@ -604,7 +612,6 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, } #endif // COURGETTE_HISTOGRAM_TARGETS - // DescribeRVA is for debugging only. 
I would put it under #ifdef DEBUG except // that during development I'm finding I need to call it when compiled in // Release mode. Hence: @@ -623,12 +630,12 @@ std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const { } const Section* DisassemblerWin32X86::FindNextSection( - uint32_t fileOffset) const { + FileOffset file_offset) const { const Section* best = 0; - for (int i = 0; i < number_of_sections_; i++) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; if (section->size_of_raw_data > 0) { // i.e. has data in file. - if (fileOffset <= section->file_offset_of_raw_data) { + if (file_offset <= section->file_offset_of_raw_data) { if (best == 0 || section->file_offset_of_raw_data < best->file_offset_of_raw_data) { best = section; @@ -639,26 +646,15 @@ const Section* DisassemblerWin32X86::FindNextSection( return best; } -RVA DisassemblerWin32X86::FileOffsetToRVA(uint32_t file_offset) const { - for (int i = 0; i < number_of_sections_; i++) { - const Section* section = &sections_[i]; - uint32_t offset = file_offset - section->file_offset_of_raw_data; - if (offset < section->size_of_raw_data) { - return section->virtual_address + offset; - } - } - return 0; -} - bool DisassemblerWin32X86::ReadDataDirectory( int index, ImageDataDirectory* directory) { if (index < number_of_data_directories_) { - size_t offset = index * 8 + offset_of_data_directories_; - if (offset >= size_of_optional_header_) + FileOffset file_offset = index * 8 + offset_of_data_directories_; + if (file_offset >= size_of_optional_header_) return Bad("number of data directories inconsistent"); - const uint8_t* data_directory = optional_header_ + offset; + const uint8_t* data_directory = optional_header_ + file_offset; if (data_directory < start() || data_directory + 8 >= end()) return Bad("data directory outside image"); diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h index 891636c..c22872b 100644 ---
a/courgette/disassembler_win32_x86.h +++ b/courgette/disassembler_win32_x86.h @@ -8,15 +8,16 @@ #include #include +#include +#include +#include + #include "base/macros.h" #include "courgette/disassembler.h" +#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_win_pe.h" -#ifdef COURGETTE_HISTOGRAM_TARGETS -#include -#endif - namespace courgette { class AssemblyProgram; @@ -25,19 +26,14 @@ class DisassemblerWin32X86 : public Disassembler { public: explicit DisassemblerWin32X86(const void* start, size_t length); - virtual ExecutableType kind() { return EXE_WIN_32_X86; } + // Disassembler interfaces. + RVA FileOffsetToRVA(FileOffset file_offset) const override; + FileOffset RVAToFileOffset(RVA rva) const override; + ExecutableType kind() const override { return EXE_WIN_32_X86; } + bool ParseHeader() override; + bool Disassemble(AssemblyProgram* target) override; - // Returns 'true' if the buffer appears to point to a Windows 32 bit - // executable, 'false' otherwise. If ParseHeader() succeeds, other member - // functions may be called. - virtual bool ParseHeader(); - - virtual bool Disassemble(AssemblyProgram* target); - - // // Exposed for test purposes - // - bool has_text_section() const { return has_text_section_; } uint32_t size_of_code() const { return size_of_code_; } bool is_32bit() const { return !is_PE32_plus_; } @@ -47,17 +43,9 @@ class DisassemblerWin32X86 : public Disassembler { // that are listed in the base relocation table. bool ParseRelocs(std::vector *addresses); - // Returns Section containing the relative virtual address, or NULL if none. + // Returns Section containing the relative virtual address, or null if none. const Section* RVAToSection(RVA rva) const; - static const int kNoOffset = -1; - // Returns kNoOffset if there is no file offset corresponding to 'rva'. 
- int RVAToFileOffset(RVA rva) const; - - // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL - // is returned if there is no file offset corresponding to 'rva'. - const uint8_t* RVAToPointer(RVA rva) const; - static std::string SectionName(const Section* section); protected: @@ -66,59 +54,46 @@ class DisassemblerWin32X86 : public Disassembler { void ParseRel32RelocsFromSections(); void ParseRel32RelocsFromSection(const Section* section); - CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset, - uint32_t end_file_offset, + CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseFileRegion(const Section* section, - uint32_t start_file_offset, - uint32_t end_file_offset, + FileOffset start_file_offset, + FileOffset end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS void HistogramTargets(const char* kind, const std::map& map); #endif - // Most addresses are represented as 32-bit RVAs. The one address we can't - // do this with is the image base address. 'image_base' is valid only for - // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. + // Most addresses are represented as 32-bit RVAs. The one address we can't + // do this with is the image base address. uint32_t image_base() const { return static_cast(image_base_); } const ImageDataDirectory& base_relocation_table() const { return base_relocation_table_; } - // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. + // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. std::string DescribeRVA(RVA rva) const; - // Finds the first section at file_offset or above. Does not return sections + // Finds the first section at file_offset or above. Does not return sections // that have no raw bytes in the file. 
- const Section* FindNextSection(uint32_t file_offset) const; - - // There are 2 'coordinate systems' for reasoning about executables. - // FileOffset - the the offset within a single .EXE or .DLL *file*. - // RVA - relative virtual address (offset within *loaded image*) - // FileOffsetToRVA and RVAToFileOffset convert between these representations. - - RVA FileOffsetToRVA(uint32_t offset) const; + const Section* FindNextSection(FileOffset file_offset) const; private: - bool ReadDataDirectory(int index, ImageDataDirectory* dir); - bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits + bool incomplete_disassembly_; // true if can omit "uninteresting" bits. std::vector abs32_locations_; std::vector rel32_locations_; // - // Fields that are always valid. + // Information that is valid after ParseHeader() succeeds. // - - // - // Information that is valid after successful ParseHeader. - // - bool is_PE32_plus_; // PE32_plus is for 64 bit executables. + bool is_PE32_plus_; // PE32_plus is for 64 bit executables. // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. 
const uint8_t* optional_header_; @@ -155,9 +130,9 @@ class DisassemblerWin32X86 : public Disassembler { std::map rel32_target_rvas_; #endif - DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86); }; } // namespace courgette + #endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_ diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc index 4e16464..3e43273 100644 --- a/courgette/disassembler_win32_x86_unittest.cc +++ b/courgette/disassembler_win32_x86_unittest.cc @@ -6,6 +6,9 @@ #include +#include +#include + #include "base/memory/scoped_ptr.h" #include "base/stl_util.h" #include "courgette/base_test_unittest.h" @@ -37,16 +40,16 @@ void DisassemblerWin32X86Test::TestExe() const { disassembler->RVAToSection(0x00401234 - 0x00400000)), std::string(".text")); - EXPECT_EQ(0, disassembler->RVAToFileOffset(0)); - EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096)); - EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000)); + EXPECT_EQ(0U, disassembler->RVAToFileOffset(0)); + EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096)); + EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000)); std::vector relocs; bool can_parse_relocs = disassembler->ParseRelocs(&relocs); EXPECT_TRUE(can_parse_relocs); EXPECT_TRUE(base::STLIsSorted(relocs)); - const uint8_t* offset_p = disassembler->OffsetToPointer(0); + const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); EXPECT_EQ(reinterpret_cast(file1.c_str()), reinterpret_cast(offset_p)); EXPECT_EQ('M', offset_p[0]); diff --git a/courgette/image_utils.h b/courgette/image_utils.h index f958cc1..cfbfcfe 100644 --- a/courgette/image_utils.h +++ b/courgette/image_utils.h @@ -14,8 +14,44 @@ namespace courgette { -typedef uint32_t RVA; +// There are several ways to reason about addresses in an image: +// - File Offset: Position relative to start of image. +// - VA (Virtual Address): Virtual memory address of a loaded image. This is +// subject to relocation by the OS. 
+// - RVA (Relative Virtual Address): VA relative to some base address. This is +// the preferred way to specify pointers in an image. Two ways to encode RVA +// are: +// - abs32: RVA value is encoded directly. +// - rel32: RVA is encoded as offset from an instruction address. This is +// commonly used for relative branch/call opcodes. +// Courgette operates on File Offsets and RVAs only. + +using RVA = uint32_t; const RVA kUnassignedRVA = 0xFFFFFFFFU; +const RVA kNoRVA = 0xFFFFFFFFU; + +using FileOffset = size_t; +const FileOffset kNoFileOffset = UINTPTR_MAX; + +// An interface for {File Offset, RVA, pointer to image data} translation. +class AddressTranslator { + public: + // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent. + virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0; + + // Returns the file offset corresponding to |rva|, or kNoFileOffset if + // nonexistent. + virtual FileOffset RVAToFileOffset(RVA rva) const = 0; + + // Returns the pointer to the image data for |file_offset|. Assumes that + // 0 <= |file_offset| <= image size. If |file_offset| == image size, the resulting + // pointer is an end bound for iteration that should never be dereferenced. + virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0; + + // Returns the pointer to the image data for |rva|, or null if |rva| is + // invalid. + virtual const uint8_t* RVAToPointer(RVA rva) const = 0; +}; // A Label is a symbolic reference to an address. Unlike a conventional // assembly language, we always know the address.
The address will later be diff --git a/courgette/rel32_finder_win32_x86.cc b/courgette/rel32_finder_win32_x86.cc index 171b781..0ed492f 100644 --- a/courgette/rel32_finder_win32_x86.cc +++ b/courgette/rel32_finder_win32_x86.cc @@ -8,11 +8,9 @@ namespace courgette { -Rel32FinderWin32X86::Rel32FinderWin32X86( - RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva) - : relocs_start_rva_(relocs_start_rva), - relocs_end_rva_(relocs_end_rva), - image_end_rva_(image_end_rva) { +Rel32FinderWin32X86::Rel32FinderWin32X86(RVA relocs_start_rva, + RVA relocs_end_rva) + : relocs_start_rva_(relocs_start_rva), relocs_end_rva_(relocs_end_rva) { } Rel32FinderWin32X86::~Rel32FinderWin32X86() { @@ -28,9 +26,9 @@ void Rel32FinderWin32X86::SwapRel32TargetRVAs(std::map* dest) { } #endif -Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic( - RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva) - : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva, image_end_rva) { +Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(RVA relocs_start_rva, + RVA relocs_end_rva) + : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva) { } Rel32FinderWin32X86_Basic::~Rel32FinderWin32X86_Basic() { @@ -51,6 +49,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, const uint8_t* p = start_pointer; while (p < end_pointer) { RVA current_rva = static_cast(p - adjust_pointer_to_rva); + + // Skip the base relocation table if we encounter it. + // Note: We're not bothering to handle the edge case where a Rel32 pointer + // collides with |relocs_start_rva_| by being {1, 2, 3}-bytes before it.
if (current_rva == relocs_start_rva_) { if (relocs_start_rva_ < relocs_end_rva_) { p += relocs_end_rva_ - relocs_start_rva_; @@ -58,13 +60,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, } } - //while (abs32_pos != abs32_locations.end() && *abs32_pos < current_rva) - // ++abs32_pos; - // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? - const uint8_t* rel32 = NULL; + const uint8_t* rel32 = nullptr; if (p + 5 <= end_pointer) { if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 @@ -95,10 +94,9 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, } RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); - // To be valid, rel32 target must be within image, and within this - // section. - if (IsValidRVA(target_rva) && - start_rva <= target_rva && target_rva < end_rva) { + // Valid, rel32 target must be within image, and within this section. + // Subsumes |target_rva| != |kUnassignedRVA|. + if (start_rva <= target_rva && target_rva < end_rva) { rel32_locations_.push_back(rel32_rva); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h index 01226ae..98ebd98 100644 --- a/courgette/rel32_finder_win32_x86.h +++ b/courgette/rel32_finder_win32_x86.h @@ -7,9 +7,7 @@ #include -#if COURGETTE_HISTOGRAM_TARGETS #include -#endif #include #include "courgette/image_utils.h" @@ -19,25 +17,21 @@ namespace courgette { // A helper class to scan through a section of code to extract RVAs. class Rel32FinderWin32X86 { public: - Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva, - RVA image_end_rva); + Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva); virtual ~Rel32FinderWin32X86(); - // Subsumes rva != kUnassignedRVA. 
- bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; } - - // Swaps data in |rel32_locations_| to |dest|. + // Swaps data in |rel32_locations_| with |dest|. void SwapRel32Locations(std::vector* dest); #if COURGETTE_HISTOGRAM_TARGETS - // Swaps data in |rel32_target_rvas_| to |dest|. + // Swaps data in |rel32_target_rvas_| with |dest|. void SwapRel32TargetRVAs(std::map* dest); #endif // Scans through [|start_pointer|, |end_pointer|) for rel32 addresses. Seeks // RVAs that satisfy the following: - // - Do not collide with |abs32_pos| (assumed sorted). - // - Do not collide with |base_relocation_table|'s RVA range, + // - Do not overlap with |abs32_locations| (assumed sorted). + // - Do not overlap with [relocs_start_rva, relocs_end_rva). // - Whose targets are in [|start_rva|, |end_rva|). // The sorted results are written to |rel32_locations_|. virtual void Find(const uint8_t* start_pointer, @@ -49,7 +43,6 @@ class Rel32FinderWin32X86 { protected: const RVA relocs_start_rva_; const RVA relocs_end_rva_; - const RVA image_end_rva_; std::vector rel32_locations_; @@ -62,8 +55,7 @@ class Rel32FinderWin32X86 { // (excluding JPO/JPE) disregarding instruction alignment. class Rel32FinderWin32X86_Basic : public Rel32FinderWin32X86 { public: - Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva, - RVA image_end_rva); + Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva); virtual ~Rel32FinderWin32X86_Basic(); // Rel32FinderWin32X86 implementation. 
diff --git a/courgette/rel32_finder_win32_x86_unittest.cc b/courgette/rel32_finder_win32_x86_unittest.cc index aed5c13..496f0b9 100644 --- a/courgette/rel32_finder_win32_x86_unittest.cc +++ b/courgette/rel32_finder_win32_x86_unittest.cc @@ -33,8 +33,7 @@ class Rel32FinderWin32X86TestCase { } void RunTestBasic(std::string name) { - Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_, - image_end_rva_); + Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_); ASSERT_FALSE(text_data_.empty()); finder.Find(&text_data_[0], &text_data_[0] + text_data_.size(), text_start_rva_, text_end_rva_, abs32_locations_); -- cgit v1.1