diff options
author | scottmg <scottmg@chromium.org> | 2016-03-12 15:54:56 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-03-12 23:56:45 +0000 |
commit | 4a95ca5a4bab60f9f54325036516b640d263e2ec (patch) | |
tree | e4cb16184171a68d73caed9b37f3f0ee977c5a3d /courgette | |
parent | 2eb2d38dde83688f266012a04120c3253156d2b7 (diff) | |
download | chromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.zip chromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.tar.gz chromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.tar.bz2 |
Revert of [Courgette] Clean up Disassembler; fix ELF Memory leaks. (patchset #15 id:270001 of https://codereview.chromium.org/1676683002/ )
Reason for revert:
Regressed Linux sizes (iostream, maybe?)
https://build.chromium.org/p/chromium/builders/Linux/builds/72899/steps/sizes/logs/stdio
Original issue's description:
> [Courgette] Clean up Disassembler; fix ELF Memory leaks.
>
> Cleaning up code surrounding Disassembler:
> - Extract AddressTranslator interface to be used across subclasses.
> - Use FileOffset = size_t by context.
> - Detailed comments & TODOs in DisassemblerElf32ARM.
> - Fix DisassemblerElf32ARM memory leaks.
> - Lots of superficial stylistic changes.
>
> Except for AddressTranslator routines and unit tests, shying away
> from control flow and logic changes.
>
> BUG=579206
>
> Committed: https://crrev.com/58b822d441f5c982e879e536fa3c1cbac8fd339a
> Cr-Commit-Position: refs/heads/master@{#380881}
TBR=grt@chromium.org,wfh@chromium.org,chrisha@chromium.org,andrewhayden@chromium.org,huangs@chromium.org
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=579206
Review URL: https://codereview.chromium.org/1792603006
Cr-Commit-Position: refs/heads/master@{#380885}
Diffstat (limited to 'courgette')
-rw-r--r-- | courgette/disassembler.cc | 20 | ||||
-rw-r--r-- | courgette/disassembler.h | 34 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.cc | 333 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.h | 127 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_arm.cc | 244 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_arm.h | 56 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86.cc | 111 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86.h | 46 | ||||
-rw-r--r-- | courgette/disassembler_elf_32_x86_unittest.cc | 89 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.cc | 222 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.h | 78 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64_unittest.cc | 11 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 214 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.h | 75 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86_unittest.cc | 11 | ||||
-rw-r--r-- | courgette/image_utils.h | 38 | ||||
-rw-r--r-- | courgette/rel32_finder_win32_x86.cc | 30 | ||||
-rw-r--r-- | courgette/rel32_finder_win32_x86.h | 20 | ||||
-rw-r--r-- | courgette/rel32_finder_win32_x86_unittest.cc | 3 |
19 files changed, 880 insertions, 882 deletions
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index 9b58ba0..b9fce8b 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -4,12 +4,10 @@ #include "courgette/disassembler.h" -#include "base/logging.h" - namespace courgette { Disassembler::Disassembler(const void* start, size_t length) - : failure_reason_("uninitialized") { + : failure_reason_("uninitialized") { start_ = reinterpret_cast<const uint8_t*>(start); length_ = length; end_ = start_ + length_; @@ -17,21 +15,13 @@ Disassembler::Disassembler(const void* start, size_t length) Disassembler::~Disassembler() {}; -const uint8_t* Disassembler::FileOffsetToPointer(FileOffset file_offset) const { - CHECK_LE(file_offset, static_cast<FileOffset>(end_ - start_)); - return start_ + file_offset; -} - -const uint8_t* Disassembler::RVAToPointer(RVA rva) const { - FileOffset file_offset = RVAToFileOffset(rva); - if (file_offset == kNoFileOffset) - return nullptr; - - return FileOffsetToPointer(file_offset); +const uint8_t* Disassembler::OffsetToPointer(size_t offset) const { + assert(start_ + offset <= end_); + return start_ + offset; } bool Disassembler::Good() { - failure_reason_ = nullptr; + failure_reason_ = NULL; return true; } diff --git a/courgette/disassembler.h b/courgette/disassembler.h index bc715b0..e833cfa 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -16,35 +16,33 @@ namespace courgette { class AssemblyProgram; -class Disassembler : public AddressTranslator { +class Disassembler { public: virtual ~Disassembler(); - // AddressTranslator interfaces. 
- virtual RVA FileOffsetToRVA(FileOffset file_offset) const override = 0; - virtual FileOffset RVAToFileOffset(RVA rva) const override = 0; - const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override; - const uint8_t* RVAToPointer(RVA rva) const override; + virtual ExecutableType kind() { return EXE_UNKNOWN; } - virtual ExecutableType kind() const = 0; + // ok() may always be called but returns 'true' only after ParseHeader + // succeeds. + bool ok() const { return failure_reason_ == NULL; } - // Returns true if the buffer appears to be a valid executable of the expected - // type, and false otherwise. This needs not be called before Disassemble(). + // Returns 'true' if the buffer appears to be a valid executable of the + // expected type. It is not required that this be called before Disassemble. virtual bool ParseHeader() = 0; // Disassembles the item passed to the factory method into the output // parameter 'program'. virtual bool Disassemble(AssemblyProgram* program) = 0; - // ok() may always be called but returns true only after ParseHeader() - // succeeds. - bool ok() const { return failure_reason_ == nullptr; } - - // Returns the length of the image. May reduce after ParseHeader(). + // Returns the length of the source executable. May reduce after ParseHeader. size_t length() const { return length_; } const uint8_t* start() const { return start_; } const uint8_t* end() const { return end_; } + // Returns a pointer into the memory copy of the file format. + // FileOffsetToPointer(0) returns a pointer to the start of the file format. + const uint8_t* OffsetToPointer(size_t offset) const; + protected: Disassembler(const void* start, size_t length); @@ -57,16 +55,16 @@ class Disassembler : public AddressTranslator { } // Reduce the length of the image in memory. Does not actually free - // (or realloc) any memory. Usually only called via ParseHeader(). + // (or realloc) any memory. 
Usually only called via ParseHeader() void ReduceLength(size_t reduced_length); private: const char* failure_reason_; // - // Basic information that is always valid after construction, although - // ParseHeader() may shorten |length_| if the executable is shorter than the - // total data. + // Basic information that is always valid after Construction, though + // ParseHeader may shorten the length if the executable is shorter than + // the total data. // size_t length_; // In current memory. const uint8_t* start_; // In current memory, base for 'file offsets'. diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc index 9ceb8ab..84aa971 100644 --- a/courgette/disassembler_elf_32.cc +++ b/courgette/disassembler_elf_32.cc @@ -4,70 +4,39 @@ #include "courgette/disassembler_elf_32.h" +#include <stddef.h> +#include <stdint.h> + #include <algorithm> +#include <string> +#include <vector> #include "base/logging.h" +#include "base/memory/scoped_vector.h" + #include "courgette/assembly_program.h" #include "courgette/courgette.h" +#include "courgette/encoded_program.h" namespace courgette { DisassemblerElf32::DisassemblerElf32(const void* start, size_t length) - : Disassembler(start, length), - header_(nullptr), - section_header_table_(nullptr), - section_header_table_size_(0), - program_header_table_(nullptr), - program_header_table_size_(0), - default_string_section_(nullptr) { -} - -RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const { - // File offsets can be 64-bit values, but we are dealing with 32-bit - // executables and so only need to support 32-bit file sizes. - uint32_t offset32 = static_cast<uint32_t>(offset); - - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); - // These can appear to have a size in the file, but don't. 
- if (section_header->sh_type == SHT_NOBITS) - continue; - - Elf32_Off section_begin = section_header->sh_offset; - Elf32_Off section_end = section_begin + section_header->sh_size; - - if (offset32 >= section_begin && offset32 < section_end) { - return section_header->sh_addr + (offset32 - section_begin); - } - } - - return 0; -} - -FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const { - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); - // These can appear to have a size in the file, but don't. - if (section_header->sh_type == SHT_NOBITS) - continue; - Elf32_Addr begin = section_header->sh_addr; - Elf32_Addr end = begin + section_header->sh_size; - - if (rva >= begin && rva < end) - return section_header->sh_offset + (rva - begin); - } - return kNoFileOffset; + : Disassembler(start, length), + header_(NULL), + section_header_table_(NULL), + section_header_table_size_(0), + program_header_table_(NULL), + program_header_table_size_(0), + default_string_section_(NULL) { } bool DisassemblerElf32::ParseHeader() { if (length() < sizeof(Elf32_Ehdr)) return Bad("Too small"); - header_ = reinterpret_cast<const Elf32_Ehdr*>(start()); + header_ = (Elf32_Ehdr *)start(); - // Have magic for ELF header? + // Have magic for elf header? 
if (header_->e_ident[0] != 0x7f || header_->e_ident[1] != 'E' || header_->e_ident[2] != 'L' || @@ -90,25 +59,23 @@ bool DisassemblerElf32::ParseHeader() { if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr))) return Bad("Out of bounds section header table"); - section_header_table_ = reinterpret_cast<const Elf32_Shdr*>( - FileOffsetToPointer(header_->e_shoff)); + section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff); section_header_table_size_ = header_->e_shnum; if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr))) return Bad("Out of bounds program header table"); - program_header_table_ = reinterpret_cast<const Elf32_Phdr*>( - FileOffsetToPointer(header_->e_phoff)); + program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff); program_header_table_size_ = header_->e_phnum; if (header_->e_shstrndx >= header_->e_shnum) return Bad("Out of bounds string section index"); - default_string_section_ = reinterpret_cast<const char*>( - SectionBody(static_cast<int>(header_->e_shstrndx))); + default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx); - if (!UpdateLength()) + if (!UpdateLength()) { return Bad("Out of bounds section or segment"); + } return Good(); } @@ -130,6 +97,7 @@ bool DisassemblerElf32::Disassemble(AssemblyProgram* target) { return false; target->DefaultAssignIndexes(); + return true; } @@ -137,9 +105,8 @@ bool DisassemblerElf32::UpdateLength() { Elf32_Off result = 0; // Find the end of the last section - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); + for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { + const Elf32_Shdr *section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_NOBITS) continue; @@ -152,9 +119,8 @@ bool DisassemblerElf32::UpdateLength() { } // Find the end of the last segment - for (Elf32_Half 
segment_id = 0; segment_id < ProgramSegmentHeaderCount(); - ++segment_id) { - const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id); + for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { + const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1)) return false; @@ -163,26 +129,25 @@ bool DisassemblerElf32::UpdateLength() { result = std::max(result, segment_end); } - Elf32_Off section_table_end = - header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr)); + Elf32_Off section_table_end = header_->e_shoff + + (header_->e_shnum * sizeof(Elf32_Shdr)); result = std::max(result, section_table_end); - Elf32_Off segment_table_end = - header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr)); + Elf32_Off segment_table_end = header_->e_phoff + + (header_->e_phnum * sizeof(Elf32_Phdr)); result = std::max(result, segment_table_end); ReduceLength(result); return true; } -CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const { +CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const { if (rva == kUnassignedRVA) return false; // It's valid if it's contained in any program segment - for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount(); - ++segment_id) { - const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id); + for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { + const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); if (segment_header->p_type != PT_LOAD) continue; @@ -197,58 +162,114 @@ CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const { return false; } -CheckBool DisassemblerElf32::RVAsToFileOffsets( - const std::vector<RVA>& rvas, - std::vector<FileOffset>* file_offsets) { - file_offsets->clear(); - for (RVA rva : rvas) { - FileOffset file_offset = RVAToFileOffset(rva); - if (file_offset == kNoFileOffset) +CheckBool DisassemblerElf32::RVAToFileOffset(RVA rva, + size_t* result) const { + for (int i = 0; i < 
SectionHeaderCount(); i++) { + const Elf32_Shdr *section_header = SectionHeader(i); + // These can appear to have a size in the file, but don't. + if (section_header->sh_type == SHT_NOBITS) + continue; + Elf32_Addr begin = section_header->sh_addr; + Elf32_Addr end = begin + section_header->sh_size; + + if (rva >= begin && rva < end) { + *result = section_header->sh_offset + (rva - begin); + return true; + } + } + return false; +} + +RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const { + // File offsets can be 64 bit values, but we are dealing with 32 + // bit executables and so only need to support 32bit file sizes. + uint32_t offset32 = (uint32_t)offset; + + for (int i = 0; i < SectionHeaderCount(); i++) { + + const Elf32_Shdr *section_header = SectionHeader(i); + + // These can appear to have a size in the file, but don't. + if (section_header->sh_type == SHT_NOBITS) + continue; + + Elf32_Off section_begin = section_header->sh_offset; + Elf32_Off section_end = section_begin + section_header->sh_size; + + if (offset32 >= section_begin && offset32 < section_end) { + return section_header->sh_addr + (offset32 - section_begin); + } + } + + return 0; +} + +CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas, + std::vector<size_t>* offsets) { + offsets->clear(); + + for (std::vector<RVA>::iterator rva = rvas->begin(); + rva != rvas->end(); + rva++) { + + size_t offset; + + if (!RVAToFileOffset(*rva, &offset)) return false; - file_offsets->push_back(file_offset); + + offsets->push_back(offset); } + return true; } -CheckBool DisassemblerElf32::RVAsToFileOffsets( - ScopedVector<TypedRVA>* typed_rvas) { - for (TypedRVA* typed_rva : *typed_rvas) { - FileOffset file_offset = RVAToFileOffset(typed_rva->rva()); - if (file_offset == kNoFileOffset) +CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) { + for (ScopedVector<TypedRVA>::iterator rva = rvas->begin(); + rva != rvas->end(); + rva++) { + + size_t offset; + + if 
(!RVAToFileOffset((*rva)->rva(), &offset)) return false; - typed_rva->set_file_offset(file_offset); + + (*rva)->set_offset(offset); } + return true; } CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. - FileOffset file_offset = 0; + uint32_t file_offset = 0; - std::vector<FileOffset> abs_offsets; + std::vector<size_t> abs_offsets; - if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets)) + if (!RVAsToOffsets(&abs32_locations_, &abs_offsets)) return false; - if (!RVAsToFileOffsets(&rel32_locations_)) + if (!RVAsToOffsets(&rel32_locations_)) return false; - std::vector<FileOffset>::iterator current_abs_offset = abs_offsets.begin(); + std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin(); ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin(); - std::vector<FileOffset>::iterator end_abs_offset = abs_offsets.end(); + std::vector<size_t>::iterator end_abs_offset = abs_offsets.end(); ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end(); - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); + for (int section_id = 0; + section_id < SectionHeaderCount(); + section_id++) { + + const Elf32_Shdr *section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_NOBITS) continue; - if (!ParseSimpleRegion(file_offset, section_header->sh_offset, program)) + if (!ParseSimpleRegion(file_offset, + section_header->sh_offset, + program)) return false; - file_offset = section_header->sh_offset; switch (section_header->sh_type) { @@ -259,13 +280,10 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { break; case SHT_PROGBITS: if (!ParseProgbitsSection(section_header, - ¤t_abs_offset, - end_abs_offset, - ¤t_rel, - end_rel, - program)) { + ¤t_abs_offset, end_abs_offset, + ¤t_rel, end_rel, + program)) return false; - } file_offset = 
section_header->sh_offset + section_header->sh_size; break; case SHT_INIT_ARRAY: @@ -274,27 +292,28 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { while (current_abs_offset != end_abs_offset && *current_abs_offset >= section_header->sh_offset && *current_abs_offset < - section_header->sh_offset + section_header->sh_size) { + (section_header->sh_offset + section_header->sh_size)) { // Skip any abs_offsets appear in the unsupported INIT_ARRAY section - VLOG(1) << "Skipping relocation entry for unsupported section: " - << section_header->sh_type; - ++current_abs_offset; + VLOG(1) << "Skipping relocation entry for unsupported section: " << + section_header->sh_type; + current_abs_offset++; } break; default: if (current_abs_offset != end_abs_offset && - *current_abs_offset >= section_header->sh_offset && - *current_abs_offset < - section_header->sh_offset + section_header->sh_size) { - VLOG(1) << "Relocation address in unrecognized ELF section: " - << section_header->sh_type; - } - break; + *current_abs_offset >= section_header->sh_offset && + *current_abs_offset < + (section_header->sh_offset + section_header->sh_size)) + VLOG(1) << "Relocation address in unrecognized ELF section: " << \ + section_header->sh_type; + break; } } // Rest of the file past the last section - if (!ParseSimpleRegion(file_offset, length(), program)) + if (!ParseSimpleRegion(file_offset, + length(), + program)) return false; // Make certain we consume all of the relocations as expected @@ -302,32 +321,34 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { } CheckBool DisassemblerElf32::ParseProgbitsSection( - const Elf32_Shdr* section_header, - std::vector<FileOffset>::iterator* current_abs_offset, - std::vector<FileOffset>::iterator end_abs_offset, + const Elf32_Shdr *section_header, + std::vector<size_t>::iterator* current_abs_offset, + std::vector<size_t>::iterator end_abs_offset, ScopedVector<TypedRVA>::iterator* current_rel, 
ScopedVector<TypedRVA>::iterator end_rel, AssemblyProgram* program) { + // Walk all the bytes in the file, whether or not in a section. - FileOffset file_offset = section_header->sh_offset; - FileOffset section_end = section_header->sh_offset + section_header->sh_size; + size_t file_offset = section_header->sh_offset; + size_t section_end = section_header->sh_offset + section_header->sh_size; Elf32_Addr origin = section_header->sh_addr; - FileOffset origin_offset = section_header->sh_offset; + size_t origin_offset = section_header->sh_offset; if (!program->EmitOriginInstruction(origin)) return false; while (file_offset < section_end) { + if (*current_abs_offset != end_abs_offset && file_offset > **current_abs_offset) return false; while (*current_rel != end_rel && - file_offset > (**current_rel)->file_offset()) { - ++(*current_rel); + file_offset > (**current_rel)->get_offset()) { + (*current_rel)++; } - FileOffset next_relocation = section_end; + size_t next_relocation = section_end; if (*current_abs_offset != end_abs_offset && next_relocation > **current_abs_offset) @@ -337,8 +358,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( // an Abs value, or the end of the section, so +3 to make sure there is // room for the full 4 byte value. 
if (*current_rel != end_rel && - next_relocation > ((**current_rel)->file_offset() + 3)) - next_relocation = (**current_rel)->file_offset(); + next_relocation > ((**current_rel)->get_offset() + 3)) + next_relocation = (**current_rel)->get_offset(); if (next_relocation > file_offset) { if (!ParseSimpleRegion(file_offset, next_relocation, program)) @@ -350,28 +371,28 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( if (*current_abs_offset != end_abs_offset && file_offset == **current_abs_offset) { - const uint8_t* p = FileOffsetToPointer(file_offset); + const uint8_t* p = OffsetToPointer(file_offset); RVA target_rva = Read32LittleEndian(p); if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva))) return false; file_offset += sizeof(RVA); - ++(*current_abs_offset); + (*current_abs_offset)++; continue; } if (*current_rel != end_rel && - file_offset == (**current_rel)->file_offset()) { + file_offset == (**current_rel)->get_offset()) { uint32_t relative_target = (**current_rel)->relative_target(); // This cast is for 64 bit systems, and is only safe because we // are working on 32 bit executables. RVA target_rva = (RVA)(origin + (file_offset - origin_offset) + relative_target); - if (!(**current_rel)->EmitInstruction(program, target_rva)) + if (! 
(**current_rel)->EmitInstruction(program, target_rva)) return false; file_offset += (**current_rel)->op_size(); - ++(*current_rel); + (*current_rel)++; continue; } } @@ -380,19 +401,17 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( return ParseSimpleRegion(file_offset, section_end, program); } -CheckBool DisassemblerElf32::ParseSimpleRegion(FileOffset start_file_offset, - FileOffset end_file_offset, - AssemblyProgram* program) { +CheckBool DisassemblerElf32::ParseSimpleRegion( + size_t start_file_offset, + size_t end_file_offset, + AssemblyProgram* program) { // Callers don't guarantee start < end - if (start_file_offset >= end_file_offset) - return true; + if (start_file_offset >= end_file_offset) return true; const size_t len = end_file_offset - start_file_offset; - if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), - len)) { + if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len)) return false; - } return true; } @@ -401,13 +420,12 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { abs32_locations_.clear(); // Loop through sections for relocation sections - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); + for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { + const Elf32_Shdr *section_header = SectionHeader(section_id); if (section_header->sh_type == SHT_REL) { - const Elf32_Rel* relocs_table = - reinterpret_cast<const Elf32_Rel*>(SectionBody(section_id)); + + Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id); int relocs_table_count = section_header->sh_size / section_header->sh_entsize; @@ -415,7 +433,7 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { // Elf32_Word relocation_section_id = section_header->sh_info; // Loop through relocation objects in the relocation section - for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) { + for (int rel_id = 0; rel_id < 
relocs_table_count; rel_id++) { RVA rva; // Quite a few of these conversions fail, and we simply skip @@ -433,18 +451,23 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { } CheckBool DisassemblerElf32::CheckSection(RVA rva) { - FileOffset file_offset = RVAToFileOffset(rva); - if (file_offset == kNoFileOffset) + size_t offset; + + if (!RVAToFileOffset(rva, &offset)) { return false; + } + + for (int section_id = 0; + section_id < SectionHeaderCount(); + section_id++) { - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); + const Elf32_Shdr *section_header = SectionHeader(section_id); - if (file_offset >= section_header->sh_offset && - file_offset < (section_header->sh_offset + section_header->sh_size)) { + if (offset >= section_header->sh_offset && + offset < (section_header->sh_offset + section_header->sh_size)) { switch (section_header->sh_type) { - case SHT_REL: // Falls through. + case SHT_REL: + // Fall-through case SHT_PROGBITS: return true; } @@ -455,14 +478,16 @@ CheckBool DisassemblerElf32::CheckSection(RVA rva) { } CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() { + rel32_locations_.clear(); // Loop through sections for relocation sections - for (Elf32_Half section_id = 0; section_id < SectionHeaderCount(); - ++section_id) { - const Elf32_Shdr* section_header = SectionHeader(section_id); + for (int section_id = 0; + section_id < SectionHeaderCount(); + section_id++) { + + const Elf32_Shdr *section_header = SectionHeader(section_id); - // TODO(huangs): Add better checks to skip non-code sections. // Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0. 
if (section_header->sh_type != SHT_PROGBITS || section_header->sh_addr == 0) diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h index e9d00ca..8483ce3 100644 --- a/courgette/disassembler_elf_32.h +++ b/courgette/disassembler_elf_32.h @@ -8,12 +8,10 @@ #include <stddef.h> #include <stdint.h> -#include <vector> - #include "base/macros.h" #include "base/memory/scoped_vector.h" +#include "courgette/assembly_program.h" #include "courgette/disassembler.h" -#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_elf.h" @@ -21,33 +19,43 @@ namespace courgette { class AssemblyProgram; -// A Courgette disassembler for 32-bit ELF files. This is only a partial -// implementation that admits subclasses for the architecture-specific parts of -// 32-bit ELF file processing. Specifically: -// - RelToRVA() processes entries in ELF relocation table. -// - ParseRelocationSection() verifies the organization of the ELF relocation -// table. -// - ParseRel32RelocsFromSection() finds branch targets by looking for relative -// branch/call opcodes in the particular architecture's machine code. +// A courgette disassembler for 32-bit ELF files. This class is only a +// partial implementation. Subclasses implement the +// architecture-specific parts of processing 32-bit ELF files. Specifically, +// RelToRVA processes entries in ELF relocation table, +// ParseRelocationSection verifies the organization of the ELF +// relocation table, and ParseRel32RelocsFromSection finds branch +// targets by looking for relative jump/call opcodes in the particular +// architecture's machine code. class DisassemblerElf32 : public Disassembler { public: // Different instructions encode the target rva differently. This // class encapsulates this behavior. public for use in unit tests. 
class TypedRVA { public: - explicit TypedRVA(RVA rva) : rva_(rva) { } + explicit TypedRVA(RVA rva) : rva_(rva), offset_(static_cast<size_t>(-1)) { + } - virtual ~TypedRVA() { } + virtual ~TypedRVA() { }; - RVA rva() const { return rva_; } - RVA relative_target() const { return relative_target_; } - FileOffset file_offset() const { return file_offset_; } + RVA rva() { + return rva_; + } + + RVA relative_target() { + return relative_target_; + } void set_relative_target(RVA relative_target) { relative_target_ = relative_target; } - void set_file_offset(FileOffset file_offset) { - file_offset_ = file_offset; + + size_t get_offset() { + return offset_; + } + + void set_offset(size_t offset) { + offset_ = offset; } // Computes the relative jump's offset from the op in p. @@ -57,33 +65,33 @@ class DisassemblerElf32 : public Disassembler { virtual CheckBool EmitInstruction(AssemblyProgram* program, RVA target_rva) = 0; - // Returns the size of the instruction containing the RVA. virtual uint16_t op_size() const = 0; - // Comparator for sorting, which assumes uniqueness of RVAs. - static bool IsLessThan(TypedRVA* a, TypedRVA* b) { + static bool IsLessThan(TypedRVA *a, TypedRVA *b) { return a->rva() < b->rva(); } private: const RVA rva_; - RVA relative_target_ = kNoRVA; - FileOffset file_offset_ = kNoFileOffset; + RVA relative_target_; + size_t offset_; }; public: - DisassemblerElf32(const void* start, size_t length); + explicit DisassemblerElf32(const void* start, size_t length); + + virtual ~DisassemblerElf32() { }; - ~DisassemblerElf32() override { } + virtual ExecutableType kind() = 0; - // Disassembler interfaces. 
- RVA FileOffsetToRVA(FileOffset file_offset) const override; - FileOffset RVAToFileOffset(RVA rva) const override; - virtual ExecutableType kind() const override = 0; - bool ParseHeader() override; - bool Disassemble(AssemblyProgram* target) override; + virtual e_machine_values ElfEM() = 0; - virtual e_machine_values ElfEM() const = 0; + // Returns 'true' if the buffer appears to point to a valid ELF executable + // for 32 bit. If ParseHeader() succeeds, other member + // functions may be called. + virtual bool ParseHeader(); + + virtual bool Disassemble(AssemblyProgram* target); // Public for unittests only std::vector<RVA> &Abs32Locations() { return abs32_locations_; } @@ -99,13 +107,13 @@ class DisassemblerElf32 : public Disassembler { return section_header_table_size_; } - const Elf32_Shdr* SectionHeader(Elf32_Half id) const { + const Elf32_Shdr *SectionHeader(int id) const { assert(id >= 0 && id < SectionHeaderCount()); return section_header_table_ + id; } - const uint8_t* SectionBody(Elf32_Half id) const { - return FileOffsetToPointer(SectionHeader(id)->sh_offset); + const uint8_t* SectionBody(int id) const { + return OffsetToPointer(SectionHeader(id)->sh_offset); } // Misc Segment Helpers @@ -114,62 +122,61 @@ class DisassemblerElf32 : public Disassembler { return program_header_table_size_; } - const Elf32_Phdr* ProgramSegmentHeader(Elf32_Half id) const { + const Elf32_Phdr *ProgramSegmentHeader(int id) const { assert(id >= 0 && id < ProgramSegmentHeaderCount()); return program_header_table_ + id; } // Misc address space helpers - CheckBool IsValidTargetRVA(RVA rva) const WARN_UNUSED_RESULT; + CheckBool IsValidRVA(RVA rva) const WARN_UNUSED_RESULT; - // Converts an ELF relocation instruction into an RVA. 
+ // Convert an ELF relocation struction into an RVA virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) const WARN_UNUSED_RESULT = 0; - CheckBool RVAsToFileOffsets(const std::vector<RVA>& rvas, - std::vector<FileOffset>* file_offsets); + // Returns kNoOffset if there is no file offset corresponding to 'rva'. + CheckBool RVAToFileOffset(RVA rva, size_t* result) const WARN_UNUSED_RESULT; - CheckBool RVAsToFileOffsets(ScopedVector<TypedRVA>* typed_rvas); + RVA FileOffsetToRVA(size_t offset) const WARN_UNUSED_RESULT; - // Parsing code for Disassemble(). + CheckBool RVAsToOffsets(std::vector<RVA>* rvas /*in*/, + std::vector<size_t>* offsets /*out*/); - virtual CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, - AssemblyProgram* program) - WARN_UNUSED_RESULT = 0; + CheckBool RVAsToOffsets(ScopedVector<TypedRVA>* rvas /*in and out*/); - virtual CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) - WARN_UNUSED_RESULT = 0; + // Parsing Code used to really implement Disassemble CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; - + virtual CheckBool ParseRelocationSection( + const Elf32_Shdr *section_header, + AssemblyProgram* program) WARN_UNUSED_RESULT = 0; CheckBool ParseProgbitsSection( - const Elf32_Shdr* section_header, - std::vector<FileOffset>::iterator* current_abs_offset, - std::vector<FileOffset>::iterator end_abs_offset, + const Elf32_Shdr *section_header, + std::vector<size_t>::iterator* current_abs_offset, + std::vector<size_t>::iterator end_abs_offset, ScopedVector<TypedRVA>::iterator* current_rel, ScopedVector<TypedRVA>::iterator end_rel, AssemblyProgram* program) WARN_UNUSED_RESULT; - - CheckBool ParseSimpleRegion(FileOffset start_file_offset, - FileOffset end_file_offset, + CheckBool ParseSimpleRegion(size_t start_file_offset, + size_t end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseAbs32Relocs() WARN_UNUSED_RESULT; - CheckBool CheckSection(RVA rva) WARN_UNUSED_RESULT; - CheckBool 
ParseRel32RelocsFromSections() WARN_UNUSED_RESULT; + virtual CheckBool ParseRel32RelocsFromSection( + const Elf32_Shdr* section) WARN_UNUSED_RESULT = 0; - const Elf32_Ehdr* header_; - const Elf32_Shdr* section_header_table_; + Elf32_Ehdr *header_; + Elf32_Shdr *section_header_table_; Elf32_Half section_header_table_size_; - const Elf32_Phdr* program_header_table_; + Elf32_Phdr *program_header_table_; Elf32_Half program_header_table_size_; // Section header for default - const char* default_string_section_; + const char *default_string_section_; std::vector<RVA> abs32_locations_; ScopedVector<TypedRVA> rel32_locations_; diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc index 39172f4..f6490d9 100644 --- a/courgette/disassembler_elf_32_arm.cc +++ b/courgette/disassembler_elf_32_arm.cc @@ -4,12 +4,18 @@ #include "courgette/disassembler_elf_32_arm.h" +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <string> #include <vector> #include "base/logging.h" -#include "base/memory/scoped_ptr.h" + #include "courgette/assembly_program.h" #include "courgette/courgette.h" +#include "courgette/encoded_program.h" namespace courgette { @@ -18,34 +24,31 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, RVA rva, uint16_t* c_op, uint32_t* addr) { - // Notation for bit ranges in comments: - // - Listing bits from highest to lowest. - // - A-Z or (j1), (j2), etc.: single bit in source. - // - a-z: multiple, consecutive bits in source. + // This method takes an ARM or thumb opcode, extracts the relative + // target address from it (addr), and creates a corresponding + // Courgette opcode (c_op). + // + // Details on ARM the opcodes, and how the relative targets are + // computed were taken from the "ARM Architecture Reference Manual", + // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12. + // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes. switch (type) { case ARM_OFF8: { - // Encoding T1. 
- // The offset is given by lower 8 bits of the op. It is a 9-bit offset, - // shifted right 1 bit, and signed extended. - // arm_op = aaaaaaaa Snnnnnnn - // *addr := SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100 - // *c_op := 00010000 aaaaaaaa + // The offset is given by lower 8 bits of the op. It is a 9-bit + // offset, shifted right one bit and signed extended. uint32_t temp = (arm_op & 0x00FF) << 1; if (temp & 0x0100) temp |= 0xFFFFFE00; temp += 4; // Offset from _next_ PC. + fflush(stdout); (*addr) = temp; (*c_op) = static_cast<uint16_t>(arm_op >> 8) | 0x1000; break; } case ARM_OFF11: { - // Encoding T2. - // The offset is given by lower 11 bits of the op, and is a 12-bit offset, - // shifted right 1 bit, and sign extended. - // arm_op = aaaaaSnn nnnnnnnn - // *addr := SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100 - // *c_op := 00100000 000aaaaa + // The offset is given by lower 11 bits of the op, and is a + // 12-bit offset, shifted right one bit and sign extended. uint32_t temp = (arm_op & 0x07FF) << 1; if (temp & 0x00000800) temp |= 0xFFFFF000; @@ -58,9 +61,6 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, case ARM_OFF24: { // The offset is given by the lower 24-bits of the op, shifted // left 2 bits, and sign extended. - // arm_op = aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn - // *addr := SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000 - // *c_op := 00110000 aaaaaaaa uint32_t temp = (arm_op & 0x00FFFFFF) << 2; if (temp & 0x02000000) temp |= 0xFC000000; @@ -71,18 +71,6 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, break; } case ARM_OFF25: { - // Encoding T4. - // arm_op = aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn - // where CD is in {01, 10, 11} - // i1 := ~(j1 ^ S) - // i2 := ~(j2 ^ S) - // If CD == 10: - // pppp := (rva % 4 == 0) ? 0100 : 0010 - // Else: - // pppp := 0100 - // *addr := SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp - // *c_op := 0100pppp aaaaaBCD - // TODO(huangs): aaaaa = 11110 and B = 1 always? Investigate and fix. 
uint32_t temp = 0; temp |= (arm_op & 0x000007FF) << 1; // imm11 temp |= (arm_op & 0x03FF0000) >> 4; // imm10 @@ -90,8 +78,8 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, uint32_t S = (arm_op & (1 << 26)) >> 26; uint32_t j2 = (arm_op & (1 << 11)) >> 11; uint32_t j1 = (arm_op & (1 << 13)) >> 13; - bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; // D - bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; // C + bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; + bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; uint32_t i2 = ~(j2 ^ S) & 1; uint32_t i1 = ~(j1 ^ S) & 1; @@ -103,7 +91,7 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, temp |= 0xFE000000; uint32_t prefetch; if (toARM) { - // Align PC on 4-byte boundary. + // Align PC on 4-byte boundary uint32_t align4byte = (rva % 4) ? 2 : 4; prefetch = align4byte; } else { @@ -113,25 +101,20 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, (*addr) = temp; uint32_t temp2 = 0x4000; - temp2 |= (arm_op & (1 << 12)) >> 12; // .......D - temp2 |= (arm_op & (1 << 14)) >> 13; // ......C. - temp2 |= (arm_op & (1 << 15)) >> 13; // .....B.. - temp2 |= (arm_op & 0xF8000000) >> 24; // aaaaa... + temp2 |= (arm_op & (1 << 12)) >> 12; + temp2 |= (arm_op & (1 << 14)) >> 13; + temp2 |= (arm_op & (1 << 15)) >> 13; + temp2 |= (arm_op & 0xF8000000) >> 24; temp2 |= (prefetch & 0x0000000F) << 8; (*c_op) = static_cast<uint16_t>(temp2); break; } case ARM_OFF21: { - // Encoding T3. - // arm_op = 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn - // *addr := SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100 - // *c_op := 01010000 0000cccc uint32_t temp = 0; temp |= (arm_op & 0x000007FF) << 1; // imm11 temp |= (arm_op & 0x003F0000) >> 4; // imm6 uint32_t S = (arm_op & (1 << 26)) >> 26; - // TODO(huangs): Check with docs: Perhaps j1, j2 should swap? 
uint32_t j2 = (arm_op & (1 << 11)) >> 11; uint32_t j1 = (arm_op & (1 << 13)) >> 13; @@ -157,31 +140,20 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, uint16_t c_op, uint32_t addr, uint32_t* arm_op) { + // Reverses the process in the compress() method. Takes the + // Courgette op and relative address and reconstructs the original + // ARM or thumb op. switch (type) { case ARM_OFF8: - // addr = SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100 - // c_op = 00010000 aaaaaaaa - // *arm_op := aaaaaaaa Snnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF); break; case ARM_OFF11: - // addr = SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100 - // c_op = 00100000 000aaaaa - // *arm_op := aaaaaSnn nnnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF); break; case ARM_OFF24: - // addr = SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000 - // c_op = 00110000 aaaaaaaa - // *arm_op := aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF); break; case ARM_OFF25: { - // addr = SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp - // c_op = 0100pppp aaaaaBCD - // j1 := ~i1 ^ S - // j2 := ~i2 ^ S - // *arm_op := aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn uint32_t temp = 0; temp |= (c_op & (1 << 0)) << 12; temp |= (c_op & (1 << 1)) << 13; @@ -211,9 +183,6 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, break; } case ARM_OFF21: { - // addr = SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100 - // c_op = 01010000 0000cccc - // *arm_op := 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn uint32_t temp = 0xF0008000; temp |= (c_op & (0x03C00000 >> 22)) << 22; @@ -261,28 +230,24 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget( const uint8_t* op_pointer) { arm_op_ = op_pointer; switch (type_) { - case ARM_OFF8: // Falls through. 
+ case ARM_OFF8: + // Fall through case ARM_OFF11: { RVA relative_target; - CheckBool ret = Compress(type_, - Read16LittleEndian(op_pointer), - rva(), - &c_op_, - &relative_target); + CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(), + &c_op_, &relative_target); set_relative_target(relative_target); return ret; } case ARM_OFF24: { RVA relative_target; - CheckBool ret = Compress(type_, - Read32LittleEndian(op_pointer), - rva(), - &c_op_, - &relative_target); + CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(), + &c_op_, &relative_target); set_relative_target(relative_target); return ret; } - case ARM_OFF25: // Falls through. + case ARM_OFF25: + // Fall through case ARM_OFF21: { // A thumb-2 op is 32 bits stored as two 16-bit words uint32_t pval = (Read16LittleEndian(op_pointer) << 16) | @@ -307,24 +272,26 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction( } DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length) - : DisassemblerElf32(start, length) { + : DisassemblerElf32(start, length) { } -// Convert an ELF relocation struction into an RVA. +// Convert an ELF relocation struction into an RVA CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const { - // The rightmost byte of r_info is the type. + + // The rightmost byte of r_info is the type... elf32_rel_arm_type_values type = - static_cast<elf32_rel_arm_type_values>(rel.r_info & 0xFF); + (elf32_rel_arm_type_values)(unsigned char)rel.r_info; - // The other 3 bytes of r_info are the symbol. + // The other 3 bytes of r_info are the symbol uint32_t symbol = rel.r_info >> 8; - switch (type) { + switch(type) + { case R_ARM_RELATIVE: if (symbol != 0) return false; - // This is a basic ABS32 relocation address. 
+ // This is a basic ABS32 relocation address *result = rel.r_offset; return true; @@ -334,33 +301,32 @@ CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const { } CheckBool DisassemblerElf32ARM::ParseRelocationSection( - const Elf32_Shdr* section_header, - AssemblyProgram* program) { - // This method compresses a contiguous stretch of R_ARM_RELATIVE entries in - // the relocation table with a Courgette relocation table instruction. - // It skips any entries at the beginning that appear in a section that - // Courgette doesn't support, e.g. INIT. - // + const Elf32_Shdr *section_header, + AssemblyProgram* program) { + // This method compresses a contiguous stretch of R_ARM_RELATIVE + // entries in the relocation table with a Courgette relocation table + // instruction. It skips any entries at the beginning that appear + // in a section that Courgette doesn't support, e.g. INIT. // Specifically, the entries should be // (1) In the same relocation table // (2) Are consecutive // (3) Are sorted in memory address order // - // Happily, this is normally the case, but it's not required by spec so we - // check, and just don't do it if we don't match up. + // Happily, this is normally the case, but it's not required by spec + // so we check, and just don't do it if we don't match up. // - // The expectation is that one relocation section will contain all of our - // R_ARM_RELATIVE entries in the expected order followed by assorted other - // entries we can't use special handling for. + // The expectation is that one relocation section will contain + // all of our R_ARM_RELATIVE entries in the expected order followed + // by assorted other entries we can't use special handling for. bool match = true; - // Walk all the bytes in the section, matching relocation table or not. 
- FileOffset file_offset = section_header->sh_offset; - FileOffset section_end = section_header->sh_offset + section_header->sh_size; + // Walk all the bytes in the section, matching relocation table or not + size_t file_offset = section_header->sh_offset; + size_t section_end = section_header->sh_offset + section_header->sh_size; - const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>( - FileOffsetToPointer(section_header->sh_offset)); + Elf32_Rel *section_relocs_iter = + (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); uint32_t section_relocs_count = section_header->sh_size / section_header->sh_entsize; @@ -371,15 +337,13 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( if (!abs32_locations_.empty()) { std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin(); - for (uint32_t i = 0; i < section_relocs_count; ++i) { + for (uint32_t i = 0; i < section_relocs_count; i++) { if (section_relocs_iter->r_offset == *reloc_iter) break; - if (!ParseSimpleRegion(file_offset, - file_offset + sizeof(Elf32_Rel), - program)) { + if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel), + program)) return false; - } file_offset += sizeof(Elf32_Rel); ++section_relocs_iter; @@ -387,12 +351,11 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( while (match && (reloc_iter != abs32_locations_.end())) { if (section_relocs_iter->r_info != R_ARM_RELATIVE || - section_relocs_iter->r_offset != *reloc_iter) { + section_relocs_iter->r_offset != *reloc_iter) match = false; - } - ++section_relocs_iter; - ++reloc_iter; + section_relocs_iter++; + reloc_iter++; file_offset += sizeof(Elf32_Rel); } @@ -406,119 +369,118 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection( return ParseSimpleRegion(file_offset, section_end, program); } -// TODO(huangs): Detect and avoid overlap with abs32 addresses. 
CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection( const Elf32_Shdr* section_header) { - FileOffset start_file_offset = section_header->sh_offset; - FileOffset end_file_offset = start_file_offset + section_header->sh_size; + uint32_t start_file_offset = section_header->sh_offset; + uint32_t end_file_offset = start_file_offset + section_header->sh_size; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); // Quick way to convert from Pointer to RVA within a single Section is to - // subtract |pointer_to_rva|. + // subtract 'pointer_to_rva'. const uint8_t* const adjust_pointer_to_rva = start_pointer - section_header->sh_addr; // Find the rel32 relocations. const uint8_t* p = start_pointer; - bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it + bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it while (p < end_pointer) { // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? 
- scoped_ptr<TypedRVAARM> rel32_rva; + + TypedRVAARM* rel32_rva = NULL; RVA target_rva = 0; bool found = false; // 16-bit thumb ops - if (!found && p + 3 <= end_pointer) { + if (!found && (p + 3) <= end_pointer) { uint16_t pval = Read16LittleEndian(p); if ((pval & 0xF000) == 0xD000) { RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva); - rel32_rva.reset(new TypedRVAARM(ARM_OFF8, rva)); - if (!rel32_rva->ComputeRelativeTarget(p)) + rel32_rva = new TypedRVAARM(ARM_OFF8, rva); + if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { return false; - + } target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } else if ((pval & 0xF800) == 0xE000) { RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva); - rel32_rva.reset(new TypedRVAARM(ARM_OFF11, rva)); - if (!rel32_rva->ComputeRelativeTarget(p)) + rel32_rva = new TypedRVAARM(ARM_OFF11, rva); + if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { return false; - + } target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } - // thumb-2 ops comprised of two 16-bit words. - if (!found && p + 5 <= end_pointer) { + // thumb-2 ops comprised of two 16-bit words + if (!found && (p + 5) <= end_pointer) { // This is really two 16-bit words, not one 32-bit word. uint32_t pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2); if ((pval & 0xF8008000) == 0xF0008000) { // Covers thumb-2's 32-bit conditional/unconditional branches - if ((pval & (1 << 14)) || (pval & (1 << 12))) { + + if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) { // A branch, with link, or with link and exchange. 
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva); - rel32_rva.reset(new TypedRVAARM(ARM_OFF25, rva)); - if (!rel32_rva->ComputeRelativeTarget(p)) + rel32_rva = new TypedRVAARM(ARM_OFF25, rva); + if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { return false; - + } target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; - } else { // TODO(paulgazz) make sure cond is not 111 // A conditional branch instruction RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva); - rel32_rva.reset(new TypedRVAARM(ARM_OFF21, rva)); - if (!rel32_rva->ComputeRelativeTarget(p)) + rel32_rva = new TypedRVAARM(ARM_OFF21, rva); + if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { return false; - + } target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } } - // 32-bit ARM ops. + // 32-bit ARM ops if (!found && on_32bit && (p + 5) <= end_pointer) { uint32_t pval = Read32LittleEndian(p); if ((pval & 0x0E000000) == 0x0A000000) { // Covers both 0x0A 0x0B ARM relative branches RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva); - rel32_rva.reset(new TypedRVAARM(ARM_OFF24, rva)); - if (!rel32_rva->ComputeRelativeTarget(p)) + rel32_rva = new TypedRVAARM(ARM_OFF24, rva); + if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) { return false; - + } target_rva = rel32_rva->rva() + rel32_rva->relative_target(); found = true; } } - if (found && IsValidTargetRVA(target_rva)) { - uint16_t op_size = rel32_rva->op_size(); - rel32_locations_.push_back(rel32_rva.release()); + if (found && IsValidRVA(target_rva)) { + rel32_locations_.push_back(rel32_rva); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; #endif - p += op_size; + p += rel32_rva->op_size(); - // A tricky way to update the on_32bit flag. Here is the truth table: + // A tricky way to update the on_32bit flag. 
Here is the truth table: // on_32bit | on_32bit size is 4 // ---------+--------------------- // 1 | 0 0 // 0 | 0 1 // 0 | 1 0 // 1 | 1 1 - on_32bit = (~(on_32bit ^ (op_size == 4))) != 0; + on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0; } else { // Move 2 bytes at a time, but track 32-bit boundaries p += 2; diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h index 5dc6897..17ebb25 100644 --- a/courgette/disassembler_elf_32_arm.h +++ b/courgette/disassembler_elf_32_arm.h @@ -8,10 +8,9 @@ #include <stddef.h> #include <stdint.h> -#include <map> - #include "base/macros.h" #include "courgette/disassembler_elf_32.h" +#include "courgette/memory_allocator.h" #include "courgette/types_elf.h" namespace courgette { @@ -31,60 +30,51 @@ class DisassemblerElf32ARM : public DisassemblerElf32 { class TypedRVAARM : public TypedRVA { public: TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { } - ~TypedRVAARM() override { } - - // TypedRVA interfaces. - CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override; - CheckBool EmitInstruction(AssemblyProgram* program, - RVA target_rva) override; - uint16_t op_size() const override; uint16_t c_op() const { return c_op_; } + virtual CheckBool ComputeRelativeTarget(const uint8_t* op_pointer); + + virtual CheckBool EmitInstruction(AssemblyProgram* program, + RVA target_rva); + + virtual uint16_t op_size() const; + private: ARM_RVA type_; - uint16_t c_op_; // Set by ComputeRelativeTarget(). + + uint16_t c_op_; // set by ComputeRelativeTarget() const uint8_t* arm_op_; }; - DisassemblerElf32ARM(const void* start, size_t length); + explicit DisassemblerElf32ARM(const void* start, size_t length); - ~DisassemblerElf32ARM() override { } + virtual ExecutableType kind() { return EXE_ELF_32_ARM; } - // DisassemblerElf32 interfaces. 
- ExecutableType kind() const override { return EXE_ELF_32_ARM; } - e_machine_values ElfEM() const override { return EM_ARM; } + virtual e_machine_values ElfEM() { return EM_ARM; } - // Takes an ARM or thumb opcode |arm_op| of specified |type| and located at - // |rva|, extracts the instruction-relative target RVA into |*addr| and - // encodes the corresponding Courgette opcode as |*c_op|. - // - // Details on ARM opcodes, and target RVA extraction are taken from - // "ARM Architecture Reference Manual", section A4.1.5 and - // "Thumb-2 supplement", section 4.6.12. - // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes. static CheckBool Compress(ARM_RVA type, uint32_t arm_op, RVA rva, uint16_t* c_op /* out */, uint32_t* addr /* out */); - // Inverse for Compress(). Takes Courgette op |c_op| and relative address - // |addr| to reconstruct the original ARM or thumb op |*arm_op|. static CheckBool Decompress(ARM_RVA type, uint16_t c_op, uint32_t addr, uint32_t* arm_op /* out */); protected: - // DisassemblerElf32 interfaces. 
- CheckBool RelToRVA(Elf32_Rel rel, - RVA* result) const override WARN_UNUSED_RESULT; - CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, - AssemblyProgram* program) - override WARN_UNUSED_RESULT; - CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) - override WARN_UNUSED_RESULT; + + virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) + const WARN_UNUSED_RESULT; + + virtual CheckBool ParseRelocationSection( + const Elf32_Shdr *section_header, + AssemblyProgram* program) WARN_UNUSED_RESULT; + + virtual CheckBool ParseRel32RelocsFromSection( + const Elf32_Shdr* section) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS std::map<RVA, int> rel32_target_rvas_; diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc index 45f7cf6..98084c1 100644 --- a/courgette/disassembler_elf_32_x86.cc +++ b/courgette/disassembler_elf_32_x86.cc @@ -4,45 +4,37 @@ #include "courgette/disassembler_elf_32_x86.h" +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <string> #include <vector> #include "base/logging.h" -#include "base/memory/scoped_ptr.h" + #include "courgette/assembly_program.h" #include "courgette/courgette.h" +#include "courgette/encoded_program.h" namespace courgette { -CheckBool DisassemblerElf32X86::TypedRVAX86::ComputeRelativeTarget( - const uint8_t* op_pointer) { - set_relative_target(Read32LittleEndian(op_pointer) + 4); - return true; -} - -CheckBool DisassemblerElf32X86::TypedRVAX86::EmitInstruction( - AssemblyProgram* program, - RVA target_rva) { - return program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); -} - -uint16_t DisassemblerElf32X86::TypedRVAX86::op_size() const { - return 4; -} - DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length) - : DisassemblerElf32(start, length) { + : DisassemblerElf32(start, length) { } -// Convert an ELF relocation struction into an RVA. 
+// Convert an ELF relocation struction into an RVA CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { - // The rightmost byte of r_info is the type. + + // The rightmost byte of r_info is the type... elf32_rel_386_type_values type = - static_cast<elf32_rel_386_type_values>(rel.r_info & 0xFF); + (elf32_rel_386_type_values)(unsigned char)rel.r_info; - // The other 3 bytes of r_info are the symbol. + // The other 3 bytes of r_info are the symbol uint32_t symbol = rel.r_info >> 8; - switch (type) { + switch(type) + { case R_386_NONE: case R_386_32: case R_386_PC32: @@ -57,7 +49,7 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { if (symbol != 0) return false; - // This is a basic ABS32 relocation address. + // This is a basic ABS32 relocation address *result = rel.r_offset; return true; @@ -71,31 +63,32 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const { } CheckBool DisassemblerElf32X86::ParseRelocationSection( - const Elf32_Shdr* section_header, - AssemblyProgram* program) { - // We can reproduce the R_386_RELATIVE entries in one of the relocation table - // based on other information in the patch, given these conditions: + const Elf32_Shdr *section_header, + AssemblyProgram* program) { + // We can reproduce the R_386_RELATIVE entries in one of the relocation + // table based on other information in the patch, given these + // conditions.... // // All R_386_RELATIVE entries are: // 1) In the same relocation table // 2) Are consecutive // 3) Are sorted in memory address order // - // Happily, this is normally the case, but it's not required by spec, so we - // check, and just don't do it if we don't match up. + // Happily, this is normally the case, but it's not required by spec + // so we check, and just don't do it if we don't match up. 
- // The expectation is that one relocation section will contain all of our - // R_386_RELATIVE entries in the expected order followed by assorted other - // entries we can't use special handling for. + // The expectation is that one relocation section will contain + // all of our R_386_RELATIVE entries in the expected order followed + // by assorted other entries we can't use special handling for. bool match = true; - // Walk all the bytes in the section, matching relocation table or not. - FileOffset file_offset = section_header->sh_offset; - FileOffset section_end = file_offset + section_header->sh_size; + // Walk all the bytes in the section, matching relocation table or not + size_t file_offset = section_header->sh_offset; + size_t section_end = section_header->sh_offset + section_header->sh_size; - const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>( - FileOffsetToPointer(section_header->sh_offset)); + Elf32_Rel *section_relocs_iter = + (Elf32_Rel *)OffsetToPointer(section_header->sh_offset); uint32_t section_relocs_count = section_header->sh_size / section_header->sh_entsize; @@ -108,17 +101,16 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection( std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin(); - while (match && (reloc_iter != abs32_locations_.end())) { + while (match && (reloc_iter != abs32_locations_.end())) { if (section_relocs_iter->r_info != R_386_RELATIVE || - section_relocs_iter->r_offset != *reloc_iter) { + section_relocs_iter->r_offset != *reloc_iter) match = false; - } - ++section_relocs_iter; - ++reloc_iter; + section_relocs_iter++; + reloc_iter++; } if (match) { - // Skip over relocation tables. 
+ // Skip over relocation tables if (!program->EmitElfRelocationInstruction()) return false; file_offset += sizeof(Elf32_Rel) * abs32_locations_.size(); @@ -127,27 +119,28 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection( return ParseSimpleRegion(file_offset, section_end, program); } -// TODO(huangs): Detect and avoid overlap with abs32 addresses. CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( const Elf32_Shdr* section_header) { - FileOffset start_file_offset = section_header->sh_offset; - FileOffset end_file_offset = start_file_offset + section_header->sh_size; + uint32_t start_file_offset = section_header->sh_offset; + uint32_t end_file_offset = start_file_offset + section_header->sh_size; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); // Quick way to convert from Pointer to RVA within a single Section is to - // subtract |pointer_to_rva|. + // subtract 'pointer_to_rva'. const uint8_t* const adjust_pointer_to_rva = start_pointer - section_header->sh_addr; // Find the rel32 relocations. const uint8_t* p = start_pointer; while (p < end_pointer) { + //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? 
- const uint8_t* rel32 = nullptr; + const uint8_t* rel32 = NULL; if (p + 5 <= end_pointer) { if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 @@ -155,26 +148,32 @@ CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection( } } if (p + 6 <= end_pointer) { - if (*p == 0x0F && (p[1] & 0xF0) == 0x80) { // Jcc long form + if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely rel32 = p + 2; } } if (rel32) { RVA rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); - scoped_ptr<TypedRVAX86> rel32_rva(new TypedRVAX86(rva)); + TypedRVAX86* rel32_rva = new TypedRVAX86(rva); - if (!rel32_rva->ComputeRelativeTarget(rel32)) + if (!rel32_rva->ComputeRelativeTarget(rel32)) { + delete rel32_rva; return false; + } RVA target_rva = rel32_rva->rva() + rel32_rva->relative_target(); - if (IsValidTargetRVA(target_rva)) { - rel32_locations_.push_back(rel32_rva.release()); + // To be valid, rel32 target must be within image, and within this + // section. 
+ if (IsValidRVA(target_rva)) { + rel32_locations_.push_back(rel32_rva); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; #endif p = rel32 + 4; continue; + } else { + delete rel32_rva; } } p += 1; diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h index 63be755..5c87d4c 100644 --- a/courgette/disassembler_elf_32_x86.h +++ b/courgette/disassembler_elf_32_x86.h @@ -8,10 +8,9 @@ #include <stddef.h> #include <stdint.h> -#include <map> - #include "base/macros.h" #include "courgette/disassembler_elf_32.h" +#include "courgette/memory_allocator.h" #include "courgette/types_elf.h" namespace courgette { @@ -22,33 +21,38 @@ class DisassemblerElf32X86 : public DisassemblerElf32 { public: class TypedRVAX86 : public TypedRVA { public: - explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { } - ~TypedRVAX86() override { } + explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { + } + + CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override { + set_relative_target(Read32LittleEndian(op_pointer) + 4); + return true; + } - // TypedRVA interfaces. - CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override; CheckBool EmitInstruction(AssemblyProgram* program, - RVA target_rva) override; - uint16_t op_size() const override; + RVA target_rva) override { + return program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); + } + + uint16_t op_size() const override { return 4; } }; - DisassemblerElf32X86(const void* start, size_t length); + explicit DisassemblerElf32X86(const void* start, size_t length); - ~DisassemblerElf32X86() override { } + virtual ExecutableType kind() { return EXE_ELF_32_X86; } - // DisassemblerElf32 interfaces. - ExecutableType kind() const override { return EXE_ELF_32_X86; } - e_machine_values ElfEM() const override { return EM_386; } + virtual e_machine_values ElfEM() { return EM_386; } protected: - // DisassemblerElf32 interfaces. 
- CheckBool RelToRVA(Elf32_Rel rel, - RVA* result) const override WARN_UNUSED_RESULT; - CheckBool ParseRelocationSection(const Elf32_Shdr* section_header, - AssemblyProgram* program) - override WARN_UNUSED_RESULT; - CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section) - override WARN_UNUSED_RESULT; + virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result) + const WARN_UNUSED_RESULT; + + virtual CheckBool ParseRelocationSection( + const Elf32_Shdr *section_header, + AssemblyProgram* program) WARN_UNUSED_RESULT; + + virtual CheckBool ParseRel32RelocsFromSection( + const Elf32_Shdr* section) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS std::map<RVA, int> rel32_target_rvas_; diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc index c15b8df..3ce6a63 100644 --- a/courgette/disassembler_elf_32_x86_unittest.cc +++ b/courgette/disassembler_elf_32_x86_unittest.cc @@ -2,25 +2,16 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
-#include "courgette/disassembler_elf_32_x86.h" - #include <stddef.h> #include <stdint.h> -#include <algorithm> -#include <string> - -#include "base/memory/scoped_ptr.h" #include "courgette/assembly_program.h" #include "courgette/base_test_unittest.h" -#include "courgette/image_utils.h" - -namespace courgette { - -namespace { +#include "courgette/disassembler_elf_32_x86.h" class DisassemblerElf32X86Test : public BaseTest { public: + void TestExe(const char* file_name, size_t expected_abs_count, size_t expected_rel_count) const; @@ -29,11 +20,10 @@ class DisassemblerElf32X86Test : public BaseTest { void DisassemblerElf32X86Test::TestExe(const char* file_name, size_t expected_abs_count, size_t expected_rel_count) const { - using TypedRVA = DisassemblerElf32::TypedRVA; std::string file1 = FileContents(file_name); - scoped_ptr<DisassemblerElf32X86> disassembler( - new DisassemblerElf32X86(file1.c_str(), file1.length())); + scoped_ptr<courgette::DisassemblerElf32X86> disassembler( + new courgette::DisassemblerElf32X86(file1.c_str(), file1.length())); bool can_parse_header = disassembler->ParseHeader(); EXPECT_TRUE(can_parse_header); @@ -43,7 +33,7 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name, // real file, since trailing debug info is not included EXPECT_EQ(file1.length(), disassembler->length()); - const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); + const uint8_t* offset_p = disassembler->OffsetToPointer(0); EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), reinterpret_cast<const void*>(offset_p)); EXPECT_EQ(0x7F, offset_p[0]); @@ -51,45 +41,46 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name, EXPECT_EQ('L', offset_p[2]); EXPECT_EQ('F', offset_p[3]); - scoped_ptr<AssemblyProgram> program(new AssemblyProgram(EXE_ELF_32_X86)); - - EXPECT_TRUE(disassembler->Disassemble(program.get())); - - const std::vector<RVA>& abs32_list = disassembler->Abs32Locations(); - - // Flatten the list typed rel32 to a list of rel32 RVAs. 
- std::vector<RVA> rel32_list; - rel32_list.reserve(disassembler->Rel32Locations().size()); - for (TypedRVA* typed_rel32 : disassembler->Rel32Locations()) - rel32_list.push_back(typed_rel32->rva()); - - EXPECT_EQ(expected_abs_count, abs32_list.size()); - EXPECT_EQ(expected_rel_count, rel32_list.size()); - - EXPECT_TRUE(std::is_sorted(abs32_list.begin(), abs32_list.end())); - EXPECT_TRUE(std::is_sorted(rel32_list.begin(), rel32_list.end())); + courgette::AssemblyProgram* program = + new courgette::AssemblyProgram(courgette::EXE_ELF_32_X86); + + EXPECT_TRUE(disassembler->Disassemble(program)); + + EXPECT_EQ(disassembler->Abs32Locations().size(), expected_abs_count); + EXPECT_EQ(disassembler->Rel32Locations().size(), expected_rel_count); + + // Prove that none of the rel32 RVAs overlap with abs32 RVAs + std::set<courgette::RVA> abs(disassembler->Abs32Locations().begin(), + disassembler->Abs32Locations().end()); + std::set<courgette::DisassemblerElf32::TypedRVA*> + rel(disassembler->Rel32Locations().begin(), + disassembler->Rel32Locations().end()); + for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator + rel32 = disassembler->Rel32Locations().begin(); + rel32 != disassembler->Rel32Locations().end(); + rel32++) { + EXPECT_TRUE(abs.find((*rel32)->rva()) == abs.end()); + } - // Verify that rel32 RVAs do not overlap with abs32 RVAs. - // TODO(huangs): Fix this to account for RVA's 4-byte width. 
- bool found_match = false; - std::vector<RVA>::const_iterator abs32_it = abs32_list.begin(); - std::vector<RVA>::const_iterator rel32_it = rel32_list.begin(); - while (abs32_it != abs32_list.end() && rel32_it != rel32_list.end()) { - if (*abs32_it < *rel32_it) { - ++abs32_it; - } else if (*abs32_it > *rel32_it) { - ++rel32_it; - } else { - found_match = true; + for (std::vector<courgette::RVA>::iterator abs32 = + disassembler->Abs32Locations().begin(); + abs32 != disassembler->Abs32Locations().end(); + abs32++) { + bool found = false; + for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator + rel32 = disassembler->Rel32Locations().begin(); + rel32 != disassembler->Rel32Locations().end(); + rel32++) { + if (*abs32 == (*rel32)->rva()) { + found = true; + break; + } } + EXPECT_TRUE(!found); } - EXPECT_FALSE(found_match); + delete program; } -} // namespace - TEST_F(DisassemblerElf32X86Test, All) { TestExe("elf-32-1", 200, 3442); } - -} // namespace courgette diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc index 819b7f2..74b0fe4 100644 --- a/courgette/disassembler_win32_x64.cc +++ b/courgette/disassembler_win32_x64.cc @@ -8,73 +8,37 @@ #include <stdint.h> #include <algorithm> -#include <iostream> +#include <string> +#include <vector> #include "base/logging.h" #include "base/numerics/safe_conversions.h" + #include "courgette/assembly_program.h" #include "courgette/courgette.h" +#include "courgette/encoded_program.h" namespace courgette { DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) - : Disassembler(start, length), - incomplete_disassembly_(false), - is_PE32_plus_(false), - optional_header_(nullptr), - size_of_optional_header_(0), - offset_of_data_directories_(0), - machine_type_(0), - number_of_sections_(0), - sections_(nullptr), - has_text_section_(false), - size_of_code_(0), - size_of_initialized_data_(0), - size_of_uninitialized_data_(0), - base_of_code_(0), - base_of_data_(0), - 
image_base_(0), - size_of_image_(0), - number_of_data_directories_(0) { -} - -FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { - const Section* section = RVAToSection(rva); - if (section != nullptr) { - FileOffset offset_in_section = rva - section->virtual_address; - // Need this extra check, since an |rva| may be valid for a section, but is - // non-existent in an image (e.g. uninit data). - if (offset_in_section >= section->size_of_raw_data) - return kNoFileOffset; - - return static_cast<FileOffset>(section->file_offset_of_raw_data + - offset_in_section); - } - - // Small RVA values point into the file header in the loaded image. - // RVA 0 is the module load address which Windows uses as the module handle. - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the - // DOS header. - if (rva == 0 || rva == 2) - return static_cast<FileOffset>(rva); - - NOTREACHED(); - return kNoFileOffset; -} - -RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { - for (int i = 0; i < number_of_sections_; ++i) { - const Section* section = &sections_[i]; - if (file_offset >= section->file_offset_of_raw_data) { - FileOffset offset_in_section = - file_offset - section->file_offset_of_raw_data; - if (offset_in_section < section->size_of_raw_data) - return static_cast<RVA>(section->virtual_address + offset_in_section); - } - } - - NOTREACHED(); - return kNoRVA; + : Disassembler(start, length), + incomplete_disassembly_(false), + is_PE32_plus_(false), + optional_header_(NULL), + size_of_optional_header_(0), + offset_of_data_directories_(0), + machine_type_(0), + number_of_sections_(0), + sections_(NULL), + has_text_section_(false), + size_of_code_(0), + size_of_initialized_data_(0), + size_of_uninitialized_data_(0), + base_of_code_(0), + base_of_data_(0), + image_base_(0), + size_of_image_(0), + number_of_data_directories_(0) { } // ParseHeader attempts to match up the buffer with the Windows data @@ -93,19 +57,18 @@ bool 
DisassemblerWin32X64::ParseHeader() { return Bad("Not MZ"); // offset from DOS header to PE header is stored in DOS header. - FileOffset file_offset = static_cast<FileOffset>( - ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); + uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); - if (file_offset >= length()) + if (offset >= length()) return Bad("Bad offset to PE header"); - const uint8_t* const pe_header = FileOffsetToPointer(file_offset); + const uint8_t* const pe_header = OffsetToPointer(offset); const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; if (pe_header <= start() || pe_header >= end() - kMinPEHeaderSize) - return Bad("Bad file offset to PE header"); + return Bad("Bad offset to PE header"); - if (file_offset % 8 != 0) + if (offset % 8 != 0) return Bad("Misaligned PE header"); // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. @@ -206,7 +169,7 @@ bool DisassemblerWin32X64::ParseHeader() { size_of_optional_header_); size_t detected_length = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; // TODO(sra): consider using the 'characteristics' field of the section @@ -304,7 +267,7 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) { RVA rva = page_rva + offset; // TODO(sebmarchand): Skip the relocs that live outside of the image. See // the version of this function in disassembler_win32_x86.cc. - if (type == 10) { // IMAGE_REL_BASED_DIR64 + if (type == 10) { // IMAGE_REL_BASED_DIR64 relocs->push_back(rva); } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE // Ignore, used as padding. 
@@ -324,19 +287,48 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) { } const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; i++) { const Section* section = &sections_[i]; - if (rva >= section->virtual_address) { - FileOffset offset_in_section = rva - section->virtual_address; - if (offset_in_section < section->virtual_size) - return section; + uint32_t offset = rva - section->virtual_address; + if (offset < section->virtual_size) { + return section; + } + } + return NULL; +} + +int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { + const Section* section = RVAToSection(rva); + if (section) { + uint32_t offset = rva - section->virtual_address; + if (offset < section->size_of_raw_data) { + return section->file_offset_of_raw_data + offset; + } else { + return kNoOffset; // In section but not in file (e.g. uninit data). } } - return nullptr; + + // Small RVA values point into the file header in the loaded image. + // RVA 0 is the module load address which Windows uses as the module handle. + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the + // DOS header. + if (rva == 0 || rva == 2) + return rva; + + NOTREACHED(); + return kNoOffset; +} + +const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const { + int file_offset = RVAToFileOffset(rva); + if (file_offset == kNoOffset) + return NULL; + else + return OffsetToPointer(file_offset); } std::string DisassemblerWin32X64::SectionName(const Section* section) { - if (section == nullptr) + if (section == NULL) return "<none>"; char name[9]; memcpy(name, section->name, 8); @@ -346,25 +338,24 @@ std::string DisassemblerWin32X64::SectionName(const Section* section) { CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. 
- FileOffset file_offset = 0; + uint32_t file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == nullptr) { - // No more sections. There should not be extra stuff following last + if (section == NULL) { + // No more sections. There should not be extra stuff following last // section. // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); break; } if (file_offset < section->file_offset_of_raw_data) { - FileOffset section_start_offset = section->file_offset_of_raw_data; - if (!ParseNonSectionFileRegion(file_offset, section_start_offset, - program)) { + uint32_t section_start_offset = section->file_offset_of_raw_data; + if(!ParseNonSectionFileRegion(file_offset, section_start_offset, + program)) return false; - } file_offset = section_start_offset; } - FileOffset end = file_offset + section->size_of_raw_data; + uint32_t end = file_offset + section->size_of_raw_data; if (!ParseFileRegion(section, file_offset, end, program)) return false; file_offset = end; @@ -384,7 +375,7 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() { return false; #if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { + for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a reference to some address. 
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); @@ -395,10 +386,10 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() { } void DisassemblerWin32X64::ParseRel32RelocsFromSections() { - FileOffset file_offset = 0; + uint32_t file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == nullptr) + if (section == NULL) break; if (file_offset < section->file_offset_of_raw_data) file_offset = section->file_offset_of_raw_data; @@ -420,11 +411,11 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() { std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); while (abs32_iter != abs32_target_rvas_.end() && rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) { + if (abs32_iter->first < rel32_iter->first) ++abs32_iter; - } else if (rel32_iter->first < abs32_iter->first) { + else if (rel32_iter->first < abs32_iter->first) ++rel32_iter; - } else { + else { ++common; ++abs32_iter; ++rel32_iter; @@ -440,18 +431,18 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { if (!isCode) return; - FileOffset start_file_offset = section->file_offset_of_raw_data; - FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; + uint32_t start_file_offset = section->file_offset_of_raw_data; + uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; RVA relocs_start_rva = base_relocation_table().address_; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; // Quick way to convert from Pointer to RVA within a single Section is to - // subtract |pointer_to_rva|. 
+ // subtract 'pointer_to_rva'. const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); @@ -468,10 +459,13 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { } } + //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + // ++abs32_pos; + // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? - const uint8_t* rel32 = nullptr; + const uint8_t* rel32 = NULL; bool is_rip_relative = false; if (p + 5 <= end_pointer) { @@ -522,7 +516,7 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); // To be valid, rel32 target must be within image, and within this // section. - if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA. + if (IsValidRVA(target_rva) && (is_rip_relative || (start_rva <= target_rva && target_rva < end_rva))) { rel32_locations_.push_back(rel32_rva); @@ -538,14 +532,14 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { } CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) { if (incomplete_disassembly_) return true; if (end_file_offset > start_file_offset) { - if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), + if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), end_file_offset - start_file_offset)) { return false; } @@ -555,13 +549,13 @@ CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( } CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, 
AssemblyProgram* program) { RVA relocs_start_rva = base_relocation_table().address_; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; @@ -670,7 +664,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, size_t count = p->second.size(); std::cout << std::dec << p->first << ": " << count; if (count <= 2) { - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < count; ++i) std::cout << " " << DescribeRVA(p->second[i]); } std::cout << std::endl; @@ -682,6 +676,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, } #endif // COURGETTE_HISTOGRAM_TARGETS + // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except // that during development I'm finding I need to call it when compiled in // Release mode. Hence: @@ -700,12 +695,12 @@ std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { } const Section* DisassemblerWin32X64::FindNextSection( - FileOffset file_offset) const { + uint32_t fileOffset) const { const Section* best = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; i++) { const Section* section = &sections_[i]; if (section->size_of_raw_data > 0) { // i.e. has data in file. 
- if (file_offset <= section->file_offset_of_raw_data) { + if (fileOffset <= section->file_offset_of_raw_data) { if (best == 0 || section->file_offset_of_raw_data < best->file_offset_of_raw_data) { best = section; @@ -716,15 +711,26 @@ const Section* DisassemblerWin32X64::FindNextSection( return best; } +RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const { + for (int i = 0; i < number_of_sections_; i++) { + const Section* section = §ions_[i]; + uint32_t offset = file_offset - section->file_offset_of_raw_data; + if (offset < section->size_of_raw_data) { + return section->virtual_address + offset; + } + } + return 0; +} + bool DisassemblerWin32X64::ReadDataDirectory( int index, ImageDataDirectory* directory) { if (index < number_of_data_directories_) { - FileOffset file_offset = index * 8 + offset_of_data_directories_; - if (file_offset >= size_of_optional_header_) + size_t offset = index * 8 + offset_of_data_directories_; + if (offset >= size_of_optional_header_) return Bad("number of data directories inconsistent"); - const uint8_t* data_directory = optional_header_ + file_offset; + const uint8_t* data_directory = optional_header_ + offset; if (data_directory < start() || data_directory + 8 >= end()) return Bad("data directory outside image"); diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h index 20cfc7e..23aee66 100644 --- a/courgette/disassembler_win32_x64.h +++ b/courgette/disassembler_win32_x64.h @@ -8,16 +8,15 @@ #include <stddef.h> #include <stdint.h> -#include <map> -#include <string> -#include <vector> - #include "base/macros.h" #include "courgette/disassembler.h" -#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_win_pe.h" +#ifdef COURGETTE_HISTOGRAM_TARGETS +#include <map> +#endif + namespace courgette { class AssemblyProgram; @@ -26,14 +25,19 @@ class DisassemblerWin32X64 : public Disassembler { public: explicit DisassemblerWin32X64(const void* 
start, size_t length); - // Disassembler interfaces. - RVA FileOffsetToRVA(FileOffset file_offset) const override; - FileOffset RVAToFileOffset(RVA rva) const override; - ExecutableType kind() const override { return EXE_WIN_32_X64; } - bool ParseHeader() override; - bool Disassemble(AssemblyProgram* target) override; + virtual ExecutableType kind() { return EXE_WIN_32_X64; } + + // Returns 'true' if the buffer appears to point to a Windows 32 bit + // executable, 'false' otherwise. If ParseHeader() succeeds, other member + // functions may be called. + virtual bool ParseHeader(); + virtual bool Disassemble(AssemblyProgram* target); + + // // Exposed for test purposes + // + bool has_text_section() const { return has_text_section_; } uint32_t size_of_code() const { return size_of_code_; } bool is_32bit() const { return !is_PE32_plus_; } @@ -43,9 +47,17 @@ class DisassemblerWin32X64 : public Disassembler { // that are listed in the base relocation table. bool ParseRelocs(std::vector<RVA> *addresses); - // Returns Section containing the relative virtual address, or null if none. + // Returns Section containing the relative virtual address, or NULL if none. const Section* RVAToSection(RVA rva) const; + static const int kNoOffset = -1; + // Returns kNoOffset if there is no file offset corresponding to 'rva'. + int RVAToFileOffset(RVA rva) const; + + // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL + // is returned if there is no file offset corresponding to 'rva'. 
+ const uint8_t* RVAToPointer(RVA rva) const; + static std::string SectionName(const Section* section); protected: @@ -54,46 +66,62 @@ class DisassemblerWin32X64 : public Disassembler { void ParseRel32RelocsFromSections(); void ParseRel32RelocsFromSection(const Section* section); - CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset, - FileOffset end_file_offset, + CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseFileRegion(const Section* section, - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS void HistogramTargets(const char* kind, const std::map<RVA, int>& map); #endif - // Most addresses are represented as 32-bit RVAs. The one address we can't - // do this with is the image base address. + // Most addresses are represented as 32-bit RVAs. The one address we can't + // do this with is the image base address. 'image_base' is valid only for + // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. uint64_t image_base() const { return image_base_; } const ImageDataDirectory& base_relocation_table() const { return base_relocation_table_; } - // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. + // Subsumes rva != kUnassignedRVA. + bool IsValidRVA(RVA rva) const { return rva < size_of_image_; } + + // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. std::string DescribeRVA(RVA rva) const; - // Finds the first section at file_offset or above. Does not return sections + // Finds the first section at file_offset or above. Does not return sections // that have no raw bytes in the file. 
- const Section* FindNextSection(FileOffset file_offset) const; + const Section* FindNextSection(uint32_t file_offset) const; + + // There are 2 'coordinate systems' for reasoning about executables. + // FileOffset - the the offset within a single .EXE or .DLL *file*. + // RVA - relative virtual address (offset within *loaded image*) + // FileOffsetToRVA and RVAToFileOffset convert between these representations. + + RVA FileOffsetToRVA(uint32_t offset) const; private: + bool ReadDataDirectory(int index, ImageDataDirectory* dir); - bool incomplete_disassembly_; // true if can omit "uninteresting" bits. + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits std::vector<RVA> abs32_locations_; std::vector<RVA> rel32_locations_; // - // Information that is valid after ParseHeader() succeeds. + // Fields that are always valid. // - bool is_PE32_plus_; // PE32_plus is for 64 bit executables. + + // + // Information that is valid after successful ParseHeader. + // + bool is_PE32_plus_; // PE32_plus is for 64 bit executables. // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. 
const uint8_t* optional_header_; @@ -130,9 +158,9 @@ class DisassemblerWin32X64 : public Disassembler { std::map<RVA, int> rel32_target_rvas_; #endif + DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X64); }; } // namespace courgette - #endif // COURGETTE_DISASSEMBLER_WIN32_X64_H_ diff --git a/courgette/disassembler_win32_x64_unittest.cc b/courgette/disassembler_win32_x64_unittest.cc index 1121c10..8f732b3 100644 --- a/courgette/disassembler_win32_x64_unittest.cc +++ b/courgette/disassembler_win32_x64_unittest.cc @@ -6,9 +6,6 @@ #include <stdint.h> -#include <string> -#include <vector> - #include "base/memory/scoped_ptr.h" #include "base/stl_util.h" #include "courgette/base_test_unittest.h" @@ -40,16 +37,16 @@ void DisassemblerWin32X64Test::TestExe() const { disassembler->RVAToSection(0x00401234 - 0x00400000)), std::string(".text")); - EXPECT_EQ(0U, disassembler->RVAToFileOffset(0)); - EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096)); - EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000)); + EXPECT_EQ(0, disassembler->RVAToFileOffset(0)); + EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096)); + EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000)); std::vector<courgette::RVA> relocs; bool can_parse_relocs = disassembler->ParseRelocs(&relocs); EXPECT_TRUE(can_parse_relocs); EXPECT_TRUE(base::STLIsSorted(relocs)); - const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); + const uint8_t* offset_p = disassembler->OffsetToPointer(0); EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), reinterpret_cast<const void*>(offset_p)); EXPECT_EQ('M', offset_p[0]); diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc index 07bdfbc..aed26c7 100644 --- a/courgette/disassembler_win32_x86.cc +++ b/courgette/disassembler_win32_x86.cc @@ -8,73 +8,37 @@ #include <stdint.h> #include <algorithm> -#include <iostream> +#include <string> +#include <vector> #include "base/logging.h" + #include "courgette/assembly_program.h" #include 
"courgette/courgette.h" +#include "courgette/encoded_program.h" #include "courgette/rel32_finder_win32_x86.h" namespace courgette { DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length) - : Disassembler(start, length), - incomplete_disassembly_(false), - is_PE32_plus_(false), - optional_header_(nullptr), - size_of_optional_header_(0), - offset_of_data_directories_(0), - machine_type_(0), - number_of_sections_(0), - sections_(nullptr), - has_text_section_(false), - size_of_code_(0), - size_of_initialized_data_(0), - size_of_uninitialized_data_(0), - base_of_code_(0), - base_of_data_(0), - image_base_(0), - size_of_image_(0), - number_of_data_directories_(0) { -} - -FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { - const Section* section = RVAToSection(rva); - if (section != nullptr) { - FileOffset offset_in_section = rva - section->virtual_address; - // Need this extra check, since an |rva| may be valid for a section, but is - // non-existent in an image (e.g. uninit data). - if (offset_in_section >= section->size_of_raw_data) - return kNoFileOffset; - - return static_cast<FileOffset>(section->file_offset_of_raw_data + - offset_in_section); - } - - // Small RVA values point into the file header in the loaded image. - // RVA 0 is the module load address which Windows uses as the module handle. - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the - // DOS header. 
- if (rva == 0 || rva == 2) - return static_cast<FileOffset>(rva); - - NOTREACHED(); - return kNoFileOffset; -} - -RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const { - for (int i = 0; i < number_of_sections_; ++i) { - const Section* section = &sections_[i]; - if (file_offset >= section->file_offset_of_raw_data) { - FileOffset offset_in_section = - file_offset - section->file_offset_of_raw_data; - if (offset_in_section < section->size_of_raw_data) - return static_cast<RVA>(section->virtual_address + offset_in_section); - } - } - - NOTREACHED(); - return kNoRVA; + : Disassembler(start, length), + incomplete_disassembly_(false), + is_PE32_plus_(false), + optional_header_(NULL), + size_of_optional_header_(0), + offset_of_data_directories_(0), + machine_type_(0), + number_of_sections_(0), + sections_(NULL), + has_text_section_(false), + size_of_code_(0), + size_of_initialized_data_(0), + size_of_uninitialized_data_(0), + base_of_code_(0), + base_of_data_(0), + image_base_(0), + size_of_image_(0), + number_of_data_directories_(0) { } // ParseHeader attempts to match up the buffer with the Windows data @@ -93,19 +57,18 @@ bool DisassemblerWin32X86::ParseHeader() { return Bad("Not MZ"); // offset from DOS header to PE header is stored in DOS header. 
- FileOffset file_offset = static_cast<FileOffset>( - ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); + uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); - if (file_offset >= length()) + if (offset >= length()) return Bad("Bad offset to PE header"); - const uint8_t* const pe_header = FileOffsetToPointer(file_offset); + const uint8_t* const pe_header = OffsetToPointer(offset); const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; if (pe_header <= start() || pe_header >= end() - kMinPEHeaderSize) - return Bad("Bad file offset to PE header"); + return Bad("Bad offset to PE header"); - if (file_offset % 8 != 0) + if (offset % 8 != 0) return Bad("Misaligned PE header"); // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. @@ -206,7 +169,7 @@ bool DisassemblerWin32X86::ParseHeader() { size_of_optional_header_); size_t detected_length = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; ++i) { const Section* section = &sections_[i]; // TODO(sra): consider using the 'characteristics' field of the section @@ -330,19 +293,48 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) { } const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const { - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; i++) { const Section* section = &sections_[i]; - if (rva >= section->virtual_address) { - FileOffset offset_in_section = rva - section->virtual_address; - if (offset_in_section < section->virtual_size) - return section; + uint32_t offset = rva - section->virtual_address; + if (offset < section->virtual_size) { + return section; + } + } + return NULL; +} + +int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { + const Section* section = RVAToSection(rva); + if (section) { + uint32_t offset = rva - section->virtual_address; + if (offset < section->size_of_raw_data) { + return section->file_offset_of_raw_data + offset; + } 
else { + return kNoOffset; // In section but not in file (e.g. uninit data). } } - return nullptr; + + // Small RVA values point into the file header in the loaded image. + // RVA 0 is the module load address which Windows uses as the module handle. + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the + // DOS header. + if (rva == 0 || rva == 2) + return rva; + + NOTREACHED(); + return kNoOffset; +} + +const uint8_t* DisassemblerWin32X86::RVAToPointer(RVA rva) const { + int file_offset = RVAToFileOffset(rva); + if (file_offset == kNoOffset) + return NULL; + else + return OffsetToPointer(file_offset); } std::string DisassemblerWin32X86::SectionName(const Section* section) { - if (section == nullptr) + if (section == NULL) return "<none>"; char name[9]; memcpy(name, section->name, 8); @@ -352,25 +344,24 @@ std::string DisassemblerWin32X86::SectionName(const Section* section) { CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { // Walk all the bytes in the file, whether or not in a section. - FileOffset file_offset = 0; + uint32_t file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == nullptr) { - // No more sections. There should not be extra stuff following last + if (section == NULL) { + // No more sections. There should not be extra stuff following last // section. 
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program); break; } if (file_offset < section->file_offset_of_raw_data) { - FileOffset section_start_offset = section->file_offset_of_raw_data; - if (!ParseNonSectionFileRegion(file_offset, section_start_offset, - program)) { + uint32_t section_start_offset = section->file_offset_of_raw_data; + if(!ParseNonSectionFileRegion(file_offset, section_start_offset, + program)) return false; - } file_offset = section_start_offset; } - FileOffset end = file_offset + section->size_of_raw_data; + uint32_t end = file_offset + section->size_of_raw_data; if (!ParseFileRegion(section, file_offset, end, program)) return false; file_offset = end; @@ -390,7 +381,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { return false; #if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { + for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a reference to some address. 
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); @@ -401,10 +392,10 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { } void DisassemblerWin32X86::ParseRel32RelocsFromSections() { - FileOffset file_offset = 0; + uint32_t file_offset = 0; while (file_offset < length()) { const Section* section = FindNextSection(file_offset); - if (section == nullptr) + if (section == NULL) break; if (file_offset < section->file_offset_of_raw_data) file_offset = section->file_offset_of_raw_data; @@ -426,11 +417,11 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSections() { std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); while (abs32_iter != abs32_target_rvas_.end() && rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) { + if (abs32_iter->first < rel32_iter->first) ++abs32_iter; - } else if (rel32_iter->first < abs32_iter->first) { + else if (rel32_iter->first < abs32_iter->first) ++rel32_iter; - } else { + else { ++common; ++abs32_iter; ++rel32_iter; @@ -446,18 +437,19 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { if (!isCode) return; - FileOffset start_file_offset = section->file_offset_of_raw_data; - FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; + uint32_t start_file_offset = section->file_offset_of_raw_data; + uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; Rel32FinderWin32X86_Basic finder( base_relocation_table().address_, - base_relocation_table().address_ + base_relocation_table().size_); + base_relocation_table().address_ + 
base_relocation_table().size_, + size_of_image_); finder.Find(start_pointer, end_pointer, start_rva, end_rva, abs32_locations_); finder.SwapRel32Locations(&rel32_locations_); @@ -468,14 +460,14 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { } CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) { if (incomplete_disassembly_) return true; if (end_file_offset > start_file_offset) { - if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), + if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), end_file_offset - start_file_offset)) { return false; } @@ -485,13 +477,13 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( } CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section, - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) { RVA relocs_start_rva = base_relocation_table().address_; - const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); - const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); + const uint8_t* start_pointer = OffsetToPointer(start_file_offset); + const uint8_t* end_pointer = OffsetToPointer(end_file_offset); RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; @@ -600,7 +592,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, size_t count = p->second.size(); std::cout << std::dec << p->first << ": " << count; if (count <= 2) { - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < count; ++i) std::cout << " " << DescribeRVA(p->second[i]); } std::cout << std::endl; @@ -612,6 +604,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, } #endif // COURGETTE_HISTOGRAM_TARGETS + // DescribeRVA is for 
debugging only. I would put it under #ifdef DEBUG except // that during development I'm finding I need to call it when compiled in // Release mode. Hence: @@ -630,12 +623,12 @@ std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const { } const Section* DisassemblerWin32X86::FindNextSection( - FileOffset file_offset) const { + uint32_t fileOffset) const { const Section* best = 0; - for (int i = 0; i < number_of_sections_; ++i) { + for (int i = 0; i < number_of_sections_; i++) { const Section* section = &sections_[i]; if (section->size_of_raw_data > 0) { // i.e. has data in file. - if (file_offset <= section->file_offset_of_raw_data) { + if (fileOffset <= section->file_offset_of_raw_data) { if (best == 0 || section->file_offset_of_raw_data < best->file_offset_of_raw_data) { best = section; @@ -646,15 +639,26 @@ const Section* DisassemblerWin32X86::FindNextSection( return best; } +RVA DisassemblerWin32X86::FileOffsetToRVA(uint32_t file_offset) const { + for (int i = 0; i < number_of_sections_; i++) { + const Section* section = &sections_[i]; + uint32_t offset = file_offset - section->file_offset_of_raw_data; + if (offset < section->size_of_raw_data) { + return section->virtual_address + offset; + } + } + return 0; +} + bool DisassemblerWin32X86::ReadDataDirectory( int index, ImageDataDirectory* directory) { if (index < number_of_data_directories_) { - FileOffset file_offset = index * 8 + offset_of_data_directories_; - if (file_offset >= size_of_optional_header_) + size_t offset = index * 8 + offset_of_data_directories_; + if (offset >= size_of_optional_header_) return Bad("number of data directories inconsistent"); - const uint8_t* data_directory = optional_header_ + file_offset; + const uint8_t* data_directory = optional_header_ + offset; if (data_directory < start() || data_directory + 8 >= end()) return Bad("data directory outside image"); diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h index c22872b..891636c 100644 --- 
a/courgette/disassembler_win32_x86.h +++ b/courgette/disassembler_win32_x86.h @@ -8,16 +8,15 @@ #include <stddef.h> #include <stdint.h> -#include <map> -#include <string> -#include <vector> - #include "base/macros.h" #include "courgette/disassembler.h" -#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" #include "courgette/types_win_pe.h" +#ifdef COURGETTE_HISTOGRAM_TARGETS +#include <map> +#endif + namespace courgette { class AssemblyProgram; @@ -26,14 +25,19 @@ class DisassemblerWin32X86 : public Disassembler { public: explicit DisassemblerWin32X86(const void* start, size_t length); - // Disassembler interfaces. - RVA FileOffsetToRVA(FileOffset file_offset) const override; - FileOffset RVAToFileOffset(RVA rva) const override; - ExecutableType kind() const override { return EXE_WIN_32_X86; } - bool ParseHeader() override; - bool Disassemble(AssemblyProgram* target) override; + virtual ExecutableType kind() { return EXE_WIN_32_X86; } + // Returns 'true' if the buffer appears to point to a Windows 32 bit + // executable, 'false' otherwise. If ParseHeader() succeeds, other member + // functions may be called. + virtual bool ParseHeader(); + + virtual bool Disassemble(AssemblyProgram* target); + + // // Exposed for test purposes + // + bool has_text_section() const { return has_text_section_; } uint32_t size_of_code() const { return size_of_code_; } bool is_32bit() const { return !is_PE32_plus_; } @@ -43,9 +47,17 @@ class DisassemblerWin32X86 : public Disassembler { // that are listed in the base relocation table. bool ParseRelocs(std::vector<RVA> *addresses); - // Returns Section containing the relative virtual address, or null if none. + // Returns Section containing the relative virtual address, or NULL if none. const Section* RVAToSection(RVA rva) const; + static const int kNoOffset = -1; + // Returns kNoOffset if there is no file offset corresponding to 'rva'. 
+ int RVAToFileOffset(RVA rva) const; + + // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL + // is returned if there is no file offset corresponding to 'rva'. + const uint8_t* RVAToPointer(RVA rva) const; + static std::string SectionName(const Section* section); protected: @@ -54,46 +66,59 @@ class DisassemblerWin32X86 : public Disassembler { void ParseRel32RelocsFromSections(); void ParseRel32RelocsFromSection(const Section* section); - CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset, - FileOffset end_file_offset, + CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; CheckBool ParseFileRegion(const Section* section, - FileOffset start_file_offset, - FileOffset end_file_offset, + uint32_t start_file_offset, + uint32_t end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; #if COURGETTE_HISTOGRAM_TARGETS void HistogramTargets(const char* kind, const std::map<RVA, int>& map); #endif - // Most addresses are represented as 32-bit RVAs. The one address we can't - // do this with is the image base address. + // Most addresses are represented as 32-bit RVAs. The one address we can't + // do this with is the image base address. 'image_base' is valid only for + // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. uint32_t image_base() const { return static_cast<uint32_t>(image_base_); } const ImageDataDirectory& base_relocation_table() const { return base_relocation_table_; } - // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. + // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. std::string DescribeRVA(RVA rva) const; - // Finds the first section at file_offset or above. Does not return sections + // Finds the first section at file_offset or above. Does not return sections // that have no raw bytes in the file. 
- const Section* FindNextSection(FileOffset file_offset) const; + const Section* FindNextSection(uint32_t file_offset) const; + + // There are 2 'coordinate systems' for reasoning about executables. + // FileOffset - the the offset within a single .EXE or .DLL *file*. + // RVA - relative virtual address (offset within *loaded image*) + // FileOffsetToRVA and RVAToFileOffset convert between these representations. + + RVA FileOffsetToRVA(uint32_t offset) const; private: + bool ReadDataDirectory(int index, ImageDataDirectory* dir); - bool incomplete_disassembly_; // true if can omit "uninteresting" bits. + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits std::vector<RVA> abs32_locations_; std::vector<RVA> rel32_locations_; // - // Information that is valid after ParseHeader() succeeds. + // Fields that are always valid. // - bool is_PE32_plus_; // PE32_plus is for 64 bit executables. + + // + // Information that is valid after successful ParseHeader. + // + bool is_PE32_plus_; // PE32_plus is for 64 bit executables. // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. 
const uint8_t* optional_header_; @@ -130,9 +155,9 @@ class DisassemblerWin32X86 : public Disassembler { std::map<RVA, int> rel32_target_rvas_; #endif + DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86); }; } // namespace courgette - #endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_ diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc index 3e43273..4e16464 100644 --- a/courgette/disassembler_win32_x86_unittest.cc +++ b/courgette/disassembler_win32_x86_unittest.cc @@ -6,9 +6,6 @@ #include <stdint.h> -#include <string> -#include <vector> - #include "base/memory/scoped_ptr.h" #include "base/stl_util.h" #include "courgette/base_test_unittest.h" @@ -40,16 +37,16 @@ void DisassemblerWin32X86Test::TestExe() const { disassembler->RVAToSection(0x00401234 - 0x00400000)), std::string(".text")); - EXPECT_EQ(0U, disassembler->RVAToFileOffset(0)); - EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096)); - EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000)); + EXPECT_EQ(0, disassembler->RVAToFileOffset(0)); + EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096)); + EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000)); std::vector<courgette::RVA> relocs; bool can_parse_relocs = disassembler->ParseRelocs(&relocs); EXPECT_TRUE(can_parse_relocs); EXPECT_TRUE(base::STLIsSorted(relocs)); - const uint8_t* offset_p = disassembler->FileOffsetToPointer(0); + const uint8_t* offset_p = disassembler->OffsetToPointer(0); EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), reinterpret_cast<const void*>(offset_p)); EXPECT_EQ('M', offset_p[0]); diff --git a/courgette/image_utils.h b/courgette/image_utils.h index cfbfcfe..f958cc1 100644 --- a/courgette/image_utils.h +++ b/courgette/image_utils.h @@ -14,44 +14,8 @@ namespace courgette { -// There are several ways to reason about addresses in an image: -// - File Offset: Position relative to start of image. -// - VA (Virtual Address): Virtual memory address of a loaded image. 
This is -// subject to relocation by the OS. -// - RVA (Relative Virtual Address): VA relative to some base address. This is -// the preferred way to specify pointers in an image. Two ways to encode RVA -// are: -// - abs32: RVA value is encoded directly. -// - rel32: RVA is encoded as offset from an instruction address. This is -// commonly used for relative branch/call opcodes. -// Courgette operates on File Offsets and RVAs only. - -using RVA = uint32_t; +typedef uint32_t RVA; const RVA kUnassignedRVA = 0xFFFFFFFFU; -const RVA kNoRVA = 0xFFFFFFFFU; - -using FileOffset = size_t; -const FileOffset kNoFileOffset = UINTPTR_MAX; - -// An interface for {File Offset, RVA, pointer to image data} translation. -class AddressTranslator { - public: - // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent. - virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0; - - // Returns the file offset corresponding to |rva|, or kNoFileOffset if - // nonexistent. - virtual FileOffset RVAToFileOffset(RVA rva) const = 0; - - // Returns the pointer to the image data for |file_offset|. Assumes that - // 0 <= |file_offset| <= image size. If |file_offset| == image, the resulting - // pointer is an end bound for iteration that should never be dereferenced. - virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0; - - // Returns the pointer to the image data for |rva|, or null if |rva| is - // invalid. - virtual const uint8_t* RVAToPointer(RVA rva) const = 0; -}; // A Label is a symbolic reference to an address. Unlike a conventional // assembly language, we always know the address. 
The address will later be diff --git a/courgette/rel32_finder_win32_x86.cc b/courgette/rel32_finder_win32_x86.cc index 0ed492f..171b781 100644 --- a/courgette/rel32_finder_win32_x86.cc +++ b/courgette/rel32_finder_win32_x86.cc @@ -8,9 +8,11 @@ namespace courgette { -Rel32FinderWin32X86::Rel32FinderWin32X86(RVA relocs_start_rva, - RVA relocs_end_rva) - : relocs_start_rva_(relocs_start_rva), relocs_end_rva_(relocs_end_rva) { +Rel32FinderWin32X86::Rel32FinderWin32X86( + RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva) + : relocs_start_rva_(relocs_start_rva), + relocs_end_rva_(relocs_end_rva), + image_end_rva_(image_end_rva) { } Rel32FinderWin32X86::~Rel32FinderWin32X86() { @@ -26,9 +28,9 @@ void Rel32FinderWin32X86::SwapRel32TargetRVAs(std::map<RVA, int>* dest) { } #endif -Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(RVA relocs_start_rva, - RVA relocs_end_rva) - : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva) { +Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic( + RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva) + : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva, image_end_rva) { } Rel32FinderWin32X86_Basic::~Rel32FinderWin32X86_Basic() { @@ -49,10 +51,6 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, const uint8_t* p = start_pointer; while (p < end_pointer) { RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); - - // Skip the base reloation table if we encounter it. - // Note: We're not bothering to handle the edge case where a Rel32 pointer - // collides with |relocs_start_rva_| by being {1, 2, 3}-bytes before it. 
if (current_rva == relocs_start_rva_) { if (relocs_start_rva_ < relocs_end_rva_) { p += relocs_end_rva_ - relocs_start_rva_; @@ -60,10 +58,13 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, } } + //while (abs32_pos != abs32_locations.end() && *abs32_pos < current_rva) + // ++abs32_pos; + // Heuristic discovery of rel32 locations in instruction stream: are the // next few bytes the start of an instruction containing a rel32 // addressing mode? - const uint8_t* rel32 = nullptr; + const uint8_t* rel32 = NULL; if (p + 5 <= end_pointer) { if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 @@ -94,9 +95,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer, } RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); - // Valid, rel32 target must be within image, and within this section. - // Subsumes |target_rva| != |kUnassignedRVA|. - if (start_rva <= target_rva && target_rva < end_rva) { + // To be valid, rel32 target must be within image, and within this + // section. + if (IsValidRVA(target_rva) && + start_rva <= target_rva && target_rva < end_rva) { rel32_locations_.push_back(rel32_rva); #if COURGETTE_HISTOGRAM_TARGETS ++rel32_target_rvas_[target_rva]; diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h index 98ebd98..01226ae 100644 --- a/courgette/rel32_finder_win32_x86.h +++ b/courgette/rel32_finder_win32_x86.h @@ -7,7 +7,9 @@ #include <stdint.h> +#if COURGETTE_HISTOGRAM_TARGETS #include <map> +#endif #include <vector> #include "courgette/image_utils.h" @@ -17,21 +19,25 @@ namespace courgette { // A helper class to scan through a section of code to extract RVAs. class Rel32FinderWin32X86 { public: - Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva); + Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva, + RVA image_end_rva); virtual ~Rel32FinderWin32X86(); - // Swaps data in |rel32_locations_| with |dest|. + // Subsumes rva != kUnassignedRVA. 
+ bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; } + + // Swaps data in |rel32_locations_| to |dest|. void SwapRel32Locations(std::vector<RVA>* dest); #if COURGETTE_HISTOGRAM_TARGETS - // Swaps data in |rel32_target_rvas_| with |dest|. + // Swaps data in |rel32_target_rvas_| to |dest|. void SwapRel32TargetRVAs(std::map<RVA, int>* dest); #endif // Scans through [|start_pointer|, |end_pointer|) for rel32 addresses. Seeks // RVAs that satisfy the following: - // - Do not overlap with |abs32_locations| (assumed sorted). - // - Do not overlap with [relocs_start_rva, relocs_end_rva). + // - Do not collide with |abs32_pos| (assumed sorted). + // - Do not collide with |base_relocation_table|'s RVA range, // - Whose targets are in [|start_rva|, |end_rva|). // The sorted results are written to |rel32_locations_|. virtual void Find(const uint8_t* start_pointer, @@ -43,6 +49,7 @@ class Rel32FinderWin32X86 { protected: const RVA relocs_start_rva_; const RVA relocs_end_rva_; + const RVA image_end_rva_; std::vector<RVA> rel32_locations_; @@ -55,7 +62,8 @@ class Rel32FinderWin32X86 { // (excluding JPO/JPE) disregarding instruction alignment. class Rel32FinderWin32X86_Basic : public Rel32FinderWin32X86 { public: - Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva); + Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva, + RVA image_end_rva); virtual ~Rel32FinderWin32X86_Basic(); // Rel32FinderWin32X86 implementation. 
diff --git a/courgette/rel32_finder_win32_x86_unittest.cc b/courgette/rel32_finder_win32_x86_unittest.cc index 496f0b9..aed5c13 100644 --- a/courgette/rel32_finder_win32_x86_unittest.cc +++ b/courgette/rel32_finder_win32_x86_unittest.cc @@ -33,7 +33,8 @@ class Rel32FinderWin32X86TestCase { } void RunTestBasic(std::string name) { - Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_); + Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_, + image_end_rva_); ASSERT_FALSE(text_data_.empty()); finder.Find(&text_data_[0], &text_data_[0] + text_data_.size(), text_start_rva_, text_end_rva_, abs32_locations_); |