summary | refs | log | tree | commit | diff | stats
path: root/courgette
diff options
context:
space:
mode:
author: huangs <huangs@chromium.org> 2016-03-12 12:56:11 -0800
committer: Commit bot <commit-bot@chromium.org> 2016-03-12 20:57:09 +0000
commit: 58b822d441f5c982e879e536fa3c1cbac8fd339a (patch)
tree: 046ebd29e00836421e4aaa8dfc09d67897a728bc /courgette
parent: 35dc6b49313d8ce3619b274d10b25f70105f5b1f (diff)
download: chromium_src-58b822d441f5c982e879e536fa3c1cbac8fd339a.zip
chromium_src-58b822d441f5c982e879e536fa3c1cbac8fd339a.tar.gz
chromium_src-58b822d441f5c982e879e536fa3c1cbac8fd339a.tar.bz2
[Courgette] Clean up Disassembler; fix ELF Memory leaks.
Cleaning up code surrounding Disassembler:
- Extract AddressTranslator interface to be used across subclasses.
- Use FileOffset = size_t by context.
- Detailed comments & TODOs in DisassemblerElf32ARM.
- Fix DisassemblerElf32ARM memory leaks.
- Lots of superficial stylistic changes.

Except for AddressTranslator routines and unit tests, shying away from control flow and logic changes.

BUG=579206

Review URL: https://codereview.chromium.org/1676683002

Cr-Commit-Position: refs/heads/master@{#380881}
Diffstat (limited to 'courgette')
-rw-r--r-- courgette/disassembler.cc 20
-rw-r--r-- courgette/disassembler.h 34
-rw-r--r-- courgette/disassembler_elf_32.cc 333
-rw-r--r-- courgette/disassembler_elf_32.h 127
-rw-r--r-- courgette/disassembler_elf_32_arm.cc 244
-rw-r--r-- courgette/disassembler_elf_32_arm.h 56
-rw-r--r-- courgette/disassembler_elf_32_x86.cc 111
-rw-r--r-- courgette/disassembler_elf_32_x86.h 46
-rw-r--r-- courgette/disassembler_elf_32_x86_unittest.cc 89
-rw-r--r-- courgette/disassembler_win32_x64.cc 222
-rw-r--r-- courgette/disassembler_win32_x64.h 78
-rw-r--r-- courgette/disassembler_win32_x64_unittest.cc 11
-rw-r--r-- courgette/disassembler_win32_x86.cc 214
-rw-r--r-- courgette/disassembler_win32_x86.h 75
-rw-r--r-- courgette/disassembler_win32_x86_unittest.cc 11
-rw-r--r-- courgette/image_utils.h 38
-rw-r--r-- courgette/rel32_finder_win32_x86.cc 30
-rw-r--r-- courgette/rel32_finder_win32_x86.h 20
-rw-r--r-- courgette/rel32_finder_win32_x86_unittest.cc 3
19 files changed, 882 insertions, 880 deletions
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
index b9fce8b..9b58ba0 100644
--- a/courgette/disassembler.cc
+++ b/courgette/disassembler.cc
@@ -4,10 +4,12 @@
#include "courgette/disassembler.h"
+#include "base/logging.h"
+
namespace courgette {
Disassembler::Disassembler(const void* start, size_t length)
- : failure_reason_("uninitialized") {
+ : failure_reason_("uninitialized") {
start_ = reinterpret_cast<const uint8_t*>(start);
length_ = length;
end_ = start_ + length_;
@@ -15,13 +17,21 @@ Disassembler::Disassembler(const void* start, size_t length)
Disassembler::~Disassembler() {};
-const uint8_t* Disassembler::OffsetToPointer(size_t offset) const {
- assert(start_ + offset <= end_);
- return start_ + offset;
+const uint8_t* Disassembler::FileOffsetToPointer(FileOffset file_offset) const {
+ CHECK_LE(file_offset, static_cast<FileOffset>(end_ - start_));
+ return start_ + file_offset;
+}
+
+const uint8_t* Disassembler::RVAToPointer(RVA rva) const {
+ FileOffset file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoFileOffset)
+ return nullptr;
+
+ return FileOffsetToPointer(file_offset);
}
bool Disassembler::Good() {
- failure_reason_ = NULL;
+ failure_reason_ = nullptr;
return true;
}
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index e833cfa..bc715b0 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -16,33 +16,35 @@ namespace courgette {
class AssemblyProgram;
-class Disassembler {
+class Disassembler : public AddressTranslator {
public:
virtual ~Disassembler();
- virtual ExecutableType kind() { return EXE_UNKNOWN; }
+ // AddressTranslator interfaces.
+ virtual RVA FileOffsetToRVA(FileOffset file_offset) const override = 0;
+ virtual FileOffset RVAToFileOffset(RVA rva) const override = 0;
+ const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override;
+ const uint8_t* RVAToPointer(RVA rva) const override;
- // ok() may always be called but returns 'true' only after ParseHeader
- // succeeds.
- bool ok() const { return failure_reason_ == NULL; }
+ virtual ExecutableType kind() const = 0;
- // Returns 'true' if the buffer appears to be a valid executable of the
- // expected type. It is not required that this be called before Disassemble.
+ // Returns true if the buffer appears to be a valid executable of the expected
+ // type, and false otherwise. This needs not be called before Disassemble().
virtual bool ParseHeader() = 0;
// Disassembles the item passed to the factory method into the output
// parameter 'program'.
virtual bool Disassemble(AssemblyProgram* program) = 0;
- // Returns the length of the source executable. May reduce after ParseHeader.
+ // ok() may always be called but returns true only after ParseHeader()
+ // succeeds.
+ bool ok() const { return failure_reason_ == nullptr; }
+
+ // Returns the length of the image. May reduce after ParseHeader().
size_t length() const { return length_; }
const uint8_t* start() const { return start_; }
const uint8_t* end() const { return end_; }
- // Returns a pointer into the memory copy of the file format.
- // FileOffsetToPointer(0) returns a pointer to the start of the file format.
- const uint8_t* OffsetToPointer(size_t offset) const;
-
protected:
Disassembler(const void* start, size_t length);
@@ -55,16 +57,16 @@ class Disassembler {
}
// Reduce the length of the image in memory. Does not actually free
- // (or realloc) any memory. Usually only called via ParseHeader()
+ // (or realloc) any memory. Usually only called via ParseHeader().
void ReduceLength(size_t reduced_length);
private:
const char* failure_reason_;
//
- // Basic information that is always valid after Construction, though
- // ParseHeader may shorten the length if the executable is shorter than
- // the total data.
+ // Basic information that is always valid after construction, although
+ // ParseHeader() may shorten |length_| if the executable is shorter than the
+ // total data.
//
size_t length_; // In current memory.
const uint8_t* start_; // In current memory, base for 'file offsets'.
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc
index 84aa971..9ceb8ab 100644
--- a/courgette/disassembler_elf_32.cc
+++ b/courgette/disassembler_elf_32.cc
@@ -4,39 +4,70 @@
#include "courgette/disassembler_elf_32.h"
-#include <stddef.h>
-#include <stdint.h>
-
#include <algorithm>
-#include <string>
-#include <vector>
#include "base/logging.h"
-#include "base/memory/scoped_vector.h"
-
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
namespace courgette {
DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
- : Disassembler(start, length),
- header_(NULL),
- section_header_table_(NULL),
- section_header_table_size_(0),
- program_header_table_(NULL),
- program_header_table_size_(0),
- default_string_section_(NULL) {
+ : Disassembler(start, length),
+ header_(nullptr),
+ section_header_table_(nullptr),
+ section_header_table_size_(0),
+ program_header_table_(nullptr),
+ program_header_table_size_(0),
+ default_string_section_(nullptr) {
+}
+
+RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const {
+ // File offsets can be 64-bit values, but we are dealing with 32-bit
+ // executables and so only need to support 32-bit file sizes.
+ uint32_t offset32 = static_cast<uint32_t>(offset);
+
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
+ // These can appear to have a size in the file, but don't.
+ if (section_header->sh_type == SHT_NOBITS)
+ continue;
+
+ Elf32_Off section_begin = section_header->sh_offset;
+ Elf32_Off section_end = section_begin + section_header->sh_size;
+
+ if (offset32 >= section_begin && offset32 < section_end) {
+ return section_header->sh_addr + (offset32 - section_begin);
+ }
+ }
+
+ return 0;
+}
+
+FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
+ // These can appear to have a size in the file, but don't.
+ if (section_header->sh_type == SHT_NOBITS)
+ continue;
+ Elf32_Addr begin = section_header->sh_addr;
+ Elf32_Addr end = begin + section_header->sh_size;
+
+ if (rva >= begin && rva < end)
+ return section_header->sh_offset + (rva - begin);
+ }
+ return kNoFileOffset;
}
bool DisassemblerElf32::ParseHeader() {
if (length() < sizeof(Elf32_Ehdr))
return Bad("Too small");
- header_ = (Elf32_Ehdr *)start();
+ header_ = reinterpret_cast<const Elf32_Ehdr*>(start());
- // Have magic for elf header?
+ // Have magic for ELF header?
if (header_->e_ident[0] != 0x7f ||
header_->e_ident[1] != 'E' ||
header_->e_ident[2] != 'L' ||
@@ -59,23 +90,25 @@ bool DisassemblerElf32::ParseHeader() {
if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
return Bad("Out of bounds section header table");
- section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
+ section_header_table_ = reinterpret_cast<const Elf32_Shdr*>(
+ FileOffsetToPointer(header_->e_shoff));
section_header_table_size_ = header_->e_shnum;
if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
return Bad("Out of bounds program header table");
- program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
+ program_header_table_ = reinterpret_cast<const Elf32_Phdr*>(
+ FileOffsetToPointer(header_->e_phoff));
program_header_table_size_ = header_->e_phnum;
if (header_->e_shstrndx >= header_->e_shnum)
return Bad("Out of bounds string section index");
- default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
+ default_string_section_ = reinterpret_cast<const char*>(
+ SectionBody(static_cast<int>(header_->e_shstrndx)));
- if (!UpdateLength()) {
+ if (!UpdateLength())
return Bad("Out of bounds section or segment");
- }
return Good();
}
@@ -97,7 +130,6 @@ bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
return false;
target->DefaultAssignIndexes();
-
return true;
}
@@ -105,8 +137,9 @@ bool DisassemblerElf32::UpdateLength() {
Elf32_Off result = 0;
// Find the end of the last section
- for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
- const Elf32_Shdr *section_header = SectionHeader(section_id);
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_NOBITS)
continue;
@@ -119,8 +152,9 @@ bool DisassemblerElf32::UpdateLength() {
}
// Find the end of the last segment
- for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
- const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
+ for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
+ ++segment_id) {
+ const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
return false;
@@ -129,25 +163,26 @@ bool DisassemblerElf32::UpdateLength() {
result = std::max(result, segment_end);
}
- Elf32_Off section_table_end = header_->e_shoff +
- (header_->e_shnum * sizeof(Elf32_Shdr));
+ Elf32_Off section_table_end =
+ header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr));
result = std::max(result, section_table_end);
- Elf32_Off segment_table_end = header_->e_phoff +
- (header_->e_phnum * sizeof(Elf32_Phdr));
+ Elf32_Off segment_table_end =
+ header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr));
result = std::max(result, segment_table_end);
ReduceLength(result);
return true;
}
-CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
+CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const {
if (rva == kUnassignedRVA)
return false;
// It's valid if it's contained in any program segment
- for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
- const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
+ for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
+ ++segment_id) {
+ const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
if (segment_header->p_type != PT_LOAD)
continue;
@@ -162,114 +197,58 @@ CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
return false;
}
-CheckBool DisassemblerElf32::RVAToFileOffset(RVA rva,
- size_t* result) const {
- for (int i = 0; i < SectionHeaderCount(); i++) {
- const Elf32_Shdr *section_header = SectionHeader(i);
- // These can appear to have a size in the file, but don't.
- if (section_header->sh_type == SHT_NOBITS)
- continue;
- Elf32_Addr begin = section_header->sh_addr;
- Elf32_Addr end = begin + section_header->sh_size;
-
- if (rva >= begin && rva < end) {
- *result = section_header->sh_offset + (rva - begin);
- return true;
- }
- }
- return false;
-}
-
-RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
- // File offsets can be 64 bit values, but we are dealing with 32
- // bit executables and so only need to support 32bit file sizes.
- uint32_t offset32 = (uint32_t)offset;
-
- for (int i = 0; i < SectionHeaderCount(); i++) {
-
- const Elf32_Shdr *section_header = SectionHeader(i);
-
- // These can appear to have a size in the file, but don't.
- if (section_header->sh_type == SHT_NOBITS)
- continue;
-
- Elf32_Off section_begin = section_header->sh_offset;
- Elf32_Off section_end = section_begin + section_header->sh_size;
-
- if (offset32 >= section_begin && offset32 < section_end) {
- return section_header->sh_addr + (offset32 - section_begin);
- }
- }
-
- return 0;
-}
-
-CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
- std::vector<size_t>* offsets) {
- offsets->clear();
-
- for (std::vector<RVA>::iterator rva = rvas->begin();
- rva != rvas->end();
- rva++) {
-
- size_t offset;
-
- if (!RVAToFileOffset(*rva, &offset))
+CheckBool DisassemblerElf32::RVAsToFileOffsets(
+ const std::vector<RVA>& rvas,
+ std::vector<FileOffset>* file_offsets) {
+ file_offsets->clear();
+ for (RVA rva : rvas) {
+ FileOffset file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoFileOffset)
return false;
-
- offsets->push_back(offset);
+ file_offsets->push_back(file_offset);
}
-
return true;
}
-CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
- for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
- rva != rvas->end();
- rva++) {
-
- size_t offset;
-
- if (!RVAToFileOffset((*rva)->rva(), &offset))
+CheckBool DisassemblerElf32::RVAsToFileOffsets(
+ ScopedVector<TypedRVA>* typed_rvas) {
+ for (TypedRVA* typed_rva : *typed_rvas) {
+ FileOffset file_offset = RVAToFileOffset(typed_rva->rva());
+ if (file_offset == kNoFileOffset)
return false;
-
- (*rva)->set_offset(offset);
+ typed_rva->set_file_offset(file_offset);
}
-
return true;
}
CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- uint32_t file_offset = 0;
+ FileOffset file_offset = 0;
- std::vector<size_t> abs_offsets;
+ std::vector<FileOffset> abs_offsets;
- if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
+ if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets))
return false;
- if (!RVAsToOffsets(&rel32_locations_))
+ if (!RVAsToFileOffsets(&rel32_locations_))
return false;
- std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
+ std::vector<FileOffset>::iterator current_abs_offset = abs_offsets.begin();
ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
- std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
+ std::vector<FileOffset>::iterator end_abs_offset = abs_offsets.end();
ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
- for (int section_id = 0;
- section_id < SectionHeaderCount();
- section_id++) {
-
- const Elf32_Shdr *section_header = SectionHeader(section_id);
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_NOBITS)
continue;
- if (!ParseSimpleRegion(file_offset,
- section_header->sh_offset,
- program))
+ if (!ParseSimpleRegion(file_offset, section_header->sh_offset, program))
return false;
+
file_offset = section_header->sh_offset;
switch (section_header->sh_type) {
@@ -280,10 +259,13 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
break;
case SHT_PROGBITS:
if (!ParseProgbitsSection(section_header,
- &current_abs_offset, end_abs_offset,
- &current_rel, end_rel,
- program))
+ &current_abs_offset,
+ end_abs_offset,
+ &current_rel,
+ end_rel,
+ program)) {
return false;
+ }
file_offset = section_header->sh_offset + section_header->sh_size;
break;
case SHT_INIT_ARRAY:
@@ -292,28 +274,27 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
while (current_abs_offset != end_abs_offset &&
*current_abs_offset >= section_header->sh_offset &&
*current_abs_offset <
- (section_header->sh_offset + section_header->sh_size)) {
+ section_header->sh_offset + section_header->sh_size) {
// Skip any abs_offsets appear in the unsupported INIT_ARRAY section
- VLOG(1) << "Skipping relocation entry for unsupported section: " <<
- section_header->sh_type;
- current_abs_offset++;
+ VLOG(1) << "Skipping relocation entry for unsupported section: "
+ << section_header->sh_type;
+ ++current_abs_offset;
}
break;
default:
if (current_abs_offset != end_abs_offset &&
- *current_abs_offset >= section_header->sh_offset &&
- *current_abs_offset <
- (section_header->sh_offset + section_header->sh_size))
- VLOG(1) << "Relocation address in unrecognized ELF section: " << \
- section_header->sh_type;
- break;
+ *current_abs_offset >= section_header->sh_offset &&
+ *current_abs_offset <
+ section_header->sh_offset + section_header->sh_size) {
+ VLOG(1) << "Relocation address in unrecognized ELF section: "
+ << section_header->sh_type;
+ }
+ break;
}
}
// Rest of the file past the last section
- if (!ParseSimpleRegion(file_offset,
- length(),
- program))
+ if (!ParseSimpleRegion(file_offset, length(), program))
return false;
// Make certain we consume all of the relocations as expected
@@ -321,34 +302,32 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
}
CheckBool DisassemblerElf32::ParseProgbitsSection(
- const Elf32_Shdr *section_header,
- std::vector<size_t>::iterator* current_abs_offset,
- std::vector<size_t>::iterator end_abs_offset,
+ const Elf32_Shdr* section_header,
+ std::vector<FileOffset>::iterator* current_abs_offset,
+ std::vector<FileOffset>::iterator end_abs_offset,
ScopedVector<TypedRVA>::iterator* current_rel,
ScopedVector<TypedRVA>::iterator end_rel,
AssemblyProgram* program) {
-
// Walk all the bytes in the file, whether or not in a section.
- size_t file_offset = section_header->sh_offset;
- size_t section_end = section_header->sh_offset + section_header->sh_size;
+ FileOffset file_offset = section_header->sh_offset;
+ FileOffset section_end = section_header->sh_offset + section_header->sh_size;
Elf32_Addr origin = section_header->sh_addr;
- size_t origin_offset = section_header->sh_offset;
+ FileOffset origin_offset = section_header->sh_offset;
if (!program->EmitOriginInstruction(origin))
return false;
while (file_offset < section_end) {
-
if (*current_abs_offset != end_abs_offset &&
file_offset > **current_abs_offset)
return false;
while (*current_rel != end_rel &&
- file_offset > (**current_rel)->get_offset()) {
- (*current_rel)++;
+ file_offset > (**current_rel)->file_offset()) {
+ ++(*current_rel);
}
- size_t next_relocation = section_end;
+ FileOffset next_relocation = section_end;
if (*current_abs_offset != end_abs_offset &&
next_relocation > **current_abs_offset)
@@ -358,8 +337,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
// an Abs value, or the end of the section, so +3 to make sure there is
// room for the full 4 byte value.
if (*current_rel != end_rel &&
- next_relocation > ((**current_rel)->get_offset() + 3))
- next_relocation = (**current_rel)->get_offset();
+ next_relocation > ((**current_rel)->file_offset() + 3))
+ next_relocation = (**current_rel)->file_offset();
if (next_relocation > file_offset) {
if (!ParseSimpleRegion(file_offset, next_relocation, program))
@@ -371,28 +350,28 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
if (*current_abs_offset != end_abs_offset &&
file_offset == **current_abs_offset) {
- const uint8_t* p = OffsetToPointer(file_offset);
+ const uint8_t* p = FileOffsetToPointer(file_offset);
RVA target_rva = Read32LittleEndian(p);
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
return false;
file_offset += sizeof(RVA);
- (*current_abs_offset)++;
+ ++(*current_abs_offset);
continue;
}
if (*current_rel != end_rel &&
- file_offset == (**current_rel)->get_offset()) {
+ file_offset == (**current_rel)->file_offset()) {
uint32_t relative_target = (**current_rel)->relative_target();
// This cast is for 64 bit systems, and is only safe because we
// are working on 32 bit executables.
RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
relative_target);
- if (! (**current_rel)->EmitInstruction(program, target_rva))
+ if (!(**current_rel)->EmitInstruction(program, target_rva))
return false;
file_offset += (**current_rel)->op_size();
- (*current_rel)++;
+ ++(*current_rel);
continue;
}
}
@@ -401,17 +380,19 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
-CheckBool DisassemblerElf32::ParseSimpleRegion(
- size_t start_file_offset,
- size_t end_file_offset,
- AssemblyProgram* program) {
+CheckBool DisassemblerElf32::ParseSimpleRegion(FileOffset start_file_offset,
+ FileOffset end_file_offset,
+ AssemblyProgram* program) {
// Callers don't guarantee start < end
- if (start_file_offset >= end_file_offset) return true;
+ if (start_file_offset >= end_file_offset)
+ return true;
const size_t len = end_file_offset - start_file_offset;
- if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len))
+ if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
+ len)) {
return false;
+ }
return true;
}
@@ -420,12 +401,13 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
abs32_locations_.clear();
// Loop through sections for relocation sections
- for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
- const Elf32_Shdr *section_header = SectionHeader(section_id);
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_REL) {
-
- Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
+ const Elf32_Rel* relocs_table =
+ reinterpret_cast<const Elf32_Rel*>(SectionBody(section_id));
int relocs_table_count = section_header->sh_size /
section_header->sh_entsize;
@@ -433,7 +415,7 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
// Elf32_Word relocation_section_id = section_header->sh_info;
// Loop through relocation objects in the relocation section
- for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
+ for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) {
RVA rva;
// Quite a few of these conversions fail, and we simply skip
@@ -451,23 +433,18 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
}
CheckBool DisassemblerElf32::CheckSection(RVA rva) {
- size_t offset;
-
- if (!RVAToFileOffset(rva, &offset)) {
+ FileOffset file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoFileOffset)
return false;
- }
-
- for (int section_id = 0;
- section_id < SectionHeaderCount();
- section_id++) {
- const Elf32_Shdr *section_header = SectionHeader(section_id);
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
- if (offset >= section_header->sh_offset &&
- offset < (section_header->sh_offset + section_header->sh_size)) {
+ if (file_offset >= section_header->sh_offset &&
+ file_offset < (section_header->sh_offset + section_header->sh_size)) {
switch (section_header->sh_type) {
- case SHT_REL:
- // Fall-through
+ case SHT_REL: // Falls through.
case SHT_PROGBITS:
return true;
}
@@ -478,16 +455,14 @@ CheckBool DisassemblerElf32::CheckSection(RVA rva) {
}
CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
-
rel32_locations_.clear();
// Loop through sections for relocation sections
- for (int section_id = 0;
- section_id < SectionHeaderCount();
- section_id++) {
-
- const Elf32_Shdr *section_header = SectionHeader(section_id);
+ for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+ ++section_id) {
+ const Elf32_Shdr* section_header = SectionHeader(section_id);
+ // TODO(huangs): Add better checks to skip non-code sections.
// Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0.
if (section_header->sh_type != SHT_PROGBITS ||
section_header->sh_addr == 0)
diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h
index 8483ce3..e9d00ca 100644
--- a/courgette/disassembler_elf_32.h
+++ b/courgette/disassembler_elf_32.h
@@ -8,10 +8,12 @@
#include <stddef.h>
#include <stdint.h>
+#include <vector>
+
#include "base/macros.h"
#include "base/memory/scoped_vector.h"
-#include "courgette/assembly_program.h"
#include "courgette/disassembler.h"
+#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
@@ -19,43 +21,33 @@ namespace courgette {
class AssemblyProgram;
-// A courgette disassembler for 32-bit ELF files. This class is only a
-// partial implementation. Subclasses implement the
-// architecture-specific parts of processing 32-bit ELF files. Specifically,
-// RelToRVA processes entries in ELF relocation table,
-// ParseRelocationSection verifies the organization of the ELF
-// relocation table, and ParseRel32RelocsFromSection finds branch
-// targets by looking for relative jump/call opcodes in the particular
-// architecture's machine code.
+// A Courgette disassembler for 32-bit ELF files. This is only a partial
+// implementation that admits subclasses for the architecture-specific parts of
+// 32-bit ELF file processing. Specifically:
+// - RelToRVA() processes entries in ELF relocation table.
+// - ParseRelocationSection() verifies the organization of the ELF relocation
+// table.
+// - ParseRel32RelocsFromSection() finds branch targets by looking for relative
+// branch/call opcodes in the particular architecture's machine code.
class DisassemblerElf32 : public Disassembler {
public:
// Different instructions encode the target rva differently. This
// class encapsulates this behavior. public for use in unit tests.
class TypedRVA {
public:
- explicit TypedRVA(RVA rva) : rva_(rva), offset_(static_cast<size_t>(-1)) {
- }
+ explicit TypedRVA(RVA rva) : rva_(rva) { }
- virtual ~TypedRVA() { };
+ virtual ~TypedRVA() { }
- RVA rva() {
- return rva_;
- }
-
- RVA relative_target() {
- return relative_target_;
- }
+ RVA rva() const { return rva_; }
+ RVA relative_target() const { return relative_target_; }
+ FileOffset file_offset() const { return file_offset_; }
void set_relative_target(RVA relative_target) {
relative_target_ = relative_target;
}
-
- size_t get_offset() {
- return offset_;
- }
-
- void set_offset(size_t offset) {
- offset_ = offset;
+ void set_file_offset(FileOffset file_offset) {
+ file_offset_ = file_offset;
}
// Computes the relative jump's offset from the op in p.
@@ -65,33 +57,33 @@ class DisassemblerElf32 : public Disassembler {
virtual CheckBool EmitInstruction(AssemblyProgram* program,
RVA target_rva) = 0;
+ // Returns the size of the instruction containing the RVA.
virtual uint16_t op_size() const = 0;
- static bool IsLessThan(TypedRVA *a, TypedRVA *b) {
+ // Comparator for sorting, which assumes uniqueness of RVAs.
+ static bool IsLessThan(TypedRVA* a, TypedRVA* b) {
return a->rva() < b->rva();
}
private:
const RVA rva_;
- RVA relative_target_;
- size_t offset_;
+ RVA relative_target_ = kNoRVA;
+ FileOffset file_offset_ = kNoFileOffset;
};
public:
- explicit DisassemblerElf32(const void* start, size_t length);
-
- virtual ~DisassemblerElf32() { };
+ DisassemblerElf32(const void* start, size_t length);
- virtual ExecutableType kind() = 0;
+ ~DisassemblerElf32() override { }
- virtual e_machine_values ElfEM() = 0;
+ // Disassembler interfaces.
+ RVA FileOffsetToRVA(FileOffset file_offset) const override;
+ FileOffset RVAToFileOffset(RVA rva) const override;
+ virtual ExecutableType kind() const override = 0;
+ bool ParseHeader() override;
+ bool Disassemble(AssemblyProgram* target) override;
- // Returns 'true' if the buffer appears to point to a valid ELF executable
- // for 32 bit. If ParseHeader() succeeds, other member
- // functions may be called.
- virtual bool ParseHeader();
-
- virtual bool Disassemble(AssemblyProgram* target);
+ virtual e_machine_values ElfEM() const = 0;
// Public for unittests only
std::vector<RVA> &Abs32Locations() { return abs32_locations_; }
@@ -107,13 +99,13 @@ class DisassemblerElf32 : public Disassembler {
return section_header_table_size_;
}
- const Elf32_Shdr *SectionHeader(int id) const {
+ const Elf32_Shdr* SectionHeader(Elf32_Half id) const {
assert(id >= 0 && id < SectionHeaderCount());
return section_header_table_ + id;
}
- const uint8_t* SectionBody(int id) const {
- return OffsetToPointer(SectionHeader(id)->sh_offset);
+ const uint8_t* SectionBody(Elf32_Half id) const {
+ return FileOffsetToPointer(SectionHeader(id)->sh_offset);
}
// Misc Segment Helpers
@@ -122,61 +114,62 @@ class DisassemblerElf32 : public Disassembler {
return program_header_table_size_;
}
- const Elf32_Phdr *ProgramSegmentHeader(int id) const {
+ const Elf32_Phdr* ProgramSegmentHeader(Elf32_Half id) const {
assert(id >= 0 && id < ProgramSegmentHeaderCount());
return program_header_table_ + id;
}
// Misc address space helpers
- CheckBool IsValidRVA(RVA rva) const WARN_UNUSED_RESULT;
+ CheckBool IsValidTargetRVA(RVA rva) const WARN_UNUSED_RESULT;
- // Convert an ELF relocation struction into an RVA
+ // Converts an ELF relocation instruction into an RVA.
virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
const WARN_UNUSED_RESULT = 0;
- // Returns kNoOffset if there is no file offset corresponding to 'rva'.
- CheckBool RVAToFileOffset(RVA rva, size_t* result) const WARN_UNUSED_RESULT;
+ CheckBool RVAsToFileOffsets(const std::vector<RVA>& rvas,
+ std::vector<FileOffset>* file_offsets);
- RVA FileOffsetToRVA(size_t offset) const WARN_UNUSED_RESULT;
+ CheckBool RVAsToFileOffsets(ScopedVector<TypedRVA>* typed_rvas);
- CheckBool RVAsToOffsets(std::vector<RVA>* rvas /*in*/,
- std::vector<size_t>* offsets /*out*/);
+ // Parsing code for Disassemble().
- CheckBool RVAsToOffsets(ScopedVector<TypedRVA>* rvas /*in and out*/);
+ virtual CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
+ AssemblyProgram* program)
+ WARN_UNUSED_RESULT = 0;
- // Parsing Code used to really implement Disassemble
+ virtual CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
+ WARN_UNUSED_RESULT = 0;
CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT;
- virtual CheckBool ParseRelocationSection(
- const Elf32_Shdr *section_header,
- AssemblyProgram* program) WARN_UNUSED_RESULT = 0;
+
CheckBool ParseProgbitsSection(
- const Elf32_Shdr *section_header,
- std::vector<size_t>::iterator* current_abs_offset,
- std::vector<size_t>::iterator end_abs_offset,
+ const Elf32_Shdr* section_header,
+ std::vector<FileOffset>::iterator* current_abs_offset,
+ std::vector<FileOffset>::iterator end_abs_offset,
ScopedVector<TypedRVA>::iterator* current_rel,
ScopedVector<TypedRVA>::iterator end_rel,
AssemblyProgram* program) WARN_UNUSED_RESULT;
- CheckBool ParseSimpleRegion(size_t start_file_offset,
- size_t end_file_offset,
+
+ CheckBool ParseSimpleRegion(FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
CheckBool ParseAbs32Relocs() WARN_UNUSED_RESULT;
+
CheckBool CheckSection(RVA rva) WARN_UNUSED_RESULT;
+
CheckBool ParseRel32RelocsFromSections() WARN_UNUSED_RESULT;
- virtual CheckBool ParseRel32RelocsFromSection(
- const Elf32_Shdr* section) WARN_UNUSED_RESULT = 0;
- Elf32_Ehdr *header_;
- Elf32_Shdr *section_header_table_;
+ const Elf32_Ehdr* header_;
+ const Elf32_Shdr* section_header_table_;
Elf32_Half section_header_table_size_;
- Elf32_Phdr *program_header_table_;
+ const Elf32_Phdr* program_header_table_;
Elf32_Half program_header_table_size_;
// Section header for default
- const char *default_string_section_;
+ const char* default_string_section_;
std::vector<RVA> abs32_locations_;
ScopedVector<TypedRVA> rel32_locations_;
diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc
index f6490d9..39172f4 100644
--- a/courgette/disassembler_elf_32_arm.cc
+++ b/courgette/disassembler_elf_32_arm.cc
@@ -4,18 +4,12 @@
#include "courgette/disassembler_elf_32_arm.h"
-#include <stddef.h>
-#include <stdint.h>
-
-#include <algorithm>
-#include <string>
#include <vector>
#include "base/logging.h"
-
+#include "base/memory/scoped_ptr.h"
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
namespace courgette {
@@ -24,31 +18,34 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
RVA rva,
uint16_t* c_op,
uint32_t* addr) {
- // This method takes an ARM or thumb opcode, extracts the relative
- // target address from it (addr), and creates a corresponding
- // Courgette opcode (c_op).
- //
- // Details on ARM the opcodes, and how the relative targets are
- // computed were taken from the "ARM Architecture Reference Manual",
- // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
- // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
+ // Notation for bit ranges in comments:
+ // - Listing bits from highest to lowest.
+ // - A-Z or (j1), (j2), etc.: single bit in source.
+ // - a-z: multiple, consecutive bits in source.
switch (type) {
case ARM_OFF8: {
- // The offset is given by lower 8 bits of the op. It is a 9-bit
- // offset, shifted right one bit and signed extended.
+ // Encoding T1.
+ // The offset is given by lower 8 bits of the op. It is a 9-bit offset,
+ // shifted right 1 bit, and sign extended.
+ // arm_op = aaaaaaaa Snnnnnnn
+ // *addr := SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100
+ // *c_op := 00010000 aaaaaaaa
uint32_t temp = (arm_op & 0x00FF) << 1;
if (temp & 0x0100)
temp |= 0xFFFFFE00;
temp += 4; // Offset from _next_ PC.
- fflush(stdout);
(*addr) = temp;
(*c_op) = static_cast<uint16_t>(arm_op >> 8) | 0x1000;
break;
}
case ARM_OFF11: {
- // The offset is given by lower 11 bits of the op, and is a
- // 12-bit offset, shifted right one bit and sign extended.
+ // Encoding T2.
+ // The offset is given by lower 11 bits of the op, and is a 12-bit offset,
+ // shifted right 1 bit, and sign extended.
+ // arm_op = aaaaaSnn nnnnnnnn
+ // *addr := SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100
+ // *c_op := 00100000 000aaaaa
uint32_t temp = (arm_op & 0x07FF) << 1;
if (temp & 0x00000800)
temp |= 0xFFFFF000;
@@ -61,6 +58,9 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
case ARM_OFF24: {
// The offset is given by the lower 24-bits of the op, shifted
// left 2 bits, and sign extended.
+ // arm_op = aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn
+ // *addr := SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000
+ // *c_op := 00110000 aaaaaaaa
uint32_t temp = (arm_op & 0x00FFFFFF) << 2;
if (temp & 0x02000000)
temp |= 0xFC000000;
@@ -71,6 +71,18 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
break;
}
case ARM_OFF25: {
+ // Encoding T4.
+ // arm_op = aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn
+ // where CD is in {01, 10, 11}
+ // i1 := ~(j1 ^ S)
+ // i2 := ~(j2 ^ S)
+ // If CD == 10:
+ // pppp := (rva % 4 == 0) ? 0100 : 0010
+ // Else:
+ // pppp := 0100
+ // *addr := SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp
+ // *c_op := 0100pppp aaaaaBCD
+ // TODO(huangs): aaaaa = 11110 and B = 1 always? Investigate and fix.
uint32_t temp = 0;
temp |= (arm_op & 0x000007FF) << 1; // imm11
temp |= (arm_op & 0x03FF0000) >> 4; // imm10
@@ -78,8 +90,8 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
uint32_t S = (arm_op & (1 << 26)) >> 26;
uint32_t j2 = (arm_op & (1 << 11)) >> 11;
uint32_t j1 = (arm_op & (1 << 13)) >> 13;
- bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
- bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
+ bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; // D
+ bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; // C
uint32_t i2 = ~(j2 ^ S) & 1;
uint32_t i1 = ~(j1 ^ S) & 1;
@@ -91,7 +103,7 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
temp |= 0xFE000000;
uint32_t prefetch;
if (toARM) {
- // Align PC on 4-byte boundary
+ // Align PC on 4-byte boundary.
uint32_t align4byte = (rva % 4) ? 2 : 4;
prefetch = align4byte;
} else {
@@ -101,20 +113,25 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
(*addr) = temp;
uint32_t temp2 = 0x4000;
- temp2 |= (arm_op & (1 << 12)) >> 12;
- temp2 |= (arm_op & (1 << 14)) >> 13;
- temp2 |= (arm_op & (1 << 15)) >> 13;
- temp2 |= (arm_op & 0xF8000000) >> 24;
+ temp2 |= (arm_op & (1 << 12)) >> 12; // .......D
+ temp2 |= (arm_op & (1 << 14)) >> 13; // ......C.
+ temp2 |= (arm_op & (1 << 15)) >> 13; // .....B..
+ temp2 |= (arm_op & 0xF8000000) >> 24; // aaaaa...
temp2 |= (prefetch & 0x0000000F) << 8;
(*c_op) = static_cast<uint16_t>(temp2);
break;
}
case ARM_OFF21: {
+ // Encoding T3.
+ // arm_op = 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn
+ // *addr := SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100
+ // *c_op := 01010000 0000cccc
uint32_t temp = 0;
temp |= (arm_op & 0x000007FF) << 1; // imm11
temp |= (arm_op & 0x003F0000) >> 4; // imm6
uint32_t S = (arm_op & (1 << 26)) >> 26;
+ // TODO(huangs): Check with docs: Perhaps j1, j2 should swap?
uint32_t j2 = (arm_op & (1 << 11)) >> 11;
uint32_t j1 = (arm_op & (1 << 13)) >> 13;
@@ -140,20 +157,31 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type,
uint16_t c_op,
uint32_t addr,
uint32_t* arm_op) {
- // Reverses the process in the compress() method. Takes the
- // Courgette op and relative address and reconstructs the original
- // ARM or thumb op.
switch (type) {
case ARM_OFF8:
+ // addr = SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100
+ // c_op = 00010000 aaaaaaaa
+ // *arm_op := aaaaaaaa Snnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
break;
case ARM_OFF11:
+ // addr = SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100
+ // c_op = 00100000 000aaaaa
+ // *arm_op := aaaaaSnn nnnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
break;
case ARM_OFF24:
+ // addr = SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000
+ // c_op = 00110000 aaaaaaaa
+ // *arm_op := aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
break;
case ARM_OFF25: {
+ // addr = SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp
+ // c_op = 0100pppp aaaaaBCD
+ // j1 := ~i1 ^ S
+ // j2 := ~i2 ^ S
+ // *arm_op := aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn
uint32_t temp = 0;
temp |= (c_op & (1 << 0)) << 12;
temp |= (c_op & (1 << 1)) << 13;
@@ -183,6 +211,9 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type,
break;
}
case ARM_OFF21: {
+ // addr = SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100
+ // c_op = 01010000 0000cccc
+ // *arm_op := 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn
uint32_t temp = 0xF0008000;
temp |= (c_op & (0x03C00000 >> 22)) << 22;
@@ -230,24 +261,28 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
const uint8_t* op_pointer) {
arm_op_ = op_pointer;
switch (type_) {
- case ARM_OFF8:
- // Fall through
+ case ARM_OFF8: // Falls through.
case ARM_OFF11: {
RVA relative_target;
- CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
- &c_op_, &relative_target);
+ CheckBool ret = Compress(type_,
+ Read16LittleEndian(op_pointer),
+ rva(),
+ &c_op_,
+ &relative_target);
set_relative_target(relative_target);
return ret;
}
case ARM_OFF24: {
RVA relative_target;
- CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
- &c_op_, &relative_target);
+ CheckBool ret = Compress(type_,
+ Read32LittleEndian(op_pointer),
+ rva(),
+ &c_op_,
+ &relative_target);
set_relative_target(relative_target);
return ret;
}
- case ARM_OFF25:
- // Fall through
+ case ARM_OFF25: // Falls through.
case ARM_OFF21: {
// A thumb-2 op is 32 bits stored as two 16-bit words
uint32_t pval = (Read16LittleEndian(op_pointer) << 16) |
@@ -272,26 +307,24 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
}
DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
- : DisassemblerElf32(start, length) {
+ : DisassemblerElf32(start, length) {
}
-// Convert an ELF relocation struction into an RVA
+// Convert an ELF relocation structure into an RVA.
CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
-
- // The rightmost byte of r_info is the type...
+ // The rightmost byte of r_info is the type.
elf32_rel_arm_type_values type =
- (elf32_rel_arm_type_values)(unsigned char)rel.r_info;
+ static_cast<elf32_rel_arm_type_values>(rel.r_info & 0xFF);
- // The other 3 bytes of r_info are the symbol
+ // The other 3 bytes of r_info are the symbol.
uint32_t symbol = rel.r_info >> 8;
- switch(type)
- {
+ switch (type) {
case R_ARM_RELATIVE:
if (symbol != 0)
return false;
- // This is a basic ABS32 relocation address
+ // This is a basic ABS32 relocation address.
*result = rel.r_offset;
return true;
@@ -301,32 +334,33 @@ CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
}
CheckBool DisassemblerElf32ARM::ParseRelocationSection(
- const Elf32_Shdr *section_header,
- AssemblyProgram* program) {
- // This method compresses a contiguous stretch of R_ARM_RELATIVE
- // entries in the relocation table with a Courgette relocation table
- // instruction. It skips any entries at the beginning that appear
- // in a section that Courgette doesn't support, e.g. INIT.
+ const Elf32_Shdr* section_header,
+ AssemblyProgram* program) {
+ // This method compresses a contiguous stretch of R_ARM_RELATIVE entries in
+ // the relocation table with a Courgette relocation table instruction.
+ // It skips any entries at the beginning that appear in a section that
+ // Courgette doesn't support, e.g. INIT.
+ //
// Specifically, the entries should be
// (1) In the same relocation table
// (2) Are consecutive
// (3) Are sorted in memory address order
//
- // Happily, this is normally the case, but it's not required by spec
- // so we check, and just don't do it if we don't match up.
+ // Happily, this is normally the case, but it's not required by spec so we
+ // check, and just don't do it if we don't match up.
//
- // The expectation is that one relocation section will contain
- // all of our R_ARM_RELATIVE entries in the expected order followed
- // by assorted other entries we can't use special handling for.
+ // The expectation is that one relocation section will contain all of our
+ // R_ARM_RELATIVE entries in the expected order followed by assorted other
+ // entries we can't use special handling for.
bool match = true;
- // Walk all the bytes in the section, matching relocation table or not
- size_t file_offset = section_header->sh_offset;
- size_t section_end = section_header->sh_offset + section_header->sh_size;
+ // Walk all the bytes in the section, matching relocation table or not.
+ FileOffset file_offset = section_header->sh_offset;
+ FileOffset section_end = section_header->sh_offset + section_header->sh_size;
- Elf32_Rel *section_relocs_iter =
- (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
+ const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>(
+ FileOffsetToPointer(section_header->sh_offset));
uint32_t section_relocs_count =
section_header->sh_size / section_header->sh_entsize;
@@ -337,13 +371,15 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
if (!abs32_locations_.empty()) {
std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
- for (uint32_t i = 0; i < section_relocs_count; i++) {
+ for (uint32_t i = 0; i < section_relocs_count; ++i) {
if (section_relocs_iter->r_offset == *reloc_iter)
break;
- if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel),
- program))
+ if (!ParseSimpleRegion(file_offset,
+ file_offset + sizeof(Elf32_Rel),
+ program)) {
return false;
+ }
file_offset += sizeof(Elf32_Rel);
++section_relocs_iter;
@@ -351,11 +387,12 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
while (match && (reloc_iter != abs32_locations_.end())) {
if (section_relocs_iter->r_info != R_ARM_RELATIVE ||
- section_relocs_iter->r_offset != *reloc_iter)
+ section_relocs_iter->r_offset != *reloc_iter) {
match = false;
+ }
- section_relocs_iter++;
- reloc_iter++;
+ ++section_relocs_iter;
+ ++reloc_iter;
file_offset += sizeof(Elf32_Rel);
}
@@ -369,118 +406,119 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
+// TODO(huangs): Detect and avoid overlap with abs32 addresses.
CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
const Elf32_Shdr* section_header) {
- uint32_t start_file_offset = section_header->sh_offset;
- uint32_t end_file_offset = start_file_offset + section_header->sh_size;
+ FileOffset start_file_offset = section_header->sh_offset;
+ FileOffset end_file_offset = start_file_offset + section_header->sh_size;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract 'pointer_to_rva'.
+ // subtract |adjust_pointer_to_rva|.
const uint8_t* const adjust_pointer_to_rva =
start_pointer - section_header->sh_addr;
// Find the rel32 relocations.
const uint8_t* p = start_pointer;
- bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
+ bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
while (p < end_pointer) {
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
-
- TypedRVAARM* rel32_rva = NULL;
+ scoped_ptr<TypedRVAARM> rel32_rva;
RVA target_rva = 0;
bool found = false;
// 16-bit thumb ops
- if (!found && (p + 3) <= end_pointer) {
+ if (!found && p + 3 <= end_pointer) {
uint16_t pval = Read16LittleEndian(p);
if ((pval & 0xF000) == 0xD000) {
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
- if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
+ rel32_rva.reset(new TypedRVAARM(ARM_OFF8, rva));
+ if (!rel32_rva->ComputeRelativeTarget(p))
return false;
- }
+
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
} else if ((pval & 0xF800) == 0xE000) {
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
- if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
+ rel32_rva.reset(new TypedRVAARM(ARM_OFF11, rva));
+ if (!rel32_rva->ComputeRelativeTarget(p))
return false;
- }
+
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
- // thumb-2 ops comprised of two 16-bit words
- if (!found && (p + 5) <= end_pointer) {
+ // thumb-2 ops comprised of two 16-bit words.
+ if (!found && p + 5 <= end_pointer) {
// This is really two 16-bit words, not one 32-bit word.
uint32_t pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
if ((pval & 0xF8008000) == 0xF0008000) {
// Covers thumb-2's 32-bit conditional/unconditional branches
-
- if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
+ if ((pval & (1 << 14)) || (pval & (1 << 12))) {
// A branch, with link, or with link and exchange.
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
- if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
+ rel32_rva.reset(new TypedRVAARM(ARM_OFF25, rva));
+ if (!rel32_rva->ComputeRelativeTarget(p))
return false;
- }
+
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
+
} else {
// TODO(paulgazz) make sure cond is not 111
// A conditional branch instruction
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
- if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
+ rel32_rva.reset(new TypedRVAARM(ARM_OFF21, rva));
+ if (!rel32_rva->ComputeRelativeTarget(p))
return false;
- }
+
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
}
- // 32-bit ARM ops
+ // 32-bit ARM ops.
if (!found && on_32bit && (p + 5) <= end_pointer) {
uint32_t pval = Read32LittleEndian(p);
if ((pval & 0x0E000000) == 0x0A000000) {
// Covers both 0x0A 0x0B ARM relative branches
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
- if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
+ rel32_rva.reset(new TypedRVAARM(ARM_OFF24, rva));
+ if (!rel32_rva->ComputeRelativeTarget(p))
return false;
- }
+
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
- if (found && IsValidRVA(target_rva)) {
- rel32_locations_.push_back(rel32_rva);
+ if (found && IsValidTargetRVA(target_rva)) {
+ uint16_t op_size = rel32_rva->op_size();
+ rel32_locations_.push_back(rel32_rva.release());
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
#endif
- p += rel32_rva->op_size();
+ p += op_size;
- // A tricky way to update the on_32bit flag. Here is the truth table:
+ // A tricky way to update the on_32bit flag. Here is the truth table:
// on_32bit | on_32bit size is 4
// ---------+---------------------
// 1 | 0 0
// 0 | 0 1
// 0 | 1 0
// 1 | 1 1
- on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0;
+ on_32bit = (~(on_32bit ^ (op_size == 4))) != 0;
} else {
// Move 2 bytes at a time, but track 32-bit boundaries
p += 2;
diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h
index 17ebb25..5dc6897 100644
--- a/courgette/disassembler_elf_32_arm.h
+++ b/courgette/disassembler_elf_32_arm.h
@@ -8,9 +8,10 @@
#include <stddef.h>
#include <stdint.h>
+#include <map>
+
#include "base/macros.h"
#include "courgette/disassembler_elf_32.h"
-#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
namespace courgette {
@@ -30,51 +31,60 @@ class DisassemblerElf32ARM : public DisassemblerElf32 {
class TypedRVAARM : public TypedRVA {
public:
TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { }
+ ~TypedRVAARM() override { }
- uint16_t c_op() const { return c_op_; }
-
- virtual CheckBool ComputeRelativeTarget(const uint8_t* op_pointer);
+ // TypedRVA interfaces.
+ CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override;
+ CheckBool EmitInstruction(AssemblyProgram* program,
+ RVA target_rva) override;
+ uint16_t op_size() const override;
- virtual CheckBool EmitInstruction(AssemblyProgram* program,
- RVA target_rva);
-
- virtual uint16_t op_size() const;
+ uint16_t c_op() const { return c_op_; }
private:
ARM_RVA type_;
-
- uint16_t c_op_; // set by ComputeRelativeTarget()
+ uint16_t c_op_; // Set by ComputeRelativeTarget().
const uint8_t* arm_op_;
};
- explicit DisassemblerElf32ARM(const void* start, size_t length);
+ DisassemblerElf32ARM(const void* start, size_t length);
- virtual ExecutableType kind() { return EXE_ELF_32_ARM; }
+ ~DisassemblerElf32ARM() override { }
- virtual e_machine_values ElfEM() { return EM_ARM; }
+ // DisassemblerElf32 interfaces.
+ ExecutableType kind() const override { return EXE_ELF_32_ARM; }
+ e_machine_values ElfEM() const override { return EM_ARM; }
+ // Takes an ARM or thumb opcode |arm_op| of specified |type| and located at
+ // |rva|, extracts the instruction-relative target RVA into |*addr| and
+ // encodes the corresponding Courgette opcode as |*c_op|.
+ //
+ // Details on ARM opcodes, and target RVA extraction are taken from
+ // "ARM Architecture Reference Manual", section A4.1.5 and
+ // "Thumb-2 supplement", section 4.6.12.
+ // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
static CheckBool Compress(ARM_RVA type,
uint32_t arm_op,
RVA rva,
uint16_t* c_op /* out */,
uint32_t* addr /* out */);
+ // Inverse for Compress(). Takes Courgette op |c_op| and relative address
+ // |addr| to reconstruct the original ARM or thumb op |*arm_op|.
static CheckBool Decompress(ARM_RVA type,
uint16_t c_op,
uint32_t addr,
uint32_t* arm_op /* out */);
protected:
-
- virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
- const WARN_UNUSED_RESULT;
-
- virtual CheckBool ParseRelocationSection(
- const Elf32_Shdr *section_header,
- AssemblyProgram* program) WARN_UNUSED_RESULT;
-
- virtual CheckBool ParseRel32RelocsFromSection(
- const Elf32_Shdr* section) WARN_UNUSED_RESULT;
+ // DisassemblerElf32 interfaces.
+ CheckBool RelToRVA(Elf32_Rel rel,
+ RVA* result) const override WARN_UNUSED_RESULT;
+ CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
+ AssemblyProgram* program)
+ override WARN_UNUSED_RESULT;
+ CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
+ override WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
std::map<RVA, int> rel32_target_rvas_;
diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc
index 98084c1..45f7cf6 100644
--- a/courgette/disassembler_elf_32_x86.cc
+++ b/courgette/disassembler_elf_32_x86.cc
@@ -4,37 +4,45 @@
#include "courgette/disassembler_elf_32_x86.h"
-#include <stddef.h>
-#include <stdint.h>
-
-#include <algorithm>
-#include <string>
#include <vector>
#include "base/logging.h"
-
+#include "base/memory/scoped_ptr.h"
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
namespace courgette {
+CheckBool DisassemblerElf32X86::TypedRVAX86::ComputeRelativeTarget(
+ const uint8_t* op_pointer) {
+ set_relative_target(Read32LittleEndian(op_pointer) + 4);
+ return true;
+}
+
+CheckBool DisassemblerElf32X86::TypedRVAX86::EmitInstruction(
+ AssemblyProgram* program,
+ RVA target_rva) {
+ return program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
+}
+
+uint16_t DisassemblerElf32X86::TypedRVAX86::op_size() const {
+ return 4;
+}
+
DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length)
- : DisassemblerElf32(start, length) {
+ : DisassemblerElf32(start, length) {
}
-// Convert an ELF relocation struction into an RVA
+// Convert an ELF relocation structure into an RVA.
CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
-
- // The rightmost byte of r_info is the type...
+ // The rightmost byte of r_info is the type.
elf32_rel_386_type_values type =
- (elf32_rel_386_type_values)(unsigned char)rel.r_info;
+ static_cast<elf32_rel_386_type_values>(rel.r_info & 0xFF);
- // The other 3 bytes of r_info are the symbol
+ // The other 3 bytes of r_info are the symbol.
uint32_t symbol = rel.r_info >> 8;
- switch(type)
- {
+ switch (type) {
case R_386_NONE:
case R_386_32:
case R_386_PC32:
@@ -49,7 +57,7 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
if (symbol != 0)
return false;
- // This is a basic ABS32 relocation address
+ // This is a basic ABS32 relocation address.
*result = rel.r_offset;
return true;
@@ -63,32 +71,31 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
}
CheckBool DisassemblerElf32X86::ParseRelocationSection(
- const Elf32_Shdr *section_header,
- AssemblyProgram* program) {
- // We can reproduce the R_386_RELATIVE entries in one of the relocation
- // table based on other information in the patch, given these
- // conditions....
+ const Elf32_Shdr* section_header,
+ AssemblyProgram* program) {
+ // We can reproduce the R_386_RELATIVE entries in one of the relocation table
+ // based on other information in the patch, given these conditions:
//
// All R_386_RELATIVE entries are:
// 1) In the same relocation table
// 2) Are consecutive
// 3) Are sorted in memory address order
//
- // Happily, this is normally the case, but it's not required by spec
- // so we check, and just don't do it if we don't match up.
+ // Happily, this is normally the case, but it's not required by spec, so we
+ // check, and just don't do it if we don't match up.
- // The expectation is that one relocation section will contain
- // all of our R_386_RELATIVE entries in the expected order followed
- // by assorted other entries we can't use special handling for.
+ // The expectation is that one relocation section will contain all of our
+ // R_386_RELATIVE entries in the expected order followed by assorted other
+ // entries we can't use special handling for.
bool match = true;
- // Walk all the bytes in the section, matching relocation table or not
- size_t file_offset = section_header->sh_offset;
- size_t section_end = section_header->sh_offset + section_header->sh_size;
+ // Walk all the bytes in the section, matching relocation table or not.
+ FileOffset file_offset = section_header->sh_offset;
+ FileOffset section_end = file_offset + section_header->sh_size;
- Elf32_Rel *section_relocs_iter =
- (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
+ const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>(
+ FileOffsetToPointer(section_header->sh_offset));
uint32_t section_relocs_count =
section_header->sh_size / section_header->sh_entsize;
@@ -101,16 +108,17 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection(
std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
- while (match && (reloc_iter != abs32_locations_.end())) {
+ while (match && (reloc_iter != abs32_locations_.end())) {
if (section_relocs_iter->r_info != R_386_RELATIVE ||
- section_relocs_iter->r_offset != *reloc_iter)
+ section_relocs_iter->r_offset != *reloc_iter) {
match = false;
- section_relocs_iter++;
- reloc_iter++;
+ }
+ ++section_relocs_iter;
+ ++reloc_iter;
}
if (match) {
- // Skip over relocation tables
+ // Skip over relocation tables.
if (!program->EmitElfRelocationInstruction())
return false;
file_offset += sizeof(Elf32_Rel) * abs32_locations_.size();
@@ -119,28 +127,27 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
+// TODO(huangs): Detect and avoid overlap with abs32 addresses.
CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
const Elf32_Shdr* section_header) {
- uint32_t start_file_offset = section_header->sh_offset;
- uint32_t end_file_offset = start_file_offset + section_header->sh_size;
+ FileOffset start_file_offset = section_header->sh_offset;
+ FileOffset end_file_offset = start_file_offset + section_header->sh_size;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract 'pointer_to_rva'.
+ // subtract |adjust_pointer_to_rva|.
const uint8_t* const adjust_pointer_to_rva =
start_pointer - section_header->sh_addr;
// Find the rel32 relocations.
const uint8_t* p = start_pointer;
while (p < end_pointer) {
- //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
-
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = NULL;
+ const uint8_t* rel32 = nullptr;
if (p + 5 <= end_pointer) {
if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
@@ -148,32 +155,26 @@ CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
}
}
if (p + 6 <= end_pointer) {
- if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
+ if (*p == 0x0F && (p[1] & 0xF0) == 0x80) { // Jcc long form
if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
rel32 = p + 2;
}
}
if (rel32) {
RVA rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
- TypedRVAX86* rel32_rva = new TypedRVAX86(rva);
+ scoped_ptr<TypedRVAX86> rel32_rva(new TypedRVAX86(rva));
- if (!rel32_rva->ComputeRelativeTarget(rel32)) {
- delete rel32_rva;
+ if (!rel32_rva->ComputeRelativeTarget(rel32))
return false;
- }
RVA target_rva = rel32_rva->rva() + rel32_rva->relative_target();
- // To be valid, rel32 target must be within image, and within this
- // section.
- if (IsValidRVA(target_rva)) {
- rel32_locations_.push_back(rel32_rva);
+ if (IsValidTargetRVA(target_rva)) {
+ rel32_locations_.push_back(rel32_rva.release());
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
#endif
p = rel32 + 4;
continue;
- } else {
- delete rel32_rva;
}
}
p += 1;
diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h
index 5c87d4c..63be755 100644
--- a/courgette/disassembler_elf_32_x86.h
+++ b/courgette/disassembler_elf_32_x86.h
@@ -8,9 +8,10 @@
#include <stddef.h>
#include <stdint.h>
+#include <map>
+
#include "base/macros.h"
#include "courgette/disassembler_elf_32.h"
-#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
namespace courgette {
@@ -21,38 +22,33 @@ class DisassemblerElf32X86 : public DisassemblerElf32 {
public:
class TypedRVAX86 : public TypedRVA {
public:
- explicit TypedRVAX86(RVA rva) : TypedRVA(rva) {
- }
-
- CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override {
- set_relative_target(Read32LittleEndian(op_pointer) + 4);
- return true;
- }
+ explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { }
+ ~TypedRVAX86() override { }
+ // TypedRVA interfaces.
+ CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override;
CheckBool EmitInstruction(AssemblyProgram* program,
- RVA target_rva) override {
- return program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
- }
-
- uint16_t op_size() const override { return 4; }
+ RVA target_rva) override;
+ uint16_t op_size() const override;
};
- explicit DisassemblerElf32X86(const void* start, size_t length);
+ DisassemblerElf32X86(const void* start, size_t length);
- virtual ExecutableType kind() { return EXE_ELF_32_X86; }
+ ~DisassemblerElf32X86() override { }
- virtual e_machine_values ElfEM() { return EM_386; }
+ // DisassemblerElf32 interfaces.
+ ExecutableType kind() const override { return EXE_ELF_32_X86; }
+ e_machine_values ElfEM() const override { return EM_386; }
protected:
- virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
- const WARN_UNUSED_RESULT;
-
- virtual CheckBool ParseRelocationSection(
- const Elf32_Shdr *section_header,
- AssemblyProgram* program) WARN_UNUSED_RESULT;
-
- virtual CheckBool ParseRel32RelocsFromSection(
- const Elf32_Shdr* section) WARN_UNUSED_RESULT;
+ // DisassemblerElf32 interfaces.
+ CheckBool RelToRVA(Elf32_Rel rel,
+ RVA* result) const override WARN_UNUSED_RESULT;
+ CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
+ AssemblyProgram* program)
+ override WARN_UNUSED_RESULT;
+ CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
+ override WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
std::map<RVA, int> rel32_target_rvas_;
diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc
index 3ce6a63..c15b8df 100644
--- a/courgette/disassembler_elf_32_x86_unittest.cc
+++ b/courgette/disassembler_elf_32_x86_unittest.cc
@@ -2,16 +2,25 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "courgette/disassembler_elf_32_x86.h"
+
#include <stddef.h>
#include <stdint.h>
+#include <algorithm>
+#include <string>
+
+#include "base/memory/scoped_ptr.h"
#include "courgette/assembly_program.h"
#include "courgette/base_test_unittest.h"
-#include "courgette/disassembler_elf_32_x86.h"
+#include "courgette/image_utils.h"
+
+namespace courgette {
+
+namespace {
class DisassemblerElf32X86Test : public BaseTest {
public:
-
void TestExe(const char* file_name,
size_t expected_abs_count,
size_t expected_rel_count) const;
@@ -20,10 +29,11 @@ class DisassemblerElf32X86Test : public BaseTest {
void DisassemblerElf32X86Test::TestExe(const char* file_name,
size_t expected_abs_count,
size_t expected_rel_count) const {
+ using TypedRVA = DisassemblerElf32::TypedRVA;
std::string file1 = FileContents(file_name);
- scoped_ptr<courgette::DisassemblerElf32X86> disassembler(
- new courgette::DisassemblerElf32X86(file1.c_str(), file1.length()));
+ scoped_ptr<DisassemblerElf32X86> disassembler(
+ new DisassemblerElf32X86(file1.c_str(), file1.length()));
bool can_parse_header = disassembler->ParseHeader();
EXPECT_TRUE(can_parse_header);
@@ -33,7 +43,7 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
// real file, since trailing debug info is not included
EXPECT_EQ(file1.length(), disassembler->length());
- const uint8_t* offset_p = disassembler->OffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ(0x7F, offset_p[0]);
@@ -41,46 +51,45 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
EXPECT_EQ('L', offset_p[2]);
EXPECT_EQ('F', offset_p[3]);
- courgette::AssemblyProgram* program =
- new courgette::AssemblyProgram(courgette::EXE_ELF_32_X86);
-
- EXPECT_TRUE(disassembler->Disassemble(program));
-
- EXPECT_EQ(disassembler->Abs32Locations().size(), expected_abs_count);
- EXPECT_EQ(disassembler->Rel32Locations().size(), expected_rel_count);
-
- // Prove that none of the rel32 RVAs overlap with abs32 RVAs
- std::set<courgette::RVA> abs(disassembler->Abs32Locations().begin(),
- disassembler->Abs32Locations().end());
- std::set<courgette::DisassemblerElf32::TypedRVA*>
- rel(disassembler->Rel32Locations().begin(),
- disassembler->Rel32Locations().end());
- for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator
- rel32 = disassembler->Rel32Locations().begin();
- rel32 != disassembler->Rel32Locations().end();
- rel32++) {
- EXPECT_TRUE(abs.find((*rel32)->rva()) == abs.end());
- }
+ scoped_ptr<AssemblyProgram> program(new AssemblyProgram(EXE_ELF_32_X86));
- for (std::vector<courgette::RVA>::iterator abs32 =
- disassembler->Abs32Locations().begin();
- abs32 != disassembler->Abs32Locations().end();
- abs32++) {
- bool found = false;
- for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator
- rel32 = disassembler->Rel32Locations().begin();
- rel32 != disassembler->Rel32Locations().end();
- rel32++) {
- if (*abs32 == (*rel32)->rva()) {
- found = true;
- break;
- }
+ EXPECT_TRUE(disassembler->Disassemble(program.get()));
+
+ const std::vector<RVA>& abs32_list = disassembler->Abs32Locations();
+
+ // Flatten the list of typed rel32 locations into a list of rel32 RVAs.
+ std::vector<RVA> rel32_list;
+ rel32_list.reserve(disassembler->Rel32Locations().size());
+ for (TypedRVA* typed_rel32 : disassembler->Rel32Locations())
+ rel32_list.push_back(typed_rel32->rva());
+
+ EXPECT_EQ(expected_abs_count, abs32_list.size());
+ EXPECT_EQ(expected_rel_count, rel32_list.size());
+
+ EXPECT_TRUE(std::is_sorted(abs32_list.begin(), abs32_list.end()));
+ EXPECT_TRUE(std::is_sorted(rel32_list.begin(), rel32_list.end()));
+
+ // Verify that rel32 RVAs do not overlap with abs32 RVAs. Both lists were
+ // checked to be sorted above, so a single merge-style pass over the two
+ // lists is enough to detect a shared RVA.
+ // TODO(huangs): Fix this to account for RVA's 4-byte width.
+ bool found_match = false;
+ std::vector<RVA>::const_iterator abs32_it = abs32_list.begin();
+ std::vector<RVA>::const_iterator rel32_it = rel32_list.begin();
+ while (abs32_it != abs32_list.end() && rel32_it != rel32_list.end()) {
+ if (*abs32_it < *rel32_it) {
+ ++abs32_it;
+ } else if (*abs32_it > *rel32_it) {
+ ++rel32_it;
+ } else {
+ // Neither iterator advances in this branch, so stop at the first overlap;
+ // otherwise the loop would spin forever instead of failing the test.
+ found_match = true;
+ break;
}
- EXPECT_TRUE(!found);
}
- delete program;
+ EXPECT_FALSE(found_match);
}
+} // namespace
+
TEST_F(DisassemblerElf32X86Test, All) {
TestExe("elf-32-1", 200, 3442);
}
+
+} // namespace courgette
diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc
index 74b0fe4..819b7f2 100644
--- a/courgette/disassembler_win32_x64.cc
+++ b/courgette/disassembler_win32_x64.cc
@@ -8,37 +8,73 @@
#include <stdint.h>
#include <algorithm>
-#include <string>
-#include <vector>
+#include <iostream>
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
-
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
namespace courgette {
DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length)
- : Disassembler(start, length),
- incomplete_disassembly_(false),
- is_PE32_plus_(false),
- optional_header_(NULL),
- size_of_optional_header_(0),
- offset_of_data_directories_(0),
- machine_type_(0),
- number_of_sections_(0),
- sections_(NULL),
- has_text_section_(false),
- size_of_code_(0),
- size_of_initialized_data_(0),
- size_of_uninitialized_data_(0),
- base_of_code_(0),
- base_of_data_(0),
- image_base_(0),
- size_of_image_(0),
- number_of_data_directories_(0) {
+ : Disassembler(start, length),
+ incomplete_disassembly_(false),
+ is_PE32_plus_(false),
+ optional_header_(nullptr),
+ size_of_optional_header_(0),
+ offset_of_data_directories_(0),
+ machine_type_(0),
+ number_of_sections_(0),
+ sections_(nullptr),
+ has_text_section_(false),
+ size_of_code_(0),
+ size_of_initialized_data_(0),
+ size_of_uninitialized_data_(0),
+ base_of_code_(0),
+ base_of_data_(0),
+ image_base_(0),
+ size_of_image_(0),
+ number_of_data_directories_(0) {
+}
+
+// Translates |rva| to the corresponding offset in the file. Returns
+// kNoFileOffset when |rva| lies inside a section but past that section's raw
+// data. An |rva| outside every section is passed through unchanged if it is 0
+// or 2; any other value hits NOTREACHED() and yields kNoFileOffset.
+FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ if (section != nullptr) {
+ FileOffset offset_in_section = rva - section->virtual_address;
+ // Need this extra check, since an |rva| may be valid for a section (it is
+ // within the section's virtual size) yet have no bytes in the file because
+ // it lies past the section's raw data (e.g. uninit data).
+ if (offset_in_section >= section->size_of_raw_data)
+ return kNoFileOffset;
+
+ return static_cast<FileOffset>(section->file_offset_of_raw_data +
+ offset_in_section);
+ }
+
+ // Small RVA values point into the file header in the loaded image.
+ // RVA 0 is the module load address which Windows uses as the module handle.
+ // RVA 2 sometimes occurs; its meaning is unclear, but it would map into the
+ // DOS header.
+ if (rva == 0 || rva == 2)
+ return static_cast<FileOffset>(rva);
+
+ NOTREACHED();
+ return kNoFileOffset;
+}
+
+// Translates |file_offset| to an RVA by scanning the section table in order
+// and using the first section whose raw data range contains |file_offset|.
+// If no section contains it, hits NOTREACHED() and returns kNoRVA.
+RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
+ for (int i = 0; i < number_of_sections_; ++i) {
+ const Section* section = &sections_[i];
+ // Check the lower bound first so the subtraction below cannot underflow.
+ if (file_offset >= section->file_offset_of_raw_data) {
+ FileOffset offset_in_section =
+ file_offset - section->file_offset_of_raw_data;
+ if (offset_in_section < section->size_of_raw_data)
+ return static_cast<RVA>(section->virtual_address + offset_in_section);
+ }
+ }
+
+ NOTREACHED();
+ return kNoRVA;
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -57,18 +93,19 @@ bool DisassemblerWin32X64::ParseHeader() {
return Bad("Not MZ");
// offset from DOS header to PE header is stored in DOS header.
- uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader);
+ FileOffset file_offset = static_cast<FileOffset>(
+ ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader));
- if (offset >= length())
+ if (file_offset >= length())
return Bad("Bad offset to PE header");
- const uint8_t* const pe_header = OffsetToPointer(offset);
+ const uint8_t* const pe_header = FileOffsetToPointer(file_offset);
const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
if (pe_header <= start() ||
pe_header >= end() - kMinPEHeaderSize)
- return Bad("Bad offset to PE header");
+ return Bad("Bad file offset to PE header");
- if (offset % 8 != 0)
+ if (file_offset % 8 != 0)
return Bad("Misaligned PE header");
// The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
@@ -169,7 +206,7 @@ bool DisassemblerWin32X64::ParseHeader() {
size_of_optional_header_);
size_t detected_length = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
// TODO(sra): consider using the 'characteristics' field of the section
@@ -267,7 +304,7 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) {
RVA rva = page_rva + offset;
// TODO(sebmarchand): Skip the relocs that live outside of the image. See
// the version of this function in disassembler_win32_x86.cc.
- if (type == 10) { // IMAGE_REL_BASED_DIR64
+ if (type == 10) { // IMAGE_REL_BASED_DIR64
relocs->push_back(rva);
} else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE
// Ignore, used as padding.
@@ -287,48 +324,19 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) {
}
+// Returns the first section in the section table whose virtual range
+// [virtual_address, virtual_address + virtual_size) contains |rva|, or nullptr
+// if no section contains it.
const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const {
- for (int i = 0; i < number_of_sections_; i++) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
- uint32_t offset = rva - section->virtual_address;
- if (offset < section->virtual_size) {
- return section;
- }
- }
- return NULL;
-}
-
-int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
- const Section* section = RVAToSection(rva);
- if (section) {
- uint32_t offset = rva - section->virtual_address;
- if (offset < section->size_of_raw_data) {
- return section->file_offset_of_raw_data + offset;
- } else {
- return kNoOffset; // In section but not in file (e.g. uninit data).
+ // Check the lower bound explicitly so the subtraction below cannot
+ // underflow. Despite its FileOffset type, |offset_in_section| is an offset
+ // into the section's virtual range: it is compared against virtual_size.
+ if (rva >= section->virtual_address) {
+ FileOffset offset_in_section = rva - section->virtual_address;
+ if (offset_in_section < section->virtual_size)
+ return section;
}
}
-
- // Small RVA values point into the file header in the loaded image.
- // RVA 0 is the module load address which Windows uses as the module handle.
- // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
- // DOS header.
- if (rva == 0 || rva == 2)
- return rva;
-
- NOTREACHED();
- return kNoOffset;
-}
-
-const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const {
- int file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoOffset)
- return NULL;
- else
- return OffsetToPointer(file_offset);
+ return nullptr;
}
std::string DisassemblerWin32X64::SectionName(const Section* section) {
- if (section == NULL)
+ if (section == nullptr)
return "<none>";
char name[9];
memcpy(name, section->name, 8);
@@ -338,24 +346,25 @@ std::string DisassemblerWin32X64::SectionName(const Section* section) {
CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- uint32_t file_offset = 0;
+ FileOffset file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == NULL) {
- // No more sections. There should not be extra stuff following last
+ if (section == nullptr) {
+ // No more sections. There should not be extra stuff following last
// section.
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
break;
}
if (file_offset < section->file_offset_of_raw_data) {
- uint32_t section_start_offset = section->file_offset_of_raw_data;
- if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
- program))
+ FileOffset section_start_offset = section->file_offset_of_raw_data;
+ if (!ParseNonSectionFileRegion(file_offset, section_start_offset,
+ program)) {
return false;
+ }
file_offset = section_start_offset;
}
- uint32_t end = file_offset + section->size_of_raw_data;
+ FileOffset end = file_offset + section->size_of_raw_data;
if (!ParseFileRegion(section, file_offset, end, program))
return false;
file_offset = end;
@@ -375,7 +384,7 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
return false;
#if COURGETTE_HISTOGRAM_TARGETS
- for (size_t i = 0; i < abs32_locations_.size(); ++i) {
+ for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
@@ -386,10 +395,10 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
}
void DisassemblerWin32X64::ParseRel32RelocsFromSections() {
- uint32_t file_offset = 0;
+ FileOffset file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == NULL)
+ if (section == nullptr)
break;
if (file_offset < section->file_offset_of_raw_data)
file_offset = section->file_offset_of_raw_data;
@@ -411,11 +420,11 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() {
std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
while (abs32_iter != abs32_target_rvas_.end() &&
rel32_iter != rel32_target_rvas_.end()) {
- if (abs32_iter->first < rel32_iter->first)
+ if (abs32_iter->first < rel32_iter->first) {
++abs32_iter;
- else if (rel32_iter->first < abs32_iter->first)
+ } else if (rel32_iter->first < abs32_iter->first) {
++rel32_iter;
- else {
+ } else {
++common;
++abs32_iter;
++rel32_iter;
@@ -431,18 +440,18 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
if (!isCode)
return;
- uint32_t start_file_offset = section->file_offset_of_raw_data;
- uint32_t end_file_offset = start_file_offset + section->size_of_raw_data;
+ FileOffset start_file_offset = section->file_offset_of_raw_data;
+ FileOffset end_file_offset = start_file_offset + section->size_of_raw_data;
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract 'pointer_to_rva'.
+ // subtract |pointer_to_rva|.
const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva;
std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
@@ -459,13 +468,10 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
}
}
- //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
- // ++abs32_pos;
-
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = NULL;
+ const uint8_t* rel32 = nullptr;
bool is_rip_relative = false;
if (p + 5 <= end_pointer) {
@@ -516,7 +522,7 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
// To be valid, rel32 target must be within image, and within this
// section.
- if (IsValidRVA(target_rva) &&
+ if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA.
(is_rip_relative ||
(start_rva <= target_rva && target_rva < end_rva))) {
rel32_locations_.push_back(rel32_rva);
@@ -532,14 +538,14 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
}
CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion(
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) {
if (incomplete_disassembly_)
return true;
if (end_file_offset > start_file_offset) {
- if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset),
+ if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
end_file_offset - start_file_offset)) {
return false;
}
@@ -549,13 +555,13 @@ CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion(
}
CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section,
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) {
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
@@ -664,7 +670,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind,
size_t count = p->second.size();
std::cout << std::dec << p->first << ": " << count;
if (count <= 2) {
- for (size_t i = 0; i < count; ++i)
+ for (size_t i = 0; i < count; ++i)
std::cout << " " << DescribeRVA(p->second[i]);
}
std::cout << std::endl;
@@ -676,7 +682,6 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind,
}
#endif // COURGETTE_HISTOGRAM_TARGETS
-
// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
// that during development I'm finding I need to call it when compiled in
// Release mode. Hence:
@@ -695,12 +700,12 @@ std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const {
}
const Section* DisassemblerWin32X64::FindNextSection(
- uint32_t fileOffset) const {
+ FileOffset file_offset) const {
const Section* best = 0;
- for (int i = 0; i < number_of_sections_; i++) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
if (section->size_of_raw_data > 0) { // i.e. has data in file.
- if (fileOffset <= section->file_offset_of_raw_data) {
+ if (file_offset <= section->file_offset_of_raw_data) {
if (best == 0 ||
section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
best = section;
@@ -711,26 +716,15 @@ const Section* DisassemblerWin32X64::FindNextSection(
return best;
}
-RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const {
- for (int i = 0; i < number_of_sections_; i++) {
- const Section* section = &sections_[i];
- uint32_t offset = file_offset - section->file_offset_of_raw_data;
- if (offset < section->size_of_raw_data) {
- return section->virtual_address + offset;
- }
- }
- return 0;
-}
-
bool DisassemblerWin32X64::ReadDataDirectory(
int index,
ImageDataDirectory* directory) {
if (index < number_of_data_directories_) {
- size_t offset = index * 8 + offset_of_data_directories_;
- if (offset >= size_of_optional_header_)
+ FileOffset file_offset = index * 8 + offset_of_data_directories_;
+ if (file_offset >= size_of_optional_header_)
return Bad("number of data directories inconsistent");
- const uint8_t* data_directory = optional_header_ + offset;
+ const uint8_t* data_directory = optional_header_ + file_offset;
if (data_directory < start() ||
data_directory + 8 >= end())
return Bad("data directory outside image");
diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h
index 23aee66..20cfc7e 100644
--- a/courgette/disassembler_win32_x64.h
+++ b/courgette/disassembler_win32_x64.h
@@ -8,15 +8,16 @@
#include <stddef.h>
#include <stdint.h>
+#include <map>
+#include <string>
+#include <vector>
+
#include "base/macros.h"
#include "courgette/disassembler.h"
+#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_win_pe.h"
-#ifdef COURGETTE_HISTOGRAM_TARGETS
-#include <map>
-#endif
-
namespace courgette {
class AssemblyProgram;
@@ -25,19 +26,14 @@ class DisassemblerWin32X64 : public Disassembler {
public:
explicit DisassemblerWin32X64(const void* start, size_t length);
- virtual ExecutableType kind() { return EXE_WIN_32_X64; }
-
- // Returns 'true' if the buffer appears to point to a Windows 32 bit
- // executable, 'false' otherwise. If ParseHeader() succeeds, other member
- // functions may be called.
- virtual bool ParseHeader();
+ // Disassembler interfaces.
+ RVA FileOffsetToRVA(FileOffset file_offset) const override;
+ FileOffset RVAToFileOffset(RVA rva) const override;
+ ExecutableType kind() const override { return EXE_WIN_32_X64; }
+ bool ParseHeader() override;
+ bool Disassemble(AssemblyProgram* target) override;
- virtual bool Disassemble(AssemblyProgram* target);
-
- //
// Exposed for test purposes
- //
-
bool has_text_section() const { return has_text_section_; }
uint32_t size_of_code() const { return size_of_code_; }
bool is_32bit() const { return !is_PE32_plus_; }
@@ -47,17 +43,9 @@ class DisassemblerWin32X64 : public Disassembler {
// that are listed in the base relocation table.
bool ParseRelocs(std::vector<RVA> *addresses);
- // Returns Section containing the relative virtual address, or NULL if none.
+ // Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;
- static const int kNoOffset = -1;
- // Returns kNoOffset if there is no file offset corresponding to 'rva'.
- int RVAToFileOffset(RVA rva) const;
-
- // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
- // is returned if there is no file offset corresponding to 'rva'.
- const uint8_t* RVAToPointer(RVA rva) const;
-
static std::string SectionName(const Section* section);
protected:
@@ -66,62 +54,46 @@ class DisassemblerWin32X64 : public Disassembler {
void ParseRel32RelocsFromSections();
void ParseRel32RelocsFromSection(const Section* section);
- CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset,
- uint32_t end_file_offset,
+ CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program)
WARN_UNUSED_RESULT;
CheckBool ParseFileRegion(const Section* section,
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
#endif
- // Most addresses are represented as 32-bit RVAs. The one address we can't
- // do this with is the image base address. 'image_base' is valid only for
- // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
+ // Most addresses are represented as 32-bit RVAs. The one address we can't
+ // do this with is the image base address.
uint64_t image_base() const { return image_base_; }
const ImageDataDirectory& base_relocation_table() const {
return base_relocation_table_;
}
- // Subsumes rva != kUnassignedRVA.
- bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
-
- // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
+ // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
std::string DescribeRVA(RVA rva) const;
- // Finds the first section at file_offset or above. Does not return sections
+ // Finds the first section at file_offset or above. Does not return sections
// that have no raw bytes in the file.
- const Section* FindNextSection(uint32_t file_offset) const;
-
- // There are 2 'coordinate systems' for reasoning about executables.
- // FileOffset - the the offset within a single .EXE or .DLL *file*.
- // RVA - relative virtual address (offset within *loaded image*)
- // FileOffsetToRVA and RVAToFileOffset convert between these representations.
-
- RVA FileOffsetToRVA(uint32_t offset) const;
+ const Section* FindNextSection(FileOffset file_offset) const;
private:
-
bool ReadDataDirectory(int index, ImageDataDirectory* dir);
- bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
+ bool incomplete_disassembly_; // true if can omit "uninteresting" bits.
std::vector<RVA> abs32_locations_;
std::vector<RVA> rel32_locations_;
//
- // Fields that are always valid.
+ // Information that is valid after ParseHeader() succeeds.
//
-
- //
- // Information that is valid after successful ParseHeader.
- //
- bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
+ bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
// Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
const uint8_t* optional_header_;
@@ -158,9 +130,9 @@ class DisassemblerWin32X64 : public Disassembler {
std::map<RVA, int> rel32_target_rvas_;
#endif
-
DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X64);
};
} // namespace courgette
+
#endif // COURGETTE_DISASSEMBLER_WIN32_X64_H_
diff --git a/courgette/disassembler_win32_x64_unittest.cc b/courgette/disassembler_win32_x64_unittest.cc
index 8f732b3..1121c10 100644
--- a/courgette/disassembler_win32_x64_unittest.cc
+++ b/courgette/disassembler_win32_x64_unittest.cc
@@ -6,6 +6,9 @@
#include <stdint.h>
+#include <string>
+#include <vector>
+
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "courgette/base_test_unittest.h"
@@ -37,16 +40,16 @@ void DisassemblerWin32X64Test::TestExe() const {
disassembler->RVAToSection(0x00401234 - 0x00400000)),
std::string(".text"));
- EXPECT_EQ(0, disassembler->RVAToFileOffset(0));
- EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096));
- EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000));
+ EXPECT_EQ(0U, disassembler->RVAToFileOffset(0));
+ EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096));
+ EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000));
std::vector<courgette::RVA> relocs;
bool can_parse_relocs = disassembler->ParseRelocs(&relocs);
EXPECT_TRUE(can_parse_relocs);
EXPECT_TRUE(base::STLIsSorted(relocs));
- const uint8_t* offset_p = disassembler->OffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ('M', offset_p[0]);
diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc
index aed26c7..07bdfbc 100644
--- a/courgette/disassembler_win32_x86.cc
+++ b/courgette/disassembler_win32_x86.cc
@@ -8,37 +8,73 @@
#include <stdint.h>
#include <algorithm>
-#include <string>
-#include <vector>
+#include <iostream>
#include "base/logging.h"
-
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
#include "courgette/rel32_finder_win32_x86.h"
namespace courgette {
DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
- : Disassembler(start, length),
- incomplete_disassembly_(false),
- is_PE32_plus_(false),
- optional_header_(NULL),
- size_of_optional_header_(0),
- offset_of_data_directories_(0),
- machine_type_(0),
- number_of_sections_(0),
- sections_(NULL),
- has_text_section_(false),
- size_of_code_(0),
- size_of_initialized_data_(0),
- size_of_uninitialized_data_(0),
- base_of_code_(0),
- base_of_data_(0),
- image_base_(0),
- size_of_image_(0),
- number_of_data_directories_(0) {
+ : Disassembler(start, length),
+ incomplete_disassembly_(false),
+ is_PE32_plus_(false),
+ optional_header_(nullptr),
+ size_of_optional_header_(0),
+ offset_of_data_directories_(0),
+ machine_type_(0),
+ number_of_sections_(0),
+ sections_(nullptr),
+ has_text_section_(false),
+ size_of_code_(0),
+ size_of_initialized_data_(0),
+ size_of_uninitialized_data_(0),
+ base_of_code_(0),
+ base_of_data_(0),
+ image_base_(0),
+ size_of_image_(0),
+ number_of_data_directories_(0) {
+}
+
+// Translates |rva| to the corresponding offset in the file. Returns
+// kNoFileOffset when |rva| lies inside a section but past that section's raw
+// data. An |rva| outside every section is passed through unchanged if it is 0
+// or 2; any other value hits NOTREACHED() and yields kNoFileOffset.
+FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ if (section != nullptr) {
+ FileOffset offset_in_section = rva - section->virtual_address;
+ // Need this extra check, since an |rva| may be valid for a section (it is
+ // within the section's virtual size) yet have no bytes in the file because
+ // it lies past the section's raw data (e.g. uninit data).
+ if (offset_in_section >= section->size_of_raw_data)
+ return kNoFileOffset;
+
+ return static_cast<FileOffset>(section->file_offset_of_raw_data +
+ offset_in_section);
+ }
+
+ // Small RVA values point into the file header in the loaded image.
+ // RVA 0 is the module load address which Windows uses as the module handle.
+ // RVA 2 sometimes occurs; its meaning is unclear, but it would map into the
+ // DOS header.
+ if (rva == 0 || rva == 2)
+ return static_cast<FileOffset>(rva);
+
+ NOTREACHED();
+ return kNoFileOffset;
+}
+
+// Translates |file_offset| to an RVA by scanning the section table in order
+// and using the first section whose raw data range contains |file_offset|.
+// If no section contains it, hits NOTREACHED() and returns kNoRVA.
+RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
+ for (int i = 0; i < number_of_sections_; ++i) {
+ const Section* section = &sections_[i];
+ // Check the lower bound first so the subtraction below cannot underflow.
+ if (file_offset >= section->file_offset_of_raw_data) {
+ FileOffset offset_in_section =
+ file_offset - section->file_offset_of_raw_data;
+ if (offset_in_section < section->size_of_raw_data)
+ return static_cast<RVA>(section->virtual_address + offset_in_section);
+ }
+ }
+
+ NOTREACHED();
+ return kNoRVA;
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -57,18 +93,19 @@ bool DisassemblerWin32X86::ParseHeader() {
return Bad("Not MZ");
// offset from DOS header to PE header is stored in DOS header.
- uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader);
+ FileOffset file_offset = static_cast<FileOffset>(
+ ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader));
- if (offset >= length())
+ if (file_offset >= length())
return Bad("Bad offset to PE header");
- const uint8_t* const pe_header = OffsetToPointer(offset);
+ const uint8_t* const pe_header = FileOffsetToPointer(file_offset);
const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
if (pe_header <= start() ||
pe_header >= end() - kMinPEHeaderSize)
- return Bad("Bad offset to PE header");
+ return Bad("Bad file offset to PE header");
- if (offset % 8 != 0)
+ if (file_offset % 8 != 0)
return Bad("Misaligned PE header");
// The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
@@ -169,7 +206,7 @@ bool DisassemblerWin32X86::ParseHeader() {
size_of_optional_header_);
size_t detected_length = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
// TODO(sra): consider using the 'characteristics' field of the section
@@ -293,48 +330,19 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
}
+// Returns the first section in the section table whose virtual range
+// [virtual_address, virtual_address + virtual_size) contains |rva|, or nullptr
+// if no section contains it.
const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
- for (int i = 0; i < number_of_sections_; i++) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
- uint32_t offset = rva - section->virtual_address;
- if (offset < section->virtual_size) {
- return section;
- }
- }
- return NULL;
-}
-
-int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
- const Section* section = RVAToSection(rva);
- if (section) {
- uint32_t offset = rva - section->virtual_address;
- if (offset < section->size_of_raw_data) {
- return section->file_offset_of_raw_data + offset;
- } else {
- return kNoOffset; // In section but not in file (e.g. uninit data).
+ // Check the lower bound explicitly so the subtraction below cannot
+ // underflow. Despite its FileOffset type, |offset_in_section| is an offset
+ // into the section's virtual range: it is compared against virtual_size.
+ if (rva >= section->virtual_address) {
+ FileOffset offset_in_section = rva - section->virtual_address;
+ if (offset_in_section < section->virtual_size)
+ return section;
}
}
-
- // Small RVA values point into the file header in the loaded image.
- // RVA 0 is the module load address which Windows uses as the module handle.
- // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
- // DOS header.
- if (rva == 0 || rva == 2)
- return rva;
-
- NOTREACHED();
- return kNoOffset;
-}
-
-const uint8_t* DisassemblerWin32X86::RVAToPointer(RVA rva) const {
- int file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoOffset)
- return NULL;
- else
- return OffsetToPointer(file_offset);
+ return nullptr;
}
std::string DisassemblerWin32X86::SectionName(const Section* section) {
- if (section == NULL)
+ if (section == nullptr)
return "<none>";
char name[9];
memcpy(name, section->name, 8);
@@ -344,24 +352,25 @@ std::string DisassemblerWin32X86::SectionName(const Section* section) {
CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- uint32_t file_offset = 0;
+ FileOffset file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == NULL) {
- // No more sections. There should not be extra stuff following last
+ if (section == nullptr) {
+ // No more sections. There should not be extra stuff following last
// section.
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
break;
}
if (file_offset < section->file_offset_of_raw_data) {
- uint32_t section_start_offset = section->file_offset_of_raw_data;
- if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
- program))
+ FileOffset section_start_offset = section->file_offset_of_raw_data;
+ if (!ParseNonSectionFileRegion(file_offset, section_start_offset,
+ program)) {
return false;
+ }
file_offset = section_start_offset;
}
- uint32_t end = file_offset + section->size_of_raw_data;
+ FileOffset end = file_offset + section->size_of_raw_data;
if (!ParseFileRegion(section, file_offset, end, program))
return false;
file_offset = end;
@@ -381,7 +390,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
return false;
#if COURGETTE_HISTOGRAM_TARGETS
- for (size_t i = 0; i < abs32_locations_.size(); ++i) {
+ for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
@@ -392,10 +401,10 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
}
void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
- uint32_t file_offset = 0;
+ FileOffset file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == NULL)
+ if (section == nullptr)
break;
if (file_offset < section->file_offset_of_raw_data)
file_offset = section->file_offset_of_raw_data;
@@ -417,11 +426,11 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
while (abs32_iter != abs32_target_rvas_.end() &&
rel32_iter != rel32_target_rvas_.end()) {
- if (abs32_iter->first < rel32_iter->first)
+ if (abs32_iter->first < rel32_iter->first) {
++abs32_iter;
- else if (rel32_iter->first < abs32_iter->first)
+ } else if (rel32_iter->first < abs32_iter->first) {
++rel32_iter;
- else {
+ } else {
++common;
++abs32_iter;
++rel32_iter;
@@ -437,19 +446,18 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
if (!isCode)
return;
- uint32_t start_file_offset = section->file_offset_of_raw_data;
- uint32_t end_file_offset = start_file_offset + section->size_of_raw_data;
+ FileOffset start_file_offset = section->file_offset_of_raw_data;
+ FileOffset end_file_offset = start_file_offset + section->size_of_raw_data;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
Rel32FinderWin32X86_Basic finder(
base_relocation_table().address_,
- base_relocation_table().address_ + base_relocation_table().size_,
- size_of_image_);
+ base_relocation_table().address_ + base_relocation_table().size_);
finder.Find(start_pointer, end_pointer, start_rva, end_rva, abs32_locations_);
finder.SwapRel32Locations(&rel32_locations_);
@@ -460,14 +468,14 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
}
CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) {
if (incomplete_disassembly_)
return true;
if (end_file_offset > start_file_offset) {
- if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset),
+ if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
end_file_offset - start_file_offset)) {
return false;
}
@@ -477,13 +485,13 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
}
CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section,
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) {
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
@@ -592,7 +600,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
size_t count = p->second.size();
std::cout << std::dec << p->first << ": " << count;
if (count <= 2) {
- for (size_t i = 0; i < count; ++i)
+ for (size_t i = 0; i < count; ++i)
std::cout << " " << DescribeRVA(p->second[i]);
}
std::cout << std::endl;
@@ -604,7 +612,6 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
}
#endif // COURGETTE_HISTOGRAM_TARGETS
-
// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
// that during development I'm finding I need to call it when compiled in
// Release mode. Hence:
@@ -623,12 +630,12 @@ std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const {
}
const Section* DisassemblerWin32X86::FindNextSection(
- uint32_t fileOffset) const {
+ FileOffset file_offset) const {
const Section* best = 0;
- for (int i = 0; i < number_of_sections_; i++) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
if (section->size_of_raw_data > 0) { // i.e. has data in file.
- if (fileOffset <= section->file_offset_of_raw_data) {
+ if (file_offset <= section->file_offset_of_raw_data) {
if (best == 0 ||
section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
best = section;
@@ -639,26 +646,15 @@ const Section* DisassemblerWin32X86::FindNextSection(
return best;
}
-RVA DisassemblerWin32X86::FileOffsetToRVA(uint32_t file_offset) const {
- for (int i = 0; i < number_of_sections_; i++) {
- const Section* section = &sections_[i];
- uint32_t offset = file_offset - section->file_offset_of_raw_data;
- if (offset < section->size_of_raw_data) {
- return section->virtual_address + offset;
- }
- }
- return 0;
-}
-
bool DisassemblerWin32X86::ReadDataDirectory(
int index,
ImageDataDirectory* directory) {
if (index < number_of_data_directories_) {
- size_t offset = index * 8 + offset_of_data_directories_;
- if (offset >= size_of_optional_header_)
+ FileOffset file_offset = index * 8 + offset_of_data_directories_;
+ if (file_offset >= size_of_optional_header_)
return Bad("number of data directories inconsistent");
- const uint8_t* data_directory = optional_header_ + offset;
+ const uint8_t* data_directory = optional_header_ + file_offset;
if (data_directory < start() ||
data_directory + 8 >= end())
return Bad("data directory outside image");
diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h
index 891636c..c22872b 100644
--- a/courgette/disassembler_win32_x86.h
+++ b/courgette/disassembler_win32_x86.h
@@ -8,15 +8,16 @@
#include <stddef.h>
#include <stdint.h>
+#include <map>
+#include <string>
+#include <vector>
+
#include "base/macros.h"
#include "courgette/disassembler.h"
+#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_win_pe.h"
-#ifdef COURGETTE_HISTOGRAM_TARGETS
-#include <map>
-#endif
-
namespace courgette {
class AssemblyProgram;
@@ -25,19 +26,14 @@ class DisassemblerWin32X86 : public Disassembler {
public:
explicit DisassemblerWin32X86(const void* start, size_t length);
- virtual ExecutableType kind() { return EXE_WIN_32_X86; }
+ // Disassembler interfaces.
+ RVA FileOffsetToRVA(FileOffset file_offset) const override;
+ FileOffset RVAToFileOffset(RVA rva) const override;
+ ExecutableType kind() const override { return EXE_WIN_32_X86; }
+ bool ParseHeader() override;
+ bool Disassemble(AssemblyProgram* target) override;
- // Returns 'true' if the buffer appears to point to a Windows 32 bit
- // executable, 'false' otherwise. If ParseHeader() succeeds, other member
- // functions may be called.
- virtual bool ParseHeader();
-
- virtual bool Disassemble(AssemblyProgram* target);
-
- //
// Exposed for test purposes
- //
-
bool has_text_section() const { return has_text_section_; }
uint32_t size_of_code() const { return size_of_code_; }
bool is_32bit() const { return !is_PE32_plus_; }
@@ -47,17 +43,9 @@ class DisassemblerWin32X86 : public Disassembler {
// that are listed in the base relocation table.
bool ParseRelocs(std::vector<RVA> *addresses);
- // Returns Section containing the relative virtual address, or NULL if none.
+ // Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;
- static const int kNoOffset = -1;
- // Returns kNoOffset if there is no file offset corresponding to 'rva'.
- int RVAToFileOffset(RVA rva) const;
-
- // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
- // is returned if there is no file offset corresponding to 'rva'.
- const uint8_t* RVAToPointer(RVA rva) const;
-
static std::string SectionName(const Section* section);
protected:
@@ -66,59 +54,46 @@ class DisassemblerWin32X86 : public Disassembler {
void ParseRel32RelocsFromSections();
void ParseRel32RelocsFromSection(const Section* section);
- CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset,
- uint32_t end_file_offset,
+ CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program)
WARN_UNUSED_RESULT;
CheckBool ParseFileRegion(const Section* section,
- uint32_t start_file_offset,
- uint32_t end_file_offset,
+ FileOffset start_file_offset,
+ FileOffset end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
#endif
- // Most addresses are represented as 32-bit RVAs. The one address we can't
- // do this with is the image base address. 'image_base' is valid only for
- // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
+ // Most addresses are represented as 32-bit RVAs. The one address we can't
+ // do this with is the image base address.
uint32_t image_base() const { return static_cast<uint32_t>(image_base_); }
const ImageDataDirectory& base_relocation_table() const {
return base_relocation_table_;
}
- // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
+ // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
std::string DescribeRVA(RVA rva) const;
- // Finds the first section at file_offset or above. Does not return sections
+ // Finds the first section at file_offset or above. Does not return sections
// that have no raw bytes in the file.
- const Section* FindNextSection(uint32_t file_offset) const;
-
- // There are 2 'coordinate systems' for reasoning about executables.
- // FileOffset - the the offset within a single .EXE or .DLL *file*.
- // RVA - relative virtual address (offset within *loaded image*)
- // FileOffsetToRVA and RVAToFileOffset convert between these representations.
-
- RVA FileOffsetToRVA(uint32_t offset) const;
+ const Section* FindNextSection(FileOffset file_offset) const;
private:
-
bool ReadDataDirectory(int index, ImageDataDirectory* dir);
- bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
+ bool incomplete_disassembly_; // true if can omit "uninteresting" bits.
std::vector<RVA> abs32_locations_;
std::vector<RVA> rel32_locations_;
//
- // Fields that are always valid.
+ // Information that is valid after ParseHeader() succeeds.
//
-
- //
- // Information that is valid after successful ParseHeader.
- //
- bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
+ bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
// Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
const uint8_t* optional_header_;
@@ -155,9 +130,9 @@ class DisassemblerWin32X86 : public Disassembler {
std::map<RVA, int> rel32_target_rvas_;
#endif
-
DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86);
};
} // namespace courgette
+
#endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_
diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc
index 4e16464..3e43273 100644
--- a/courgette/disassembler_win32_x86_unittest.cc
+++ b/courgette/disassembler_win32_x86_unittest.cc
@@ -6,6 +6,9 @@
#include <stdint.h>
+#include <string>
+#include <vector>
+
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "courgette/base_test_unittest.h"
@@ -37,16 +40,16 @@ void DisassemblerWin32X86Test::TestExe() const {
disassembler->RVAToSection(0x00401234 - 0x00400000)),
std::string(".text"));
- EXPECT_EQ(0, disassembler->RVAToFileOffset(0));
- EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096));
- EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000));
+ EXPECT_EQ(0U, disassembler->RVAToFileOffset(0));
+ EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096));
+ EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000));
std::vector<courgette::RVA> relocs;
bool can_parse_relocs = disassembler->ParseRelocs(&relocs);
EXPECT_TRUE(can_parse_relocs);
EXPECT_TRUE(base::STLIsSorted(relocs));
- const uint8_t* offset_p = disassembler->OffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ('M', offset_p[0]);
diff --git a/courgette/image_utils.h b/courgette/image_utils.h
index f958cc1..cfbfcfe 100644
--- a/courgette/image_utils.h
+++ b/courgette/image_utils.h
@@ -14,8 +14,44 @@
namespace courgette {
-typedef uint32_t RVA;
+// There are several ways to reason about addresses in an image:
+// - File Offset: Position relative to start of image.
+// - VA (Virtual Address): Virtual memory address of a loaded image. This is
+// subject to relocation by the OS.
+// - RVA (Relative Virtual Address): VA relative to some base address. This is
+// the preferred way to specify pointers in an image. Two ways to encode RVA
+// are:
+// - abs32: RVA value is encoded directly.
+// - rel32: RVA is encoded as offset from an instruction address. This is
+// commonly used for relative branch/call opcodes.
+// Courgette operates on File Offsets and RVAs only.
+
+using RVA = uint32_t;
const RVA kUnassignedRVA = 0xFFFFFFFFU;
+const RVA kNoRVA = 0xFFFFFFFFU;
+
+using FileOffset = size_t;
+const FileOffset kNoFileOffset = UINTPTR_MAX;
+
+// An interface for {File Offset, RVA, pointer to image data} translation.
+class AddressTranslator {
+ public:
+ // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent.
+ virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;
+
+ // Returns the file offset corresponding to |rva|, or kNoFileOffset if
+ // nonexistent.
+ virtual FileOffset RVAToFileOffset(RVA rva) const = 0;
+
+ // Returns the pointer to the image data for |file_offset|. Assumes that
+ // 0 <= |file_offset| <= image size. If |file_offset| == image size, the resulting
+ // pointer is an end bound for iteration that should never be dereferenced.
+ virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;
+
+ // Returns the pointer to the image data for |rva|, or null if |rva| is
+ // invalid.
+ virtual const uint8_t* RVAToPointer(RVA rva) const = 0;
+};
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
diff --git a/courgette/rel32_finder_win32_x86.cc b/courgette/rel32_finder_win32_x86.cc
index 171b781..0ed492f 100644
--- a/courgette/rel32_finder_win32_x86.cc
+++ b/courgette/rel32_finder_win32_x86.cc
@@ -8,11 +8,9 @@
namespace courgette {
-Rel32FinderWin32X86::Rel32FinderWin32X86(
- RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
- : relocs_start_rva_(relocs_start_rva),
- relocs_end_rva_(relocs_end_rva),
- image_end_rva_(image_end_rva) {
+Rel32FinderWin32X86::Rel32FinderWin32X86(RVA relocs_start_rva,
+ RVA relocs_end_rva)
+ : relocs_start_rva_(relocs_start_rva), relocs_end_rva_(relocs_end_rva) {
}
Rel32FinderWin32X86::~Rel32FinderWin32X86() {
@@ -28,9 +26,9 @@ void Rel32FinderWin32X86::SwapRel32TargetRVAs(std::map<RVA, int>* dest) {
}
#endif
-Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(
- RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
- : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva, image_end_rva) {
+Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(RVA relocs_start_rva,
+ RVA relocs_end_rva)
+ : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva) {
}
Rel32FinderWin32X86_Basic::~Rel32FinderWin32X86_Basic() {
@@ -51,6 +49,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
const uint8_t* p = start_pointer;
while (p < end_pointer) {
RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+ // Skip the base relocation table if we encounter it.
+ // Note: We're not bothering to handle the edge case where a Rel32 pointer
+ // collides with |relocs_start_rva_| by being {1, 2, 3}-bytes before it.
if (current_rva == relocs_start_rva_) {
if (relocs_start_rva_ < relocs_end_rva_) {
p += relocs_end_rva_ - relocs_start_rva_;
@@ -58,13 +60,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
}
}
- //while (abs32_pos != abs32_locations.end() && *abs32_pos < current_rva)
- // ++abs32_pos;
-
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = NULL;
+ const uint8_t* rel32 = nullptr;
if (p + 5 <= end_pointer) {
if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
@@ -95,10 +94,9 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
}
RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
- // To be valid, rel32 target must be within image, and within this
- // section.
- if (IsValidRVA(target_rva) &&
- start_rva <= target_rva && target_rva < end_rva) {
+ // A valid rel32 target must be within the image, and within this section.
+ // Subsumes |target_rva| != |kUnassignedRVA|.
+ if (start_rva <= target_rva && target_rva < end_rva) {
rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h
index 01226ae..98ebd98 100644
--- a/courgette/rel32_finder_win32_x86.h
+++ b/courgette/rel32_finder_win32_x86.h
@@ -7,9 +7,7 @@
#include <stdint.h>
-#if COURGETTE_HISTOGRAM_TARGETS
#include <map>
-#endif
#include <vector>
#include "courgette/image_utils.h"
@@ -19,25 +17,21 @@ namespace courgette {
// A helper class to scan through a section of code to extract RVAs.
class Rel32FinderWin32X86 {
public:
- Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva,
- RVA image_end_rva);
+ Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva);
virtual ~Rel32FinderWin32X86();
- // Subsumes rva != kUnassignedRVA.
- bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; }
-
- // Swaps data in |rel32_locations_| to |dest|.
+ // Swaps data in |rel32_locations_| with |dest|.
void SwapRel32Locations(std::vector<RVA>* dest);
#if COURGETTE_HISTOGRAM_TARGETS
- // Swaps data in |rel32_target_rvas_| to |dest|.
+ // Swaps data in |rel32_target_rvas_| with |dest|.
void SwapRel32TargetRVAs(std::map<RVA, int>* dest);
#endif
// Scans through [|start_pointer|, |end_pointer|) for rel32 addresses. Seeks
// RVAs that satisfy the following:
- // - Do not collide with |abs32_pos| (assumed sorted).
- // - Do not collide with |base_relocation_table|'s RVA range,
+ // - Do not overlap with |abs32_locations| (assumed sorted).
+ // - Do not overlap with [relocs_start_rva, relocs_end_rva).
// - Whose targets are in [|start_rva|, |end_rva|).
// The sorted results are written to |rel32_locations_|.
virtual void Find(const uint8_t* start_pointer,
@@ -49,7 +43,6 @@ class Rel32FinderWin32X86 {
protected:
const RVA relocs_start_rva_;
const RVA relocs_end_rva_;
- const RVA image_end_rva_;
std::vector<RVA> rel32_locations_;
@@ -62,8 +55,7 @@ class Rel32FinderWin32X86 {
// (excluding JPO/JPE) disregarding instruction alignment.
class Rel32FinderWin32X86_Basic : public Rel32FinderWin32X86 {
public:
- Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva,
- RVA image_end_rva);
+ Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva);
virtual ~Rel32FinderWin32X86_Basic();
// Rel32FinderWin32X86 implementation.
diff --git a/courgette/rel32_finder_win32_x86_unittest.cc b/courgette/rel32_finder_win32_x86_unittest.cc
index aed5c13..496f0b9 100644
--- a/courgette/rel32_finder_win32_x86_unittest.cc
+++ b/courgette/rel32_finder_win32_x86_unittest.cc
@@ -33,8 +33,7 @@ class Rel32FinderWin32X86TestCase {
}
void RunTestBasic(std::string name) {
- Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_,
- image_end_rva_);
+ Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_);
ASSERT_FALSE(text_data_.empty());
finder.Find(&text_data_[0], &text_data_[0] + text_data_.size(),
text_start_rva_, text_end_rva_, abs32_locations_);