summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorscottmg <scottmg@chromium.org>2016-03-12 15:54:56 -0800
committerCommit bot <commit-bot@chromium.org>2016-03-12 23:56:45 +0000
commit4a95ca5a4bab60f9f54325036516b640d263e2ec (patch)
treee4cb16184171a68d73caed9b37f3f0ee977c5a3d /courgette
parent2eb2d38dde83688f266012a04120c3253156d2b7 (diff)
downloadchromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.zip
chromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.tar.gz
chromium_src-4a95ca5a4bab60f9f54325036516b640d263e2ec.tar.bz2
Revert of [Courgette] Clean up Disassembler; fix ELF Memory leaks. (patchset #15 id:270001 of https://codereview.chromium.org/1676683002/ )
Reason for revert:
Regressed linux sizes (iostream maybe?)
https://build.chromium.org/p/chromium/builders/Linux/builds/72899/steps/sizes/logs/stdio

Original issue's description:
> [Courgette] Clean up Disassembler; fix ELF Memory leaks.
>
> Cleaning up code surrounding Disassembler:
> - Extract AddressTranslator interface to be used across subclasses.
> - Use FileOffset = size_t by context.
> - Detailed comments & TODOs in DisassemblerElf32ARM.
> - Fix DisassemblerElf32ARM memory leaks.
> - Lots of superficial stylistic changes.
>
> Except for AddressTranslator routines and unit tests, shying away
> from control flow and logic changes.
>
> BUG=579206
>
> Committed: https://crrev.com/58b822d441f5c982e879e536fa3c1cbac8fd339a
> Cr-Commit-Position: refs/heads/master@{#380881}

TBR=grt@chromium.org,wfh@chromium.org,chrisha@chromium.org,andrewhayden@chromium.org,huangs@chromium.org
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=579206

Review URL: https://codereview.chromium.org/1792603006
Cr-Commit-Position: refs/heads/master@{#380885}
Diffstat (limited to 'courgette')
-rw-r--r--courgette/disassembler.cc20
-rw-r--r--courgette/disassembler.h34
-rw-r--r--courgette/disassembler_elf_32.cc333
-rw-r--r--courgette/disassembler_elf_32.h127
-rw-r--r--courgette/disassembler_elf_32_arm.cc244
-rw-r--r--courgette/disassembler_elf_32_arm.h56
-rw-r--r--courgette/disassembler_elf_32_x86.cc111
-rw-r--r--courgette/disassembler_elf_32_x86.h46
-rw-r--r--courgette/disassembler_elf_32_x86_unittest.cc89
-rw-r--r--courgette/disassembler_win32_x64.cc222
-rw-r--r--courgette/disassembler_win32_x64.h78
-rw-r--r--courgette/disassembler_win32_x64_unittest.cc11
-rw-r--r--courgette/disassembler_win32_x86.cc214
-rw-r--r--courgette/disassembler_win32_x86.h75
-rw-r--r--courgette/disassembler_win32_x86_unittest.cc11
-rw-r--r--courgette/image_utils.h38
-rw-r--r--courgette/rel32_finder_win32_x86.cc30
-rw-r--r--courgette/rel32_finder_win32_x86.h20
-rw-r--r--courgette/rel32_finder_win32_x86_unittest.cc3
19 files changed, 880 insertions, 882 deletions
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
index 9b58ba0..b9fce8b 100644
--- a/courgette/disassembler.cc
+++ b/courgette/disassembler.cc
@@ -4,12 +4,10 @@
#include "courgette/disassembler.h"
-#include "base/logging.h"
-
namespace courgette {
Disassembler::Disassembler(const void* start, size_t length)
- : failure_reason_("uninitialized") {
+ : failure_reason_("uninitialized") {
start_ = reinterpret_cast<const uint8_t*>(start);
length_ = length;
end_ = start_ + length_;
@@ -17,21 +15,13 @@ Disassembler::Disassembler(const void* start, size_t length)
Disassembler::~Disassembler() {};
-const uint8_t* Disassembler::FileOffsetToPointer(FileOffset file_offset) const {
- CHECK_LE(file_offset, static_cast<FileOffset>(end_ - start_));
- return start_ + file_offset;
-}
-
-const uint8_t* Disassembler::RVAToPointer(RVA rva) const {
- FileOffset file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoFileOffset)
- return nullptr;
-
- return FileOffsetToPointer(file_offset);
+const uint8_t* Disassembler::OffsetToPointer(size_t offset) const {
+ assert(start_ + offset <= end_);
+ return start_ + offset;
}
bool Disassembler::Good() {
- failure_reason_ = nullptr;
+ failure_reason_ = NULL;
return true;
}
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index bc715b0..e833cfa 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -16,35 +16,33 @@ namespace courgette {
class AssemblyProgram;
-class Disassembler : public AddressTranslator {
+class Disassembler {
public:
virtual ~Disassembler();
- // AddressTranslator interfaces.
- virtual RVA FileOffsetToRVA(FileOffset file_offset) const override = 0;
- virtual FileOffset RVAToFileOffset(RVA rva) const override = 0;
- const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override;
- const uint8_t* RVAToPointer(RVA rva) const override;
+ virtual ExecutableType kind() { return EXE_UNKNOWN; }
- virtual ExecutableType kind() const = 0;
+ // ok() may always be called but returns 'true' only after ParseHeader
+ // succeeds.
+ bool ok() const { return failure_reason_ == NULL; }
- // Returns true if the buffer appears to be a valid executable of the expected
- // type, and false otherwise. This needs not be called before Disassemble().
+ // Returns 'true' if the buffer appears to be a valid executable of the
+ // expected type. It is not required that this be called before Disassemble.
virtual bool ParseHeader() = 0;
// Disassembles the item passed to the factory method into the output
// parameter 'program'.
virtual bool Disassemble(AssemblyProgram* program) = 0;
- // ok() may always be called but returns true only after ParseHeader()
- // succeeds.
- bool ok() const { return failure_reason_ == nullptr; }
-
- // Returns the length of the image. May reduce after ParseHeader().
+ // Returns the length of the source executable. May reduce after ParseHeader.
size_t length() const { return length_; }
const uint8_t* start() const { return start_; }
const uint8_t* end() const { return end_; }
+ // Returns a pointer into the memory copy of the file format.
+ // OffsetToPointer(0) returns a pointer to the start of the file format.
+ const uint8_t* OffsetToPointer(size_t offset) const;
+
protected:
Disassembler(const void* start, size_t length);
@@ -57,16 +55,16 @@ class Disassembler : public AddressTranslator {
}
// Reduce the length of the image in memory. Does not actually free
- // (or realloc) any memory. Usually only called via ParseHeader().
+ // (or realloc) any memory. Usually only called via ParseHeader()
void ReduceLength(size_t reduced_length);
private:
const char* failure_reason_;
//
- // Basic information that is always valid after construction, although
- // ParseHeader() may shorten |length_| if the executable is shorter than the
- // total data.
+ // Basic information that is always valid after Construction, though
+ // ParseHeader may shorten the length if the executable is shorter than
+ // the total data.
//
size_t length_; // In current memory.
const uint8_t* start_; // In current memory, base for 'file offsets'.
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc
index 9ceb8ab..84aa971 100644
--- a/courgette/disassembler_elf_32.cc
+++ b/courgette/disassembler_elf_32.cc
@@ -4,70 +4,39 @@
#include "courgette/disassembler_elf_32.h"
+#include <stddef.h>
+#include <stdint.h>
+
#include <algorithm>
+#include <string>
+#include <vector>
#include "base/logging.h"
+#include "base/memory/scoped_vector.h"
+
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
namespace courgette {
DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
- : Disassembler(start, length),
- header_(nullptr),
- section_header_table_(nullptr),
- section_header_table_size_(0),
- program_header_table_(nullptr),
- program_header_table_size_(0),
- default_string_section_(nullptr) {
-}
-
-RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const {
- // File offsets can be 64-bit values, but we are dealing with 32-bit
- // executables and so only need to support 32-bit file sizes.
- uint32_t offset32 = static_cast<uint32_t>(offset);
-
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
- // These can appear to have a size in the file, but don't.
- if (section_header->sh_type == SHT_NOBITS)
- continue;
-
- Elf32_Off section_begin = section_header->sh_offset;
- Elf32_Off section_end = section_begin + section_header->sh_size;
-
- if (offset32 >= section_begin && offset32 < section_end) {
- return section_header->sh_addr + (offset32 - section_begin);
- }
- }
-
- return 0;
-}
-
-FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
- // These can appear to have a size in the file, but don't.
- if (section_header->sh_type == SHT_NOBITS)
- continue;
- Elf32_Addr begin = section_header->sh_addr;
- Elf32_Addr end = begin + section_header->sh_size;
-
- if (rva >= begin && rva < end)
- return section_header->sh_offset + (rva - begin);
- }
- return kNoFileOffset;
+ : Disassembler(start, length),
+ header_(NULL),
+ section_header_table_(NULL),
+ section_header_table_size_(0),
+ program_header_table_(NULL),
+ program_header_table_size_(0),
+ default_string_section_(NULL) {
}
bool DisassemblerElf32::ParseHeader() {
if (length() < sizeof(Elf32_Ehdr))
return Bad("Too small");
- header_ = reinterpret_cast<const Elf32_Ehdr*>(start());
+ header_ = (Elf32_Ehdr *)start();
- // Have magic for ELF header?
+ // Have magic for elf header?
if (header_->e_ident[0] != 0x7f ||
header_->e_ident[1] != 'E' ||
header_->e_ident[2] != 'L' ||
@@ -90,25 +59,23 @@ bool DisassemblerElf32::ParseHeader() {
if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
return Bad("Out of bounds section header table");
- section_header_table_ = reinterpret_cast<const Elf32_Shdr*>(
- FileOffsetToPointer(header_->e_shoff));
+ section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
section_header_table_size_ = header_->e_shnum;
if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
return Bad("Out of bounds program header table");
- program_header_table_ = reinterpret_cast<const Elf32_Phdr*>(
- FileOffsetToPointer(header_->e_phoff));
+ program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
program_header_table_size_ = header_->e_phnum;
if (header_->e_shstrndx >= header_->e_shnum)
return Bad("Out of bounds string section index");
- default_string_section_ = reinterpret_cast<const char*>(
- SectionBody(static_cast<int>(header_->e_shstrndx)));
+ default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
- if (!UpdateLength())
+ if (!UpdateLength()) {
return Bad("Out of bounds section or segment");
+ }
return Good();
}
@@ -130,6 +97,7 @@ bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
return false;
target->DefaultAssignIndexes();
+
return true;
}
@@ -137,9 +105,8 @@ bool DisassemblerElf32::UpdateLength() {
Elf32_Off result = 0;
// Find the end of the last section
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
+ for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
+ const Elf32_Shdr *section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_NOBITS)
continue;
@@ -152,9 +119,8 @@ bool DisassemblerElf32::UpdateLength() {
}
// Find the end of the last segment
- for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
- ++segment_id) {
- const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
+ for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
+ const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
return false;
@@ -163,26 +129,25 @@ bool DisassemblerElf32::UpdateLength() {
result = std::max(result, segment_end);
}
- Elf32_Off section_table_end =
- header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr));
+ Elf32_Off section_table_end = header_->e_shoff +
+ (header_->e_shnum * sizeof(Elf32_Shdr));
result = std::max(result, section_table_end);
- Elf32_Off segment_table_end =
- header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr));
+ Elf32_Off segment_table_end = header_->e_phoff +
+ (header_->e_phnum * sizeof(Elf32_Phdr));
result = std::max(result, segment_table_end);
ReduceLength(result);
return true;
}
-CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const {
+CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
if (rva == kUnassignedRVA)
return false;
// It's valid if it's contained in any program segment
- for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
- ++segment_id) {
- const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
+ for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
+ const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
if (segment_header->p_type != PT_LOAD)
continue;
@@ -197,58 +162,114 @@ CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const {
return false;
}
-CheckBool DisassemblerElf32::RVAsToFileOffsets(
- const std::vector<RVA>& rvas,
- std::vector<FileOffset>* file_offsets) {
- file_offsets->clear();
- for (RVA rva : rvas) {
- FileOffset file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoFileOffset)
+CheckBool DisassemblerElf32::RVAToFileOffset(RVA rva,
+ size_t* result) const {
+ for (int i = 0; i < SectionHeaderCount(); i++) {
+ const Elf32_Shdr *section_header = SectionHeader(i);
+ // These can appear to have a size in the file, but don't.
+ if (section_header->sh_type == SHT_NOBITS)
+ continue;
+ Elf32_Addr begin = section_header->sh_addr;
+ Elf32_Addr end = begin + section_header->sh_size;
+
+ if (rva >= begin && rva < end) {
+ *result = section_header->sh_offset + (rva - begin);
+ return true;
+ }
+ }
+ return false;
+}
+
+RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
+ // File offsets can be 64 bit values, but we are dealing with 32
+ // bit executables and so only need to support 32bit file sizes.
+ uint32_t offset32 = (uint32_t)offset;
+
+ for (int i = 0; i < SectionHeaderCount(); i++) {
+
+ const Elf32_Shdr *section_header = SectionHeader(i);
+
+ // These can appear to have a size in the file, but don't.
+ if (section_header->sh_type == SHT_NOBITS)
+ continue;
+
+ Elf32_Off section_begin = section_header->sh_offset;
+ Elf32_Off section_end = section_begin + section_header->sh_size;
+
+ if (offset32 >= section_begin && offset32 < section_end) {
+ return section_header->sh_addr + (offset32 - section_begin);
+ }
+ }
+
+ return 0;
+}
+
+CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
+ std::vector<size_t>* offsets) {
+ offsets->clear();
+
+ for (std::vector<RVA>::iterator rva = rvas->begin();
+ rva != rvas->end();
+ rva++) {
+
+ size_t offset;
+
+ if (!RVAToFileOffset(*rva, &offset))
return false;
- file_offsets->push_back(file_offset);
+
+ offsets->push_back(offset);
}
+
return true;
}
-CheckBool DisassemblerElf32::RVAsToFileOffsets(
- ScopedVector<TypedRVA>* typed_rvas) {
- for (TypedRVA* typed_rva : *typed_rvas) {
- FileOffset file_offset = RVAToFileOffset(typed_rva->rva());
- if (file_offset == kNoFileOffset)
+CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
+ for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
+ rva != rvas->end();
+ rva++) {
+
+ size_t offset;
+
+ if (!RVAToFileOffset((*rva)->rva(), &offset))
return false;
- typed_rva->set_file_offset(file_offset);
+
+ (*rva)->set_offset(offset);
}
+
return true;
}
CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- FileOffset file_offset = 0;
+ uint32_t file_offset = 0;
- std::vector<FileOffset> abs_offsets;
+ std::vector<size_t> abs_offsets;
- if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets))
+ if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
return false;
- if (!RVAsToFileOffsets(&rel32_locations_))
+ if (!RVAsToOffsets(&rel32_locations_))
return false;
- std::vector<FileOffset>::iterator current_abs_offset = abs_offsets.begin();
+ std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
- std::vector<FileOffset>::iterator end_abs_offset = abs_offsets.end();
+ std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
+ for (int section_id = 0;
+ section_id < SectionHeaderCount();
+ section_id++) {
+
+ const Elf32_Shdr *section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_NOBITS)
continue;
- if (!ParseSimpleRegion(file_offset, section_header->sh_offset, program))
+ if (!ParseSimpleRegion(file_offset,
+ section_header->sh_offset,
+ program))
return false;
-
file_offset = section_header->sh_offset;
switch (section_header->sh_type) {
@@ -259,13 +280,10 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
break;
case SHT_PROGBITS:
if (!ParseProgbitsSection(section_header,
- &current_abs_offset,
- end_abs_offset,
- &current_rel,
- end_rel,
- program)) {
+ &current_abs_offset, end_abs_offset,
+ &current_rel, end_rel,
+ program))
return false;
- }
file_offset = section_header->sh_offset + section_header->sh_size;
break;
case SHT_INIT_ARRAY:
@@ -274,27 +292,28 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
while (current_abs_offset != end_abs_offset &&
*current_abs_offset >= section_header->sh_offset &&
*current_abs_offset <
- section_header->sh_offset + section_header->sh_size) {
+ (section_header->sh_offset + section_header->sh_size)) {
// Skip any abs_offsets that appear in the unsupported INIT_ARRAY section
- VLOG(1) << "Skipping relocation entry for unsupported section: "
- << section_header->sh_type;
- ++current_abs_offset;
+ VLOG(1) << "Skipping relocation entry for unsupported section: " <<
+ section_header->sh_type;
+ current_abs_offset++;
}
break;
default:
if (current_abs_offset != end_abs_offset &&
- *current_abs_offset >= section_header->sh_offset &&
- *current_abs_offset <
- section_header->sh_offset + section_header->sh_size) {
- VLOG(1) << "Relocation address in unrecognized ELF section: "
- << section_header->sh_type;
- }
- break;
+ *current_abs_offset >= section_header->sh_offset &&
+ *current_abs_offset <
+ (section_header->sh_offset + section_header->sh_size))
+ VLOG(1) << "Relocation address in unrecognized ELF section: " << \
+ section_header->sh_type;
+ break;
}
}
// Rest of the file past the last section
- if (!ParseSimpleRegion(file_offset, length(), program))
+ if (!ParseSimpleRegion(file_offset,
+ length(),
+ program))
return false;
// Make certain we consume all of the relocations as expected
@@ -302,32 +321,34 @@ CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
}
CheckBool DisassemblerElf32::ParseProgbitsSection(
- const Elf32_Shdr* section_header,
- std::vector<FileOffset>::iterator* current_abs_offset,
- std::vector<FileOffset>::iterator end_abs_offset,
+ const Elf32_Shdr *section_header,
+ std::vector<size_t>::iterator* current_abs_offset,
+ std::vector<size_t>::iterator end_abs_offset,
ScopedVector<TypedRVA>::iterator* current_rel,
ScopedVector<TypedRVA>::iterator end_rel,
AssemblyProgram* program) {
+
// Walk all the bytes in the file, whether or not in a section.
- FileOffset file_offset = section_header->sh_offset;
- FileOffset section_end = section_header->sh_offset + section_header->sh_size;
+ size_t file_offset = section_header->sh_offset;
+ size_t section_end = section_header->sh_offset + section_header->sh_size;
Elf32_Addr origin = section_header->sh_addr;
- FileOffset origin_offset = section_header->sh_offset;
+ size_t origin_offset = section_header->sh_offset;
if (!program->EmitOriginInstruction(origin))
return false;
while (file_offset < section_end) {
+
if (*current_abs_offset != end_abs_offset &&
file_offset > **current_abs_offset)
return false;
while (*current_rel != end_rel &&
- file_offset > (**current_rel)->file_offset()) {
- ++(*current_rel);
+ file_offset > (**current_rel)->get_offset()) {
+ (*current_rel)++;
}
- FileOffset next_relocation = section_end;
+ size_t next_relocation = section_end;
if (*current_abs_offset != end_abs_offset &&
next_relocation > **current_abs_offset)
@@ -337,8 +358,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
// an Abs value, or the end of the section, so +3 to make sure there is
// room for the full 4 byte value.
if (*current_rel != end_rel &&
- next_relocation > ((**current_rel)->file_offset() + 3))
- next_relocation = (**current_rel)->file_offset();
+ next_relocation > ((**current_rel)->get_offset() + 3))
+ next_relocation = (**current_rel)->get_offset();
if (next_relocation > file_offset) {
if (!ParseSimpleRegion(file_offset, next_relocation, program))
@@ -350,28 +371,28 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
if (*current_abs_offset != end_abs_offset &&
file_offset == **current_abs_offset) {
- const uint8_t* p = FileOffsetToPointer(file_offset);
+ const uint8_t* p = OffsetToPointer(file_offset);
RVA target_rva = Read32LittleEndian(p);
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
return false;
file_offset += sizeof(RVA);
- ++(*current_abs_offset);
+ (*current_abs_offset)++;
continue;
}
if (*current_rel != end_rel &&
- file_offset == (**current_rel)->file_offset()) {
+ file_offset == (**current_rel)->get_offset()) {
uint32_t relative_target = (**current_rel)->relative_target();
// This cast is for 64 bit systems, and is only safe because we
// are working on 32 bit executables.
RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
relative_target);
- if (!(**current_rel)->EmitInstruction(program, target_rva))
+ if (! (**current_rel)->EmitInstruction(program, target_rva))
return false;
file_offset += (**current_rel)->op_size();
- ++(*current_rel);
+ (*current_rel)++;
continue;
}
}
@@ -380,19 +401,17 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
-CheckBool DisassemblerElf32::ParseSimpleRegion(FileOffset start_file_offset,
- FileOffset end_file_offset,
- AssemblyProgram* program) {
+CheckBool DisassemblerElf32::ParseSimpleRegion(
+ size_t start_file_offset,
+ size_t end_file_offset,
+ AssemblyProgram* program) {
// Callers don't guarantee start < end
- if (start_file_offset >= end_file_offset)
- return true;
+ if (start_file_offset >= end_file_offset) return true;
const size_t len = end_file_offset - start_file_offset;
- if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
- len)) {
+ if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len))
return false;
- }
return true;
}
@@ -401,13 +420,12 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
abs32_locations_.clear();
// Loop through sections for relocation sections
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
+ for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
+ const Elf32_Shdr *section_header = SectionHeader(section_id);
if (section_header->sh_type == SHT_REL) {
- const Elf32_Rel* relocs_table =
- reinterpret_cast<const Elf32_Rel*>(SectionBody(section_id));
+
+ Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
int relocs_table_count = section_header->sh_size /
section_header->sh_entsize;
@@ -415,7 +433,7 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
// Elf32_Word relocation_section_id = section_header->sh_info;
// Loop through relocation objects in the relocation section
- for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) {
+ for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
RVA rva;
// Quite a few of these conversions fail, and we simply skip
@@ -433,18 +451,23 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() {
}
CheckBool DisassemblerElf32::CheckSection(RVA rva) {
- FileOffset file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoFileOffset)
+ size_t offset;
+
+ if (!RVAToFileOffset(rva, &offset)) {
return false;
+ }
+
+ for (int section_id = 0;
+ section_id < SectionHeaderCount();
+ section_id++) {
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
+ const Elf32_Shdr *section_header = SectionHeader(section_id);
- if (file_offset >= section_header->sh_offset &&
- file_offset < (section_header->sh_offset + section_header->sh_size)) {
+ if (offset >= section_header->sh_offset &&
+ offset < (section_header->sh_offset + section_header->sh_size)) {
switch (section_header->sh_type) {
- case SHT_REL: // Falls through.
+ case SHT_REL:
+ // Fall-through
case SHT_PROGBITS:
return true;
}
@@ -455,14 +478,16 @@ CheckBool DisassemblerElf32::CheckSection(RVA rva) {
}
CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
+
rel32_locations_.clear();
// Loop through sections for relocation sections
- for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
- ++section_id) {
- const Elf32_Shdr* section_header = SectionHeader(section_id);
+ for (int section_id = 0;
+ section_id < SectionHeaderCount();
+ section_id++) {
+
+ const Elf32_Shdr *section_header = SectionHeader(section_id);
- // TODO(huangs): Add better checks to skip non-code sections.
// Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0.
if (section_header->sh_type != SHT_PROGBITS ||
section_header->sh_addr == 0)
diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h
index e9d00ca..8483ce3 100644
--- a/courgette/disassembler_elf_32.h
+++ b/courgette/disassembler_elf_32.h
@@ -8,12 +8,10 @@
#include <stddef.h>
#include <stdint.h>
-#include <vector>
-
#include "base/macros.h"
#include "base/memory/scoped_vector.h"
+#include "courgette/assembly_program.h"
#include "courgette/disassembler.h"
-#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
@@ -21,33 +19,43 @@ namespace courgette {
class AssemblyProgram;
-// A Courgette disassembler for 32-bit ELF files. This is only a partial
-// implementation that admits subclasses for the architecture-specific parts of
-// 32-bit ELF file processing. Specifically:
-// - RelToRVA() processes entries in ELF relocation table.
-// - ParseRelocationSection() verifies the organization of the ELF relocation
-// table.
-// - ParseRel32RelocsFromSection() finds branch targets by looking for relative
-// branch/call opcodes in the particular architecture's machine code.
+// A courgette disassembler for 32-bit ELF files. This class is only a
+// partial implementation. Subclasses implement the
+// architecture-specific parts of processing 32-bit ELF files. Specifically,
+// RelToRVA processes entries in ELF relocation table,
+// ParseRelocationSection verifies the organization of the ELF
+// relocation table, and ParseRel32RelocsFromSection finds branch
+// targets by looking for relative jump/call opcodes in the particular
+// architecture's machine code.
class DisassemblerElf32 : public Disassembler {
public:
// Different instructions encode the target rva differently. This
// class encapsulates this behavior. public for use in unit tests.
class TypedRVA {
public:
- explicit TypedRVA(RVA rva) : rva_(rva) { }
+ explicit TypedRVA(RVA rva) : rva_(rva), offset_(static_cast<size_t>(-1)) {
+ }
- virtual ~TypedRVA() { }
+ virtual ~TypedRVA() { };
- RVA rva() const { return rva_; }
- RVA relative_target() const { return relative_target_; }
- FileOffset file_offset() const { return file_offset_; }
+ RVA rva() {
+ return rva_;
+ }
+
+ RVA relative_target() {
+ return relative_target_;
+ }
void set_relative_target(RVA relative_target) {
relative_target_ = relative_target;
}
- void set_file_offset(FileOffset file_offset) {
- file_offset_ = file_offset;
+
+ size_t get_offset() {
+ return offset_;
+ }
+
+ void set_offset(size_t offset) {
+ offset_ = offset;
}
// Computes the relative jump's offset from the op in p.
@@ -57,33 +65,33 @@ class DisassemblerElf32 : public Disassembler {
virtual CheckBool EmitInstruction(AssemblyProgram* program,
RVA target_rva) = 0;
- // Returns the size of the instruction containing the RVA.
virtual uint16_t op_size() const = 0;
- // Comparator for sorting, which assumes uniqueness of RVAs.
- static bool IsLessThan(TypedRVA* a, TypedRVA* b) {
+ static bool IsLessThan(TypedRVA *a, TypedRVA *b) {
return a->rva() < b->rva();
}
private:
const RVA rva_;
- RVA relative_target_ = kNoRVA;
- FileOffset file_offset_ = kNoFileOffset;
+ RVA relative_target_;
+ size_t offset_;
};
public:
- DisassemblerElf32(const void* start, size_t length);
+ explicit DisassemblerElf32(const void* start, size_t length);
+
+ virtual ~DisassemblerElf32() { };
- ~DisassemblerElf32() override { }
+ virtual ExecutableType kind() = 0;
- // Disassembler interfaces.
- RVA FileOffsetToRVA(FileOffset file_offset) const override;
- FileOffset RVAToFileOffset(RVA rva) const override;
- virtual ExecutableType kind() const override = 0;
- bool ParseHeader() override;
- bool Disassemble(AssemblyProgram* target) override;
+ virtual e_machine_values ElfEM() = 0;
- virtual e_machine_values ElfEM() const = 0;
+ // Returns 'true' if the buffer appears to point to a valid ELF executable
+ // for 32 bit. If ParseHeader() succeeds, other member
+ // functions may be called.
+ virtual bool ParseHeader();
+
+ virtual bool Disassemble(AssemblyProgram* target);
// Public for unittests only
std::vector<RVA> &Abs32Locations() { return abs32_locations_; }
@@ -99,13 +107,13 @@ class DisassemblerElf32 : public Disassembler {
return section_header_table_size_;
}
- const Elf32_Shdr* SectionHeader(Elf32_Half id) const {
+ const Elf32_Shdr *SectionHeader(int id) const {
assert(id >= 0 && id < SectionHeaderCount());
return section_header_table_ + id;
}
- const uint8_t* SectionBody(Elf32_Half id) const {
- return FileOffsetToPointer(SectionHeader(id)->sh_offset);
+ const uint8_t* SectionBody(int id) const {
+ return OffsetToPointer(SectionHeader(id)->sh_offset);
}
// Misc Segment Helpers
@@ -114,62 +122,61 @@ class DisassemblerElf32 : public Disassembler {
return program_header_table_size_;
}
- const Elf32_Phdr* ProgramSegmentHeader(Elf32_Half id) const {
+ const Elf32_Phdr *ProgramSegmentHeader(int id) const {
assert(id >= 0 && id < ProgramSegmentHeaderCount());
return program_header_table_ + id;
}
// Misc address space helpers
- CheckBool IsValidTargetRVA(RVA rva) const WARN_UNUSED_RESULT;
+ CheckBool IsValidRVA(RVA rva) const WARN_UNUSED_RESULT;
- // Converts an ELF relocation instruction into an RVA.
+ // Convert an ELF relocation structure into an RVA
virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
const WARN_UNUSED_RESULT = 0;
- CheckBool RVAsToFileOffsets(const std::vector<RVA>& rvas,
- std::vector<FileOffset>* file_offsets);
+ // Returns kNoOffset if there is no file offset corresponding to 'rva'.
+ CheckBool RVAToFileOffset(RVA rva, size_t* result) const WARN_UNUSED_RESULT;
- CheckBool RVAsToFileOffsets(ScopedVector<TypedRVA>* typed_rvas);
+ RVA FileOffsetToRVA(size_t offset) const WARN_UNUSED_RESULT;
- // Parsing code for Disassemble().
+ CheckBool RVAsToOffsets(std::vector<RVA>* rvas /*in*/,
+ std::vector<size_t>* offsets /*out*/);
- virtual CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
- AssemblyProgram* program)
- WARN_UNUSED_RESULT = 0;
+ CheckBool RVAsToOffsets(ScopedVector<TypedRVA>* rvas /*in and out*/);
- virtual CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
- WARN_UNUSED_RESULT = 0;
+ // Parsing code used to implement Disassemble()
CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT;
-
+ virtual CheckBool ParseRelocationSection(
+ const Elf32_Shdr *section_header,
+ AssemblyProgram* program) WARN_UNUSED_RESULT = 0;
CheckBool ParseProgbitsSection(
- const Elf32_Shdr* section_header,
- std::vector<FileOffset>::iterator* current_abs_offset,
- std::vector<FileOffset>::iterator end_abs_offset,
+ const Elf32_Shdr *section_header,
+ std::vector<size_t>::iterator* current_abs_offset,
+ std::vector<size_t>::iterator end_abs_offset,
ScopedVector<TypedRVA>::iterator* current_rel,
ScopedVector<TypedRVA>::iterator end_rel,
AssemblyProgram* program) WARN_UNUSED_RESULT;
-
- CheckBool ParseSimpleRegion(FileOffset start_file_offset,
- FileOffset end_file_offset,
+ CheckBool ParseSimpleRegion(size_t start_file_offset,
+ size_t end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
CheckBool ParseAbs32Relocs() WARN_UNUSED_RESULT;
-
CheckBool CheckSection(RVA rva) WARN_UNUSED_RESULT;
-
CheckBool ParseRel32RelocsFromSections() WARN_UNUSED_RESULT;
+ virtual CheckBool ParseRel32RelocsFromSection(
+ const Elf32_Shdr* section) WARN_UNUSED_RESULT = 0;
- const Elf32_Ehdr* header_;
- const Elf32_Shdr* section_header_table_;
+ Elf32_Ehdr *header_;
+ Elf32_Shdr *section_header_table_;
Elf32_Half section_header_table_size_;
- const Elf32_Phdr* program_header_table_;
+ Elf32_Phdr *program_header_table_;
Elf32_Half program_header_table_size_;
// Section header for default
- const char* default_string_section_;
+ const char *default_string_section_;
std::vector<RVA> abs32_locations_;
ScopedVector<TypedRVA> rel32_locations_;
diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc
index 39172f4..f6490d9 100644
--- a/courgette/disassembler_elf_32_arm.cc
+++ b/courgette/disassembler_elf_32_arm.cc
@@ -4,12 +4,18 @@
#include "courgette/disassembler_elf_32_arm.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
#include <vector>
#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
+
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
namespace courgette {
@@ -18,34 +24,31 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
RVA rva,
uint16_t* c_op,
uint32_t* addr) {
- // Notation for bit ranges in comments:
- // - Listing bits from highest to lowest.
- // - A-Z or (j1), (j2), etc.: single bit in source.
- // - a-z: multiple, consecutive bits in source.
+ // This method takes an ARM or thumb opcode, extracts the relative
+ // target address from it (addr), and creates a corresponding
+ // Courgette opcode (c_op).
+ //
+ // Details on the ARM opcodes, and how the relative targets are
+ // computed, were taken from the "ARM Architecture Reference Manual",
+ // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
+ // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
switch (type) {
case ARM_OFF8: {
- // Encoding T1.
- // The offset is given by lower 8 bits of the op. It is a 9-bit offset,
- // shifted right 1 bit, and signed extended.
- // arm_op = aaaaaaaa Snnnnnnn
- // *addr := SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100
- // *c_op := 00010000 aaaaaaaa
+ // The offset is given by lower 8 bits of the op. It is a 9-bit
+ // offset, shifted right one bit and sign extended.
uint32_t temp = (arm_op & 0x00FF) << 1;
if (temp & 0x0100)
temp |= 0xFFFFFE00;
temp += 4; // Offset from _next_ PC.
+ fflush(stdout);
(*addr) = temp;
(*c_op) = static_cast<uint16_t>(arm_op >> 8) | 0x1000;
break;
}
case ARM_OFF11: {
- // Encoding T2.
- // The offset is given by lower 11 bits of the op, and is a 12-bit offset,
- // shifted right 1 bit, and sign extended.
- // arm_op = aaaaaSnn nnnnnnnn
- // *addr := SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100
- // *c_op := 00100000 000aaaaa
+ // The offset is given by lower 11 bits of the op, and is a
+ // 12-bit offset, shifted right one bit and sign extended.
uint32_t temp = (arm_op & 0x07FF) << 1;
if (temp & 0x00000800)
temp |= 0xFFFFF000;
@@ -58,9 +61,6 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
case ARM_OFF24: {
// The offset is given by the lower 24-bits of the op, shifted
// left 2 bits, and sign extended.
- // arm_op = aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn
- // *addr := SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000
- // *c_op := 00110000 aaaaaaaa
uint32_t temp = (arm_op & 0x00FFFFFF) << 2;
if (temp & 0x02000000)
temp |= 0xFC000000;
@@ -71,18 +71,6 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
break;
}
case ARM_OFF25: {
- // Encoding T4.
- // arm_op = aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn
- // where CD is in {01, 10, 11}
- // i1 := ~(j1 ^ S)
- // i2 := ~(j2 ^ S)
- // If CD == 10:
- // pppp := (rva % 4 == 0) ? 0100 : 0010
- // Else:
- // pppp := 0100
- // *addr := SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp
- // *c_op := 0100pppp aaaaaBCD
- // TODO(huangs): aaaaa = 11110 and B = 1 always? Investigate and fix.
uint32_t temp = 0;
temp |= (arm_op & 0x000007FF) << 1; // imm11
temp |= (arm_op & 0x03FF0000) >> 4; // imm10
@@ -90,8 +78,8 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
uint32_t S = (arm_op & (1 << 26)) >> 26;
uint32_t j2 = (arm_op & (1 << 11)) >> 11;
uint32_t j1 = (arm_op & (1 << 13)) >> 13;
- bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0; // D
- bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0; // C
+ bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
+ bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
uint32_t i2 = ~(j2 ^ S) & 1;
uint32_t i1 = ~(j1 ^ S) & 1;
@@ -103,7 +91,7 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
temp |= 0xFE000000;
uint32_t prefetch;
if (toARM) {
- // Align PC on 4-byte boundary.
+ // Align PC on 4-byte boundary
uint32_t align4byte = (rva % 4) ? 2 : 4;
prefetch = align4byte;
} else {
@@ -113,25 +101,20 @@ CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type,
(*addr) = temp;
uint32_t temp2 = 0x4000;
- temp2 |= (arm_op & (1 << 12)) >> 12; // .......D
- temp2 |= (arm_op & (1 << 14)) >> 13; // ......C.
- temp2 |= (arm_op & (1 << 15)) >> 13; // .....B..
- temp2 |= (arm_op & 0xF8000000) >> 24; // aaaaa...
+ temp2 |= (arm_op & (1 << 12)) >> 12;
+ temp2 |= (arm_op & (1 << 14)) >> 13;
+ temp2 |= (arm_op & (1 << 15)) >> 13;
+ temp2 |= (arm_op & 0xF8000000) >> 24;
temp2 |= (prefetch & 0x0000000F) << 8;
(*c_op) = static_cast<uint16_t>(temp2);
break;
}
case ARM_OFF21: {
- // Encoding T3.
- // arm_op = 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn
- // *addr := SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100
- // *c_op := 01010000 0000cccc
uint32_t temp = 0;
temp |= (arm_op & 0x000007FF) << 1; // imm11
temp |= (arm_op & 0x003F0000) >> 4; // imm6
uint32_t S = (arm_op & (1 << 26)) >> 26;
- // TODO(huangs): Check with docs: Perhaps j1, j2 should swap?
uint32_t j2 = (arm_op & (1 << 11)) >> 11;
uint32_t j1 = (arm_op & (1 << 13)) >> 13;
@@ -157,31 +140,20 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type,
uint16_t c_op,
uint32_t addr,
uint32_t* arm_op) {
+ // Reverses the process in the Compress() method. Takes the
+ // Courgette op and relative address and reconstructs the original
+ // ARM or thumb op.
switch (type) {
case ARM_OFF8:
- // addr = SSSSSSSS SSSSSSSS SSSSSSSS nnnnnnn0 + 100
- // c_op = 00010000 aaaaaaaa
- // *arm_op := aaaaaaaa Snnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
break;
case ARM_OFF11:
- // addr = SSSSSSSS SSSSSSSS SSSSSnnn nnnnnnn0 + 100
- // c_op = 00100000 000aaaaa
- // *arm_op := aaaaaSnn nnnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
break;
case ARM_OFF24:
- // addr = SSSSSSSn nnnnnnnn nnnnnnnn nnnnnn00 + 1000
- // c_op = 00110000 aaaaaaaa
- // *arm_op := aaaaaaaa Snnnnnnn nnnnnnnn nnnnnnnn
(*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
break;
case ARM_OFF25: {
- // addr = SSSSSSSS (i1)(i2)mmmmmm mmmmnnnn nnnnnnn0 + pppp
- // c_op = 0100pppp aaaaaBCD
- // j1 := ~i1 ^ S
- // j2 := ~i2 ^ S
- // *arm_op := aaaaaSmm mmmmmmmm BC(j1)D(j2)nnn nnnnnnnn
uint32_t temp = 0;
temp |= (c_op & (1 << 0)) << 12;
temp |= (c_op & (1 << 1)) << 13;
@@ -211,9 +183,6 @@ CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type,
break;
}
case ARM_OFF21: {
- // addr = SSSSSSSS SSSS(j1)(j2)mm mmmmnnnn nnnnnnn0 + 100
- // c_op = 01010000 0000cccc
- // *arm_op := 11110Scc ccmmmmmm 10(j1)0(j2)nnn nnnnnnnn
uint32_t temp = 0xF0008000;
temp |= (c_op & (0x03C00000 >> 22)) << 22;
@@ -261,28 +230,24 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
const uint8_t* op_pointer) {
arm_op_ = op_pointer;
switch (type_) {
- case ARM_OFF8: // Falls through.
+ case ARM_OFF8:
+ // Fall through
case ARM_OFF11: {
RVA relative_target;
- CheckBool ret = Compress(type_,
- Read16LittleEndian(op_pointer),
- rva(),
- &c_op_,
- &relative_target);
+ CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
+ &c_op_, &relative_target);
set_relative_target(relative_target);
return ret;
}
case ARM_OFF24: {
RVA relative_target;
- CheckBool ret = Compress(type_,
- Read32LittleEndian(op_pointer),
- rva(),
- &c_op_,
- &relative_target);
+ CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
+ &c_op_, &relative_target);
set_relative_target(relative_target);
return ret;
}
- case ARM_OFF25: // Falls through.
+ case ARM_OFF25:
+ // Fall through
case ARM_OFF21: {
// A thumb-2 op is 32 bits stored as two 16-bit words
uint32_t pval = (Read16LittleEndian(op_pointer) << 16) |
@@ -307,24 +272,26 @@ CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
}
DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
- : DisassemblerElf32(start, length) {
+ : DisassemblerElf32(start, length) {
}
-// Convert an ELF relocation struction into an RVA.
+// Convert an ELF relocation structure into an RVA
CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
- // The rightmost byte of r_info is the type.
+
+ // The rightmost byte of r_info is the type...
elf32_rel_arm_type_values type =
- static_cast<elf32_rel_arm_type_values>(rel.r_info & 0xFF);
+ (elf32_rel_arm_type_values)(unsigned char)rel.r_info;
- // The other 3 bytes of r_info are the symbol.
+ // The other 3 bytes of r_info are the symbol
uint32_t symbol = rel.r_info >> 8;
- switch (type) {
+ switch(type)
+ {
case R_ARM_RELATIVE:
if (symbol != 0)
return false;
- // This is a basic ABS32 relocation address.
+ // This is a basic ABS32 relocation address
*result = rel.r_offset;
return true;
@@ -334,33 +301,32 @@ CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
}
CheckBool DisassemblerElf32ARM::ParseRelocationSection(
- const Elf32_Shdr* section_header,
- AssemblyProgram* program) {
- // This method compresses a contiguous stretch of R_ARM_RELATIVE entries in
- // the relocation table with a Courgette relocation table instruction.
- // It skips any entries at the beginning that appear in a section that
- // Courgette doesn't support, e.g. INIT.
- //
+ const Elf32_Shdr *section_header,
+ AssemblyProgram* program) {
+ // This method compresses a contiguous stretch of R_ARM_RELATIVE
+ // entries in the relocation table with a Courgette relocation table
+ // instruction. It skips any entries at the beginning that appear
+ // in a section that Courgette doesn't support, e.g. INIT.
// Specifically, the entries should be
// (1) In the same relocation table
// (2) Are consecutive
// (3) Are sorted in memory address order
//
- // Happily, this is normally the case, but it's not required by spec so we
- // check, and just don't do it if we don't match up.
+ // Happily, this is normally the case, but it's not required by spec
+ // so we check, and just don't do it if we don't match up.
//
- // The expectation is that one relocation section will contain all of our
- // R_ARM_RELATIVE entries in the expected order followed by assorted other
- // entries we can't use special handling for.
+ // The expectation is that one relocation section will contain
+ // all of our R_ARM_RELATIVE entries in the expected order followed
+ // by assorted other entries we can't use special handling for.
bool match = true;
- // Walk all the bytes in the section, matching relocation table or not.
- FileOffset file_offset = section_header->sh_offset;
- FileOffset section_end = section_header->sh_offset + section_header->sh_size;
+ // Walk all the bytes in the section, matching relocation table or not
+ size_t file_offset = section_header->sh_offset;
+ size_t section_end = section_header->sh_offset + section_header->sh_size;
- const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>(
- FileOffsetToPointer(section_header->sh_offset));
+ Elf32_Rel *section_relocs_iter =
+ (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
uint32_t section_relocs_count =
section_header->sh_size / section_header->sh_entsize;
@@ -371,15 +337,13 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
if (!abs32_locations_.empty()) {
std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
- for (uint32_t i = 0; i < section_relocs_count; ++i) {
+ for (uint32_t i = 0; i < section_relocs_count; i++) {
if (section_relocs_iter->r_offset == *reloc_iter)
break;
- if (!ParseSimpleRegion(file_offset,
- file_offset + sizeof(Elf32_Rel),
- program)) {
+ if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel),
+ program))
return false;
- }
file_offset += sizeof(Elf32_Rel);
++section_relocs_iter;
@@ -387,12 +351,11 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
while (match && (reloc_iter != abs32_locations_.end())) {
if (section_relocs_iter->r_info != R_ARM_RELATIVE ||
- section_relocs_iter->r_offset != *reloc_iter) {
+ section_relocs_iter->r_offset != *reloc_iter)
match = false;
- }
- ++section_relocs_iter;
- ++reloc_iter;
+ section_relocs_iter++;
+ reloc_iter++;
file_offset += sizeof(Elf32_Rel);
}
@@ -406,119 +369,118 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
-// TODO(huangs): Detect and avoid overlap with abs32 addresses.
CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
const Elf32_Shdr* section_header) {
- FileOffset start_file_offset = section_header->sh_offset;
- FileOffset end_file_offset = start_file_offset + section_header->sh_size;
+ uint32_t start_file_offset = section_header->sh_offset;
+ uint32_t end_file_offset = start_file_offset + section_header->sh_size;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract |pointer_to_rva|.
+ // subtract 'adjust_pointer_to_rva'.
const uint8_t* const adjust_pointer_to_rva =
start_pointer - section_header->sh_addr;
// Find the rel32 relocations.
const uint8_t* p = start_pointer;
- bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
+ bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
while (p < end_pointer) {
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- scoped_ptr<TypedRVAARM> rel32_rva;
+
+ TypedRVAARM* rel32_rva = NULL;
RVA target_rva = 0;
bool found = false;
// 16-bit thumb ops
- if (!found && p + 3 <= end_pointer) {
+ if (!found && (p + 3) <= end_pointer) {
uint16_t pval = Read16LittleEndian(p);
if ((pval & 0xF000) == 0xD000) {
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva.reset(new TypedRVAARM(ARM_OFF8, rva));
- if (!rel32_rva->ComputeRelativeTarget(p))
+ rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
+ if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
return false;
-
+ }
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
} else if ((pval & 0xF800) == 0xE000) {
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva.reset(new TypedRVAARM(ARM_OFF11, rva));
- if (!rel32_rva->ComputeRelativeTarget(p))
+ rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
+ if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
return false;
-
+ }
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
- // thumb-2 ops comprised of two 16-bit words.
- if (!found && p + 5 <= end_pointer) {
+ // thumb-2 ops comprised of two 16-bit words
+ if (!found && (p + 5) <= end_pointer) {
// This is really two 16-bit words, not one 32-bit word.
uint32_t pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
if ((pval & 0xF8008000) == 0xF0008000) {
// Covers thumb-2's 32-bit conditional/unconditional branches
- if ((pval & (1 << 14)) || (pval & (1 << 12))) {
+
+ if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
// A branch, with link, or with link and exchange.
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva.reset(new TypedRVAARM(ARM_OFF25, rva));
- if (!rel32_rva->ComputeRelativeTarget(p))
+ rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
+ if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
return false;
-
+ }
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
-
} else {
// TODO(paulgazz) make sure cond is not 111
// A conditional branch instruction
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva.reset(new TypedRVAARM(ARM_OFF21, rva));
- if (!rel32_rva->ComputeRelativeTarget(p))
+ rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
+ if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
return false;
-
+ }
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
}
- // 32-bit ARM ops.
+ // 32-bit ARM ops
if (!found && on_32bit && (p + 5) <= end_pointer) {
uint32_t pval = Read32LittleEndian(p);
if ((pval & 0x0E000000) == 0x0A000000) {
// Covers both 0x0A 0x0B ARM relative branches
RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- rel32_rva.reset(new TypedRVAARM(ARM_OFF24, rva));
- if (!rel32_rva->ComputeRelativeTarget(p))
+ rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
+ if (!rel32_rva->ComputeRelativeTarget((uint8_t*)p)) {
return false;
-
+ }
target_rva = rel32_rva->rva() + rel32_rva->relative_target();
found = true;
}
}
- if (found && IsValidTargetRVA(target_rva)) {
- uint16_t op_size = rel32_rva->op_size();
- rel32_locations_.push_back(rel32_rva.release());
+ if (found && IsValidRVA(target_rva)) {
+ rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
#endif
- p += op_size;
+ p += rel32_rva->op_size();
- // A tricky way to update the on_32bit flag. Here is the truth table:
+ // A tricky way to update the on_32bit flag. Here is the truth table:
// on_32bit | on_32bit size is 4
// ---------+---------------------
// 1 | 0 0
// 0 | 0 1
// 0 | 1 0
// 1 | 1 1
- on_32bit = (~(on_32bit ^ (op_size == 4))) != 0;
+ on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0;
} else {
// Move 2 bytes at a time, but track 32-bit boundaries
p += 2;
diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h
index 5dc6897..17ebb25 100644
--- a/courgette/disassembler_elf_32_arm.h
+++ b/courgette/disassembler_elf_32_arm.h
@@ -8,10 +8,9 @@
#include <stddef.h>
#include <stdint.h>
-#include <map>
-
#include "base/macros.h"
#include "courgette/disassembler_elf_32.h"
+#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
namespace courgette {
@@ -31,60 +30,51 @@ class DisassemblerElf32ARM : public DisassemblerElf32 {
class TypedRVAARM : public TypedRVA {
public:
TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { }
- ~TypedRVAARM() override { }
-
- // TypedRVA interfaces.
- CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override;
- CheckBool EmitInstruction(AssemblyProgram* program,
- RVA target_rva) override;
- uint16_t op_size() const override;
uint16_t c_op() const { return c_op_; }
+ virtual CheckBool ComputeRelativeTarget(const uint8_t* op_pointer);
+
+ virtual CheckBool EmitInstruction(AssemblyProgram* program,
+ RVA target_rva);
+
+ virtual uint16_t op_size() const;
+
private:
ARM_RVA type_;
- uint16_t c_op_; // Set by ComputeRelativeTarget().
+
+ uint16_t c_op_; // set by ComputeRelativeTarget()
const uint8_t* arm_op_;
};
- DisassemblerElf32ARM(const void* start, size_t length);
+ explicit DisassemblerElf32ARM(const void* start, size_t length);
- ~DisassemblerElf32ARM() override { }
+ virtual ExecutableType kind() { return EXE_ELF_32_ARM; }
- // DisassemblerElf32 interfaces.
- ExecutableType kind() const override { return EXE_ELF_32_ARM; }
- e_machine_values ElfEM() const override { return EM_ARM; }
+ virtual e_machine_values ElfEM() { return EM_ARM; }
- // Takes an ARM or thumb opcode |arm_op| of specified |type| and located at
- // |rva|, extracts the instruction-relative target RVA into |*addr| and
- // encodes the corresponding Courgette opcode as |*c_op|.
- //
- // Details on ARM opcodes, and target RVA extraction are taken from
- // "ARM Architecture Reference Manual", section A4.1.5 and
- // "Thumb-2 supplement", section 4.6.12.
- // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
static CheckBool Compress(ARM_RVA type,
uint32_t arm_op,
RVA rva,
uint16_t* c_op /* out */,
uint32_t* addr /* out */);
- // Inverse for Compress(). Takes Courgette op |c_op| and relative address
- // |addr| to reconstruct the original ARM or thumb op |*arm_op|.
static CheckBool Decompress(ARM_RVA type,
uint16_t c_op,
uint32_t addr,
uint32_t* arm_op /* out */);
protected:
- // DisassemblerElf32 interfaces.
- CheckBool RelToRVA(Elf32_Rel rel,
- RVA* result) const override WARN_UNUSED_RESULT;
- CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
- AssemblyProgram* program)
- override WARN_UNUSED_RESULT;
- CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
- override WARN_UNUSED_RESULT;
+
+ virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
+ const WARN_UNUSED_RESULT;
+
+ virtual CheckBool ParseRelocationSection(
+ const Elf32_Shdr *section_header,
+ AssemblyProgram* program) WARN_UNUSED_RESULT;
+
+ virtual CheckBool ParseRel32RelocsFromSection(
+ const Elf32_Shdr* section) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
std::map<RVA, int> rel32_target_rvas_;
diff --git a/courgette/disassembler_elf_32_x86.cc b/courgette/disassembler_elf_32_x86.cc
index 45f7cf6..98084c1 100644
--- a/courgette/disassembler_elf_32_x86.cc
+++ b/courgette/disassembler_elf_32_x86.cc
@@ -4,45 +4,37 @@
#include "courgette/disassembler_elf_32_x86.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
#include <vector>
#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
+
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
namespace courgette {
-CheckBool DisassemblerElf32X86::TypedRVAX86::ComputeRelativeTarget(
- const uint8_t* op_pointer) {
- set_relative_target(Read32LittleEndian(op_pointer) + 4);
- return true;
-}
-
-CheckBool DisassemblerElf32X86::TypedRVAX86::EmitInstruction(
- AssemblyProgram* program,
- RVA target_rva) {
- return program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
-}
-
-uint16_t DisassemblerElf32X86::TypedRVAX86::op_size() const {
- return 4;
-}
-
DisassemblerElf32X86::DisassemblerElf32X86(const void* start, size_t length)
- : DisassemblerElf32(start, length) {
+ : DisassemblerElf32(start, length) {
}
-// Convert an ELF relocation struction into an RVA.
+// Convert an ELF relocation structure into an RVA
CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
- // The rightmost byte of r_info is the type.
+
+ // The rightmost byte of r_info is the type...
elf32_rel_386_type_values type =
- static_cast<elf32_rel_386_type_values>(rel.r_info & 0xFF);
+ (elf32_rel_386_type_values)(unsigned char)rel.r_info;
- // The other 3 bytes of r_info are the symbol.
+ // The other 3 bytes of r_info are the symbol
uint32_t symbol = rel.r_info >> 8;
- switch (type) {
+ switch(type)
+ {
case R_386_NONE:
case R_386_32:
case R_386_PC32:
@@ -57,7 +49,7 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
if (symbol != 0)
return false;
- // This is a basic ABS32 relocation address.
+ // This is a basic ABS32 relocation address
*result = rel.r_offset;
return true;
@@ -71,31 +63,32 @@ CheckBool DisassemblerElf32X86::RelToRVA(Elf32_Rel rel, RVA* result) const {
}
CheckBool DisassemblerElf32X86::ParseRelocationSection(
- const Elf32_Shdr* section_header,
- AssemblyProgram* program) {
- // We can reproduce the R_386_RELATIVE entries in one of the relocation table
- // based on other information in the patch, given these conditions:
+ const Elf32_Shdr *section_header,
+ AssemblyProgram* program) {
+ // We can reproduce the R_386_RELATIVE entries in one of the relocation
+ // tables based on other information in the patch, given these
+ // conditions:
//
// All R_386_RELATIVE entries are:
// 1) In the same relocation table
// 2) Are consecutive
// 3) Are sorted in memory address order
//
- // Happily, this is normally the case, but it's not required by spec, so we
- // check, and just don't do it if we don't match up.
+ // Happily, this is normally the case, but it's not required by spec
+ // so we check, and just don't do it if we don't match up.
- // The expectation is that one relocation section will contain all of our
- // R_386_RELATIVE entries in the expected order followed by assorted other
- // entries we can't use special handling for.
+ // The expectation is that one relocation section will contain
+ // all of our R_386_RELATIVE entries in the expected order followed
+ // by assorted other entries we can't use special handling for.
bool match = true;
- // Walk all the bytes in the section, matching relocation table or not.
- FileOffset file_offset = section_header->sh_offset;
- FileOffset section_end = file_offset + section_header->sh_size;
+ // Walk all the bytes in the section, matching relocation table or not
+ size_t file_offset = section_header->sh_offset;
+ size_t section_end = section_header->sh_offset + section_header->sh_size;
- const Elf32_Rel* section_relocs_iter = reinterpret_cast<const Elf32_Rel*>(
- FileOffsetToPointer(section_header->sh_offset));
+ Elf32_Rel *section_relocs_iter =
+ (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
uint32_t section_relocs_count =
section_header->sh_size / section_header->sh_entsize;
@@ -108,17 +101,16 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection(
std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
- while (match && (reloc_iter != abs32_locations_.end())) {
+ while (match && (reloc_iter != abs32_locations_.end())) {
if (section_relocs_iter->r_info != R_386_RELATIVE ||
- section_relocs_iter->r_offset != *reloc_iter) {
+ section_relocs_iter->r_offset != *reloc_iter)
match = false;
- }
- ++section_relocs_iter;
- ++reloc_iter;
+ section_relocs_iter++;
+ reloc_iter++;
}
if (match) {
- // Skip over relocation tables.
+ // Skip over relocation tables
if (!program->EmitElfRelocationInstruction())
return false;
file_offset += sizeof(Elf32_Rel) * abs32_locations_.size();
@@ -127,27 +119,28 @@ CheckBool DisassemblerElf32X86::ParseRelocationSection(
return ParseSimpleRegion(file_offset, section_end, program);
}
-// TODO(huangs): Detect and avoid overlap with abs32 addresses.
CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
const Elf32_Shdr* section_header) {
- FileOffset start_file_offset = section_header->sh_offset;
- FileOffset end_file_offset = start_file_offset + section_header->sh_size;
+ uint32_t start_file_offset = section_header->sh_offset;
+ uint32_t end_file_offset = start_file_offset + section_header->sh_size;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract |pointer_to_rva|.
+ // subtract 'adjust_pointer_to_rva'.
const uint8_t* const adjust_pointer_to_rva =
start_pointer - section_header->sh_addr;
// Find the rel32 relocations.
const uint8_t* p = start_pointer;
while (p < end_pointer) {
+ //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = nullptr;
+ const uint8_t* rel32 = NULL;
if (p + 5 <= end_pointer) {
if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
@@ -155,26 +148,32 @@ CheckBool DisassemblerElf32X86::ParseRel32RelocsFromSection(
}
}
if (p + 6 <= end_pointer) {
- if (*p == 0x0F && (p[1] & 0xF0) == 0x80) { // Jcc long form
+ if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
rel32 = p + 2;
}
}
if (rel32) {
RVA rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
- scoped_ptr<TypedRVAX86> rel32_rva(new TypedRVAX86(rva));
+ TypedRVAX86* rel32_rva = new TypedRVAX86(rva);
- if (!rel32_rva->ComputeRelativeTarget(rel32))
+ if (!rel32_rva->ComputeRelativeTarget(rel32)) {
+ delete rel32_rva;
return false;
+ }
RVA target_rva = rel32_rva->rva() + rel32_rva->relative_target();
- if (IsValidTargetRVA(target_rva)) {
- rel32_locations_.push_back(rel32_rva.release());
+ // To be valid, rel32 target must be within image, and within this
+ // section.
+ if (IsValidRVA(target_rva)) {
+ rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
#endif
p = rel32 + 4;
continue;
+ } else {
+ delete rel32_rva;
}
}
p += 1;
diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h
index 63be755..5c87d4c 100644
--- a/courgette/disassembler_elf_32_x86.h
+++ b/courgette/disassembler_elf_32_x86.h
@@ -8,10 +8,9 @@
#include <stddef.h>
#include <stdint.h>
-#include <map>
-
#include "base/macros.h"
#include "courgette/disassembler_elf_32.h"
+#include "courgette/memory_allocator.h"
#include "courgette/types_elf.h"
namespace courgette {
@@ -22,33 +21,38 @@ class DisassemblerElf32X86 : public DisassemblerElf32 {
public:
class TypedRVAX86 : public TypedRVA {
public:
- explicit TypedRVAX86(RVA rva) : TypedRVA(rva) { }
- ~TypedRVAX86() override { }
+ explicit TypedRVAX86(RVA rva) : TypedRVA(rva) {
+ }
+
+ CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override {
+ set_relative_target(Read32LittleEndian(op_pointer) + 4);
+ return true;
+ }
- // TypedRVA interfaces.
- CheckBool ComputeRelativeTarget(const uint8_t* op_pointer) override;
CheckBool EmitInstruction(AssemblyProgram* program,
- RVA target_rva) override;
- uint16_t op_size() const override;
+ RVA target_rva) override {
+ return program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
+ }
+
+ uint16_t op_size() const override { return 4; }
};
- DisassemblerElf32X86(const void* start, size_t length);
+ explicit DisassemblerElf32X86(const void* start, size_t length);
- ~DisassemblerElf32X86() override { }
+ virtual ExecutableType kind() { return EXE_ELF_32_X86; }
- // DisassemblerElf32 interfaces.
- ExecutableType kind() const override { return EXE_ELF_32_X86; }
- e_machine_values ElfEM() const override { return EM_386; }
+ virtual e_machine_values ElfEM() { return EM_386; }
protected:
- // DisassemblerElf32 interfaces.
- CheckBool RelToRVA(Elf32_Rel rel,
- RVA* result) const override WARN_UNUSED_RESULT;
- CheckBool ParseRelocationSection(const Elf32_Shdr* section_header,
- AssemblyProgram* program)
- override WARN_UNUSED_RESULT;
- CheckBool ParseRel32RelocsFromSection(const Elf32_Shdr* section)
- override WARN_UNUSED_RESULT;
+ virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
+ const WARN_UNUSED_RESULT;
+
+ virtual CheckBool ParseRelocationSection(
+ const Elf32_Shdr *section_header,
+ AssemblyProgram* program) WARN_UNUSED_RESULT;
+
+ virtual CheckBool ParseRel32RelocsFromSection(
+ const Elf32_Shdr* section) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
std::map<RVA, int> rel32_target_rvas_;
diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc
index c15b8df..3ce6a63 100644
--- a/courgette/disassembler_elf_32_x86_unittest.cc
+++ b/courgette/disassembler_elf_32_x86_unittest.cc
@@ -2,25 +2,16 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "courgette/disassembler_elf_32_x86.h"
-
#include <stddef.h>
#include <stdint.h>
-#include <algorithm>
-#include <string>
-
-#include "base/memory/scoped_ptr.h"
#include "courgette/assembly_program.h"
#include "courgette/base_test_unittest.h"
-#include "courgette/image_utils.h"
-
-namespace courgette {
-
-namespace {
+#include "courgette/disassembler_elf_32_x86.h"
class DisassemblerElf32X86Test : public BaseTest {
public:
+
void TestExe(const char* file_name,
size_t expected_abs_count,
size_t expected_rel_count) const;
@@ -29,11 +20,10 @@ class DisassemblerElf32X86Test : public BaseTest {
void DisassemblerElf32X86Test::TestExe(const char* file_name,
size_t expected_abs_count,
size_t expected_rel_count) const {
- using TypedRVA = DisassemblerElf32::TypedRVA;
std::string file1 = FileContents(file_name);
- scoped_ptr<DisassemblerElf32X86> disassembler(
- new DisassemblerElf32X86(file1.c_str(), file1.length()));
+ scoped_ptr<courgette::DisassemblerElf32X86> disassembler(
+ new courgette::DisassemblerElf32X86(file1.c_str(), file1.length()));
bool can_parse_header = disassembler->ParseHeader();
EXPECT_TRUE(can_parse_header);
@@ -43,7 +33,7 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
// real file, since trailing debug info is not included
EXPECT_EQ(file1.length(), disassembler->length());
- const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->OffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ(0x7F, offset_p[0]);
@@ -51,45 +41,46 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
EXPECT_EQ('L', offset_p[2]);
EXPECT_EQ('F', offset_p[3]);
- scoped_ptr<AssemblyProgram> program(new AssemblyProgram(EXE_ELF_32_X86));
-
- EXPECT_TRUE(disassembler->Disassemble(program.get()));
-
- const std::vector<RVA>& abs32_list = disassembler->Abs32Locations();
-
- // Flatten the list typed rel32 to a list of rel32 RVAs.
- std::vector<RVA> rel32_list;
- rel32_list.reserve(disassembler->Rel32Locations().size());
- for (TypedRVA* typed_rel32 : disassembler->Rel32Locations())
- rel32_list.push_back(typed_rel32->rva());
-
- EXPECT_EQ(expected_abs_count, abs32_list.size());
- EXPECT_EQ(expected_rel_count, rel32_list.size());
-
- EXPECT_TRUE(std::is_sorted(abs32_list.begin(), abs32_list.end()));
- EXPECT_TRUE(std::is_sorted(rel32_list.begin(), rel32_list.end()));
+ courgette::AssemblyProgram* program =
+ new courgette::AssemblyProgram(courgette::EXE_ELF_32_X86);
+
+ EXPECT_TRUE(disassembler->Disassemble(program));
+
+ EXPECT_EQ(disassembler->Abs32Locations().size(), expected_abs_count);
+ EXPECT_EQ(disassembler->Rel32Locations().size(), expected_rel_count);
+
+ // Prove that none of the rel32 RVAs overlap with abs32 RVAs
+ std::set<courgette::RVA> abs(disassembler->Abs32Locations().begin(),
+ disassembler->Abs32Locations().end());
+ std::set<courgette::DisassemblerElf32::TypedRVA*>
+ rel(disassembler->Rel32Locations().begin(),
+ disassembler->Rel32Locations().end());
+ for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator
+ rel32 = disassembler->Rel32Locations().begin();
+ rel32 != disassembler->Rel32Locations().end();
+ rel32++) {
+ EXPECT_TRUE(abs.find((*rel32)->rva()) == abs.end());
+ }
- // Verify that rel32 RVAs do not overlap with abs32 RVAs.
- // TODO(huangs): Fix this to account for RVA's 4-byte width.
- bool found_match = false;
- std::vector<RVA>::const_iterator abs32_it = abs32_list.begin();
- std::vector<RVA>::const_iterator rel32_it = rel32_list.begin();
- while (abs32_it != abs32_list.end() && rel32_it != rel32_list.end()) {
- if (*abs32_it < *rel32_it) {
- ++abs32_it;
- } else if (*abs32_it > *rel32_it) {
- ++rel32_it;
- } else {
- found_match = true;
+ for (std::vector<courgette::RVA>::iterator abs32 =
+ disassembler->Abs32Locations().begin();
+ abs32 != disassembler->Abs32Locations().end();
+ abs32++) {
+ bool found = false;
+ for (std::vector<courgette::DisassemblerElf32::TypedRVA*>::iterator
+ rel32 = disassembler->Rel32Locations().begin();
+ rel32 != disassembler->Rel32Locations().end();
+ rel32++) {
+ if (*abs32 == (*rel32)->rva()) {
+ found = true;
+ break;
+ }
}
+ EXPECT_TRUE(!found);
}
- EXPECT_FALSE(found_match);
+ delete program;
}
-} // namespace
-
TEST_F(DisassemblerElf32X86Test, All) {
TestExe("elf-32-1", 200, 3442);
}
-
-} // namespace courgette
diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc
index 819b7f2..74b0fe4 100644
--- a/courgette/disassembler_win32_x64.cc
+++ b/courgette/disassembler_win32_x64.cc
@@ -8,73 +8,37 @@
#include <stdint.h>
#include <algorithm>
-#include <iostream>
+#include <string>
+#include <vector>
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
+
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
namespace courgette {
DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length)
- : Disassembler(start, length),
- incomplete_disassembly_(false),
- is_PE32_plus_(false),
- optional_header_(nullptr),
- size_of_optional_header_(0),
- offset_of_data_directories_(0),
- machine_type_(0),
- number_of_sections_(0),
- sections_(nullptr),
- has_text_section_(false),
- size_of_code_(0),
- size_of_initialized_data_(0),
- size_of_uninitialized_data_(0),
- base_of_code_(0),
- base_of_data_(0),
- image_base_(0),
- size_of_image_(0),
- number_of_data_directories_(0) {
-}
-
-FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
- const Section* section = RVAToSection(rva);
- if (section != nullptr) {
- FileOffset offset_in_section = rva - section->virtual_address;
- // Need this extra check, since an |rva| may be valid for a section, but is
- // non-existent in an image (e.g. uninit data).
- if (offset_in_section >= section->size_of_raw_data)
- return kNoFileOffset;
-
- return static_cast<FileOffset>(section->file_offset_of_raw_data +
- offset_in_section);
- }
-
- // Small RVA values point into the file header in the loaded image.
- // RVA 0 is the module load address which Windows uses as the module handle.
- // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
- // DOS header.
- if (rva == 0 || rva == 2)
- return static_cast<FileOffset>(rva);
-
- NOTREACHED();
- return kNoFileOffset;
-}
-
-RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
- for (int i = 0; i < number_of_sections_; ++i) {
- const Section* section = &sections_[i];
- if (file_offset >= section->file_offset_of_raw_data) {
- FileOffset offset_in_section =
- file_offset - section->file_offset_of_raw_data;
- if (offset_in_section < section->size_of_raw_data)
- return static_cast<RVA>(section->virtual_address + offset_in_section);
- }
- }
-
- NOTREACHED();
- return kNoRVA;
+ : Disassembler(start, length),
+ incomplete_disassembly_(false),
+ is_PE32_plus_(false),
+ optional_header_(NULL),
+ size_of_optional_header_(0),
+ offset_of_data_directories_(0),
+ machine_type_(0),
+ number_of_sections_(0),
+ sections_(NULL),
+ has_text_section_(false),
+ size_of_code_(0),
+ size_of_initialized_data_(0),
+ size_of_uninitialized_data_(0),
+ base_of_code_(0),
+ base_of_data_(0),
+ image_base_(0),
+ size_of_image_(0),
+ number_of_data_directories_(0) {
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -93,19 +57,18 @@ bool DisassemblerWin32X64::ParseHeader() {
return Bad("Not MZ");
// offset from DOS header to PE header is stored in DOS header.
- FileOffset file_offset = static_cast<FileOffset>(
- ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader));
+ uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader);
- if (file_offset >= length())
+ if (offset >= length())
return Bad("Bad offset to PE header");
- const uint8_t* const pe_header = FileOffsetToPointer(file_offset);
+ const uint8_t* const pe_header = OffsetToPointer(offset);
const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
if (pe_header <= start() ||
pe_header >= end() - kMinPEHeaderSize)
- return Bad("Bad file offset to PE header");
+ return Bad("Bad offset to PE header");
- if (file_offset % 8 != 0)
+ if (offset % 8 != 0)
return Bad("Misaligned PE header");
// The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
@@ -206,7 +169,7 @@ bool DisassemblerWin32X64::ParseHeader() {
size_of_optional_header_);
size_t detected_length = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
// TODO(sra): consider using the 'characteristics' field of the section
@@ -304,7 +267,7 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) {
RVA rva = page_rva + offset;
// TODO(sebmarchand): Skip the relocs that live outside of the image. See
// the version of this function in disassembler_win32_x86.cc.
- if (type == 10) { // IMAGE_REL_BASED_DIR64
+ if (type == 10) { // IMAGE_REL_BASED_DIR64
relocs->push_back(rva);
} else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE
// Ignore, used as padding.
@@ -324,19 +287,48 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) {
}
const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const {
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; i++) {
const Section* section = &sections_[i];
- if (rva >= section->virtual_address) {
- FileOffset offset_in_section = rva - section->virtual_address;
- if (offset_in_section < section->virtual_size)
- return section;
+ uint32_t offset = rva - section->virtual_address;
+ if (offset < section->virtual_size) {
+ return section;
+ }
+ }
+ return NULL;
+}
+
+int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ if (section) {
+ uint32_t offset = rva - section->virtual_address;
+ if (offset < section->size_of_raw_data) {
+ return section->file_offset_of_raw_data + offset;
+ } else {
+ return kNoOffset; // In section but not in file (e.g. uninit data).
}
}
- return nullptr;
+
+ // Small RVA values point into the file header in the loaded image.
+ // RVA 0 is the module load address which Windows uses as the module handle.
+ // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
+ // DOS header.
+ if (rva == 0 || rva == 2)
+ return rva;
+
+ NOTREACHED();
+ return kNoOffset;
+}
+
+const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const {
+ int file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoOffset)
+ return NULL;
+ else
+ return OffsetToPointer(file_offset);
}
std::string DisassemblerWin32X64::SectionName(const Section* section) {
- if (section == nullptr)
+ if (section == NULL)
return "<none>";
char name[9];
memcpy(name, section->name, 8);
@@ -346,25 +338,24 @@ std::string DisassemblerWin32X64::SectionName(const Section* section) {
CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- FileOffset file_offset = 0;
+ uint32_t file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == nullptr) {
- // No more sections. There should not be extra stuff following last
+ if (section == NULL) {
+ // No more sections. There should not be extra stuff following last
// section.
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
break;
}
if (file_offset < section->file_offset_of_raw_data) {
- FileOffset section_start_offset = section->file_offset_of_raw_data;
- if (!ParseNonSectionFileRegion(file_offset, section_start_offset,
- program)) {
+ uint32_t section_start_offset = section->file_offset_of_raw_data;
+ if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
+ program))
return false;
- }
file_offset = section_start_offset;
}
- FileOffset end = file_offset + section->size_of_raw_data;
+ uint32_t end = file_offset + section->size_of_raw_data;
if (!ParseFileRegion(section, file_offset, end, program))
return false;
file_offset = end;
@@ -384,7 +375,7 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
return false;
#if COURGETTE_HISTOGRAM_TARGETS
- for (size_t i = 0; i < abs32_locations_.size(); ++i) {
+ for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
@@ -395,10 +386,10 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
}
void DisassemblerWin32X64::ParseRel32RelocsFromSections() {
- FileOffset file_offset = 0;
+ uint32_t file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == nullptr)
+ if (section == NULL)
break;
if (file_offset < section->file_offset_of_raw_data)
file_offset = section->file_offset_of_raw_data;
@@ -420,11 +411,11 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() {
std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
while (abs32_iter != abs32_target_rvas_.end() &&
rel32_iter != rel32_target_rvas_.end()) {
- if (abs32_iter->first < rel32_iter->first) {
+ if (abs32_iter->first < rel32_iter->first)
++abs32_iter;
- } else if (rel32_iter->first < abs32_iter->first) {
+ else if (rel32_iter->first < abs32_iter->first)
++rel32_iter;
- } else {
+ else {
++common;
++abs32_iter;
++rel32_iter;
@@ -440,18 +431,18 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
if (!isCode)
return;
- FileOffset start_file_offset = section->file_offset_of_raw_data;
- FileOffset end_file_offset = start_file_offset + section->size_of_raw_data;
+ uint32_t start_file_offset = section->file_offset_of_raw_data;
+ uint32_t end_file_offset = start_file_offset + section->size_of_raw_data;
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
// Quick way to convert from Pointer to RVA within a single Section is to
- // subtract |pointer_to_rva|.
+ // subtract 'pointer_to_rva'.
const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva;
std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
@@ -468,10 +459,13 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
}
}
+ //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
+ // ++abs32_pos;
+
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = nullptr;
+ const uint8_t* rel32 = NULL;
bool is_rip_relative = false;
if (p + 5 <= end_pointer) {
@@ -522,7 +516,7 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
// To be valid, rel32 target must be within image, and within this
// section.
- if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA.
+ if (IsValidRVA(target_rva) &&
(is_rip_relative ||
(start_rva <= target_rva && target_rva < end_rva))) {
rel32_locations_.push_back(rel32_rva);
@@ -538,14 +532,14 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) {
}
CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion(
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) {
if (incomplete_disassembly_)
return true;
if (end_file_offset > start_file_offset) {
- if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
+ if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset),
end_file_offset - start_file_offset)) {
return false;
}
@@ -555,13 +549,13 @@ CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion(
}
CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section,
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) {
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
@@ -670,7 +664,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind,
size_t count = p->second.size();
std::cout << std::dec << p->first << ": " << count;
if (count <= 2) {
- for (size_t i = 0; i < count; ++i)
+ for (size_t i = 0; i < count; ++i)
std::cout << " " << DescribeRVA(p->second[i]);
}
std::cout << std::endl;
@@ -682,6 +676,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind,
}
#endif // COURGETTE_HISTOGRAM_TARGETS
+
// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
// that during development I'm finding I need to call it when compiled in
// Release mode. Hence:
@@ -700,12 +695,12 @@ std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const {
}
const Section* DisassemblerWin32X64::FindNextSection(
- FileOffset file_offset) const {
+ uint32_t fileOffset) const {
const Section* best = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; i++) {
const Section* section = &sections_[i];
if (section->size_of_raw_data > 0) { // i.e. has data in file.
- if (file_offset <= section->file_offset_of_raw_data) {
+ if (fileOffset <= section->file_offset_of_raw_data) {
if (best == 0 ||
section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
best = section;
@@ -716,15 +711,26 @@ const Section* DisassemblerWin32X64::FindNextSection(
return best;
}
+RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const {
+ for (int i = 0; i < number_of_sections_; i++) {
+ const Section* section = &sections_[i];
+ uint32_t offset = file_offset - section->file_offset_of_raw_data;
+ if (offset < section->size_of_raw_data) {
+ return section->virtual_address + offset;
+ }
+ }
+ return 0;
+}
+
bool DisassemblerWin32X64::ReadDataDirectory(
int index,
ImageDataDirectory* directory) {
if (index < number_of_data_directories_) {
- FileOffset file_offset = index * 8 + offset_of_data_directories_;
- if (file_offset >= size_of_optional_header_)
+ size_t offset = index * 8 + offset_of_data_directories_;
+ if (offset >= size_of_optional_header_)
return Bad("number of data directories inconsistent");
- const uint8_t* data_directory = optional_header_ + file_offset;
+ const uint8_t* data_directory = optional_header_ + offset;
if (data_directory < start() ||
data_directory + 8 >= end())
return Bad("data directory outside image");
diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h
index 20cfc7e..23aee66 100644
--- a/courgette/disassembler_win32_x64.h
+++ b/courgette/disassembler_win32_x64.h
@@ -8,16 +8,15 @@
#include <stddef.h>
#include <stdint.h>
-#include <map>
-#include <string>
-#include <vector>
-
#include "base/macros.h"
#include "courgette/disassembler.h"
-#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_win_pe.h"
+#ifdef COURGETTE_HISTOGRAM_TARGETS
+#include <map>
+#endif
+
namespace courgette {
class AssemblyProgram;
@@ -26,14 +25,19 @@ class DisassemblerWin32X64 : public Disassembler {
public:
explicit DisassemblerWin32X64(const void* start, size_t length);
- // Disassembler interfaces.
- RVA FileOffsetToRVA(FileOffset file_offset) const override;
- FileOffset RVAToFileOffset(RVA rva) const override;
- ExecutableType kind() const override { return EXE_WIN_32_X64; }
- bool ParseHeader() override;
- bool Disassemble(AssemblyProgram* target) override;
+ virtual ExecutableType kind() { return EXE_WIN_32_X64; }
+
+ // Returns 'true' if the buffer appears to point to a Windows 32 bit
+ // executable, 'false' otherwise. If ParseHeader() succeeds, other member
+ // functions may be called.
+ virtual bool ParseHeader();
+ virtual bool Disassemble(AssemblyProgram* target);
+
+ //
// Exposed for test purposes
+ //
+
bool has_text_section() const { return has_text_section_; }
uint32_t size_of_code() const { return size_of_code_; }
bool is_32bit() const { return !is_PE32_plus_; }
@@ -43,9 +47,17 @@ class DisassemblerWin32X64 : public Disassembler {
// that are listed in the base relocation table.
bool ParseRelocs(std::vector<RVA> *addresses);
- // Returns Section containing the relative virtual address, or null if none.
+ // Returns Section containing the relative virtual address, or NULL if none.
const Section* RVAToSection(RVA rva) const;
+ static const int kNoOffset = -1;
+ // Returns kNoOffset if there is no file offset corresponding to 'rva'.
+ int RVAToFileOffset(RVA rva) const;
+
+ // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
+ // is returned if there is no file offset corresponding to 'rva'.
+ const uint8_t* RVAToPointer(RVA rva) const;
+
static std::string SectionName(const Section* section);
protected:
@@ -54,46 +66,62 @@ class DisassemblerWin32X64 : public Disassembler {
void ParseRel32RelocsFromSections();
void ParseRel32RelocsFromSection(const Section* section);
- CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset,
- FileOffset end_file_offset,
+ CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program)
WARN_UNUSED_RESULT;
CheckBool ParseFileRegion(const Section* section,
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
#endif
- // Most addresses are represented as 32-bit RVAs. The one address we can't
- // do this with is the image base address.
+ // Most addresses are represented as 32-bit RVAs. The one address we can't
+ // do this with is the image base address. 'image_base' is valid only for
+ // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
uint64_t image_base() const { return image_base_; }
const ImageDataDirectory& base_relocation_table() const {
return base_relocation_table_;
}
- // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
+ // Subsumes rva != kUnassignedRVA.
+ bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
+
+ // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
std::string DescribeRVA(RVA rva) const;
- // Finds the first section at file_offset or above. Does not return sections
+ // Finds the first section at file_offset or above. Does not return sections
// that have no raw bytes in the file.
- const Section* FindNextSection(FileOffset file_offset) const;
+ const Section* FindNextSection(uint32_t file_offset) const;
+
+ // There are 2 'coordinate systems' for reasoning about executables.
+ // FileOffset - the the offset within a single .EXE or .DLL *file*.
+ // RVA - relative virtual address (offset within *loaded image*)
+ // FileOffsetToRVA and RVAToFileOffset convert between these representations.
+
+ RVA FileOffsetToRVA(uint32_t offset) const;
private:
+
bool ReadDataDirectory(int index, ImageDataDirectory* dir);
- bool incomplete_disassembly_; // true if can omit "uninteresting" bits.
+ bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
std::vector<RVA> abs32_locations_;
std::vector<RVA> rel32_locations_;
//
- // Information that is valid after ParseHeader() succeeds.
+ // Fields that are always valid.
//
- bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
+
+ //
+ // Information that is valid after successful ParseHeader.
+ //
+ bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
// Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
const uint8_t* optional_header_;
@@ -130,9 +158,9 @@ class DisassemblerWin32X64 : public Disassembler {
std::map<RVA, int> rel32_target_rvas_;
#endif
+
DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X64);
};
} // namespace courgette
-
#endif // COURGETTE_DISASSEMBLER_WIN32_X64_H_
diff --git a/courgette/disassembler_win32_x64_unittest.cc b/courgette/disassembler_win32_x64_unittest.cc
index 1121c10..8f732b3 100644
--- a/courgette/disassembler_win32_x64_unittest.cc
+++ b/courgette/disassembler_win32_x64_unittest.cc
@@ -6,9 +6,6 @@
#include <stdint.h>
-#include <string>
-#include <vector>
-
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "courgette/base_test_unittest.h"
@@ -40,16 +37,16 @@ void DisassemblerWin32X64Test::TestExe() const {
disassembler->RVAToSection(0x00401234 - 0x00400000)),
std::string(".text"));
- EXPECT_EQ(0U, disassembler->RVAToFileOffset(0));
- EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096));
- EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000));
+ EXPECT_EQ(0, disassembler->RVAToFileOffset(0));
+ EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096));
+ EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000));
std::vector<courgette::RVA> relocs;
bool can_parse_relocs = disassembler->ParseRelocs(&relocs);
EXPECT_TRUE(can_parse_relocs);
EXPECT_TRUE(base::STLIsSorted(relocs));
- const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->OffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ('M', offset_p[0]);
diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc
index 07bdfbc..aed26c7 100644
--- a/courgette/disassembler_win32_x86.cc
+++ b/courgette/disassembler_win32_x86.cc
@@ -8,73 +8,37 @@
#include <stdint.h>
#include <algorithm>
-#include <iostream>
+#include <string>
+#include <vector>
#include "base/logging.h"
+
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
#include "courgette/rel32_finder_win32_x86.h"
namespace courgette {
DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
- : Disassembler(start, length),
- incomplete_disassembly_(false),
- is_PE32_plus_(false),
- optional_header_(nullptr),
- size_of_optional_header_(0),
- offset_of_data_directories_(0),
- machine_type_(0),
- number_of_sections_(0),
- sections_(nullptr),
- has_text_section_(false),
- size_of_code_(0),
- size_of_initialized_data_(0),
- size_of_uninitialized_data_(0),
- base_of_code_(0),
- base_of_data_(0),
- image_base_(0),
- size_of_image_(0),
- number_of_data_directories_(0) {
-}
-
-FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
- const Section* section = RVAToSection(rva);
- if (section != nullptr) {
- FileOffset offset_in_section = rva - section->virtual_address;
- // Need this extra check, since an |rva| may be valid for a section, but is
- // non-existent in an image (e.g. uninit data).
- if (offset_in_section >= section->size_of_raw_data)
- return kNoFileOffset;
-
- return static_cast<FileOffset>(section->file_offset_of_raw_data +
- offset_in_section);
- }
-
- // Small RVA values point into the file header in the loaded image.
- // RVA 0 is the module load address which Windows uses as the module handle.
- // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
- // DOS header.
- if (rva == 0 || rva == 2)
- return static_cast<FileOffset>(rva);
-
- NOTREACHED();
- return kNoFileOffset;
-}
-
-RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
- for (int i = 0; i < number_of_sections_; ++i) {
- const Section* section = &sections_[i];
- if (file_offset >= section->file_offset_of_raw_data) {
- FileOffset offset_in_section =
- file_offset - section->file_offset_of_raw_data;
- if (offset_in_section < section->size_of_raw_data)
- return static_cast<RVA>(section->virtual_address + offset_in_section);
- }
- }
-
- NOTREACHED();
- return kNoRVA;
+ : Disassembler(start, length),
+ incomplete_disassembly_(false),
+ is_PE32_plus_(false),
+ optional_header_(NULL),
+ size_of_optional_header_(0),
+ offset_of_data_directories_(0),
+ machine_type_(0),
+ number_of_sections_(0),
+ sections_(NULL),
+ has_text_section_(false),
+ size_of_code_(0),
+ size_of_initialized_data_(0),
+ size_of_uninitialized_data_(0),
+ base_of_code_(0),
+ base_of_data_(0),
+ image_base_(0),
+ size_of_image_(0),
+ number_of_data_directories_(0) {
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -93,19 +57,18 @@ bool DisassemblerWin32X86::ParseHeader() {
return Bad("Not MZ");
// offset from DOS header to PE header is stored in DOS header.
- FileOffset file_offset = static_cast<FileOffset>(
- ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader));
+ uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader);
- if (file_offset >= length())
+ if (offset >= length())
return Bad("Bad offset to PE header");
- const uint8_t* const pe_header = FileOffsetToPointer(file_offset);
+ const uint8_t* const pe_header = OffsetToPointer(offset);
const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
if (pe_header <= start() ||
pe_header >= end() - kMinPEHeaderSize)
- return Bad("Bad file offset to PE header");
+ return Bad("Bad offset to PE header");
- if (file_offset % 8 != 0)
+ if (offset % 8 != 0)
return Bad("Misaligned PE header");
// The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
@@ -206,7 +169,7 @@ bool DisassemblerWin32X86::ParseHeader() {
size_of_optional_header_);
size_t detected_length = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
// TODO(sra): consider using the 'characteristics' field of the section
@@ -330,19 +293,48 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
}
const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; i++) {
const Section* section = &sections_[i];
- if (rva >= section->virtual_address) {
- FileOffset offset_in_section = rva - section->virtual_address;
- if (offset_in_section < section->virtual_size)
- return section;
+ uint32_t offset = rva - section->virtual_address;
+ if (offset < section->virtual_size) {
+ return section;
+ }
+ }
+ return NULL;
+}
+
+int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ if (section) {
+ uint32_t offset = rva - section->virtual_address;
+ if (offset < section->size_of_raw_data) {
+ return section->file_offset_of_raw_data + offset;
+ } else {
+ return kNoOffset; // In section but not in file (e.g. uninit data).
}
}
- return nullptr;
+
+ // Small RVA values point into the file header in the loaded image.
+ // RVA 0 is the module load address which Windows uses as the module handle.
+ // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
+ // DOS header.
+ if (rva == 0 || rva == 2)
+ return rva;
+
+ NOTREACHED();
+ return kNoOffset;
+}
+
+const uint8_t* DisassemblerWin32X86::RVAToPointer(RVA rva) const {
+ int file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoOffset)
+ return NULL;
+ else
+ return OffsetToPointer(file_offset);
}
std::string DisassemblerWin32X86::SectionName(const Section* section) {
- if (section == nullptr)
+ if (section == NULL)
return "<none>";
char name[9];
memcpy(name, section->name, 8);
@@ -352,25 +344,24 @@ std::string DisassemblerWin32X86::SectionName(const Section* section) {
CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
// Walk all the bytes in the file, whether or not in a section.
- FileOffset file_offset = 0;
+ uint32_t file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == nullptr) {
- // No more sections. There should not be extra stuff following last
+ if (section == NULL) {
+ // No more sections. There should not be extra stuff following last
// section.
// ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
break;
}
if (file_offset < section->file_offset_of_raw_data) {
- FileOffset section_start_offset = section->file_offset_of_raw_data;
- if (!ParseNonSectionFileRegion(file_offset, section_start_offset,
- program)) {
+ uint32_t section_start_offset = section->file_offset_of_raw_data;
+ if(!ParseNonSectionFileRegion(file_offset, section_start_offset,
+ program))
return false;
- }
file_offset = section_start_offset;
}
- FileOffset end = file_offset + section->size_of_raw_data;
+ uint32_t end = file_offset + section->size_of_raw_data;
if (!ParseFileRegion(section, file_offset, end, program))
return false;
file_offset = end;
@@ -390,7 +381,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
return false;
#if COURGETTE_HISTOGRAM_TARGETS
- for (size_t i = 0; i < abs32_locations_.size(); ++i) {
+ for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
@@ -401,10 +392,10 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
}
void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
- FileOffset file_offset = 0;
+ uint32_t file_offset = 0;
while (file_offset < length()) {
const Section* section = FindNextSection(file_offset);
- if (section == nullptr)
+ if (section == NULL)
break;
if (file_offset < section->file_offset_of_raw_data)
file_offset = section->file_offset_of_raw_data;
@@ -426,11 +417,11 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
while (abs32_iter != abs32_target_rvas_.end() &&
rel32_iter != rel32_target_rvas_.end()) {
- if (abs32_iter->first < rel32_iter->first) {
+ if (abs32_iter->first < rel32_iter->first)
++abs32_iter;
- } else if (rel32_iter->first < abs32_iter->first) {
+ else if (rel32_iter->first < abs32_iter->first)
++rel32_iter;
- } else {
+ else {
++common;
++abs32_iter;
++rel32_iter;
@@ -446,18 +437,19 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
if (!isCode)
return;
- FileOffset start_file_offset = section->file_offset_of_raw_data;
- FileOffset end_file_offset = start_file_offset + section->size_of_raw_data;
+ uint32_t start_file_offset = section->file_offset_of_raw_data;
+ uint32_t end_file_offset = start_file_offset + section->size_of_raw_data;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
Rel32FinderWin32X86_Basic finder(
base_relocation_table().address_,
- base_relocation_table().address_ + base_relocation_table().size_);
+ base_relocation_table().address_ + base_relocation_table().size_,
+ size_of_image_);
finder.Find(start_pointer, end_pointer, start_rva, end_rva, abs32_locations_);
finder.SwapRel32Locations(&rel32_locations_);
@@ -468,14 +460,14 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
}
CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) {
if (incomplete_disassembly_)
return true;
if (end_file_offset > start_file_offset) {
- if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
+ if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset),
end_file_offset - start_file_offset)) {
return false;
}
@@ -485,13 +477,13 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
}
CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section,
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) {
RVA relocs_start_rva = base_relocation_table().address_;
- const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset);
- const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset);
+ const uint8_t* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8_t* end_pointer = OffsetToPointer(end_file_offset);
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
@@ -600,7 +592,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
size_t count = p->second.size();
std::cout << std::dec << p->first << ": " << count;
if (count <= 2) {
- for (size_t i = 0; i < count; ++i)
+ for (size_t i = 0; i < count; ++i)
std::cout << " " << DescribeRVA(p->second[i]);
}
std::cout << std::endl;
@@ -612,6 +604,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
}
#endif // COURGETTE_HISTOGRAM_TARGETS
+
// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
// that during development I'm finding I need to call it when compiled in
// Release mode. Hence:
@@ -630,12 +623,12 @@ std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const {
}
const Section* DisassemblerWin32X86::FindNextSection(
- FileOffset file_offset) const {
+ uint32_t fileOffset) const {
const Section* best = 0;
- for (int i = 0; i < number_of_sections_; ++i) {
+ for (int i = 0; i < number_of_sections_; i++) {
const Section* section = &sections_[i];
if (section->size_of_raw_data > 0) { // i.e. has data in file.
- if (file_offset <= section->file_offset_of_raw_data) {
+ if (fileOffset <= section->file_offset_of_raw_data) {
if (best == 0 ||
section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
best = section;
@@ -646,15 +639,26 @@ const Section* DisassemblerWin32X86::FindNextSection(
return best;
}
+RVA DisassemblerWin32X86::FileOffsetToRVA(uint32_t file_offset) const {
+ for (int i = 0; i < number_of_sections_; i++) {
+ const Section* section = &sections_[i];
+ uint32_t offset = file_offset - section->file_offset_of_raw_data;
+ if (offset < section->size_of_raw_data) {
+ return section->virtual_address + offset;
+ }
+ }
+ return 0;
+}
+
bool DisassemblerWin32X86::ReadDataDirectory(
int index,
ImageDataDirectory* directory) {
if (index < number_of_data_directories_) {
- FileOffset file_offset = index * 8 + offset_of_data_directories_;
- if (file_offset >= size_of_optional_header_)
+ size_t offset = index * 8 + offset_of_data_directories_;
+ if (offset >= size_of_optional_header_)
return Bad("number of data directories inconsistent");
- const uint8_t* data_directory = optional_header_ + file_offset;
+ const uint8_t* data_directory = optional_header_ + offset;
if (data_directory < start() ||
data_directory + 8 >= end())
return Bad("data directory outside image");
diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h
index c22872b..891636c 100644
--- a/courgette/disassembler_win32_x86.h
+++ b/courgette/disassembler_win32_x86.h
@@ -8,16 +8,15 @@
#include <stddef.h>
#include <stdint.h>
-#include <map>
-#include <string>
-#include <vector>
-
#include "base/macros.h"
#include "courgette/disassembler.h"
-#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
#include "courgette/types_win_pe.h"
+#ifdef COURGETTE_HISTOGRAM_TARGETS
+#include <map>
+#endif
+
namespace courgette {
class AssemblyProgram;
@@ -26,14 +25,19 @@ class DisassemblerWin32X86 : public Disassembler {
public:
explicit DisassemblerWin32X86(const void* start, size_t length);
- // Disassembler interfaces.
- RVA FileOffsetToRVA(FileOffset file_offset) const override;
- FileOffset RVAToFileOffset(RVA rva) const override;
- ExecutableType kind() const override { return EXE_WIN_32_X86; }
- bool ParseHeader() override;
- bool Disassemble(AssemblyProgram* target) override;
+ virtual ExecutableType kind() { return EXE_WIN_32_X86; }
+ // Returns 'true' if the buffer appears to point to a Windows 32 bit
+ // executable, 'false' otherwise. If ParseHeader() succeeds, other member
+ // functions may be called.
+ virtual bool ParseHeader();
+
+ virtual bool Disassemble(AssemblyProgram* target);
+
+ //
// Exposed for test purposes
+ //
+
bool has_text_section() const { return has_text_section_; }
uint32_t size_of_code() const { return size_of_code_; }
bool is_32bit() const { return !is_PE32_plus_; }
@@ -43,9 +47,17 @@ class DisassemblerWin32X86 : public Disassembler {
// that are listed in the base relocation table.
bool ParseRelocs(std::vector<RVA> *addresses);
- // Returns Section containing the relative virtual address, or null if none.
+ // Returns Section containing the relative virtual address, or NULL if none.
const Section* RVAToSection(RVA rva) const;
+ static const int kNoOffset = -1;
+ // Returns kNoOffset if there is no file offset corresponding to 'rva'.
+ int RVAToFileOffset(RVA rva) const;
+
+ // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
+ // is returned if there is no file offset corresponding to 'rva'.
+ const uint8_t* RVAToPointer(RVA rva) const;
+
static std::string SectionName(const Section* section);
protected:
@@ -54,46 +66,59 @@ class DisassemblerWin32X86 : public Disassembler {
void ParseRel32RelocsFromSections();
void ParseRel32RelocsFromSection(const Section* section);
- CheckBool ParseNonSectionFileRegion(FileOffset start_file_offset,
- FileOffset end_file_offset,
+ CheckBool ParseNonSectionFileRegion(uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program)
WARN_UNUSED_RESULT;
CheckBool ParseFileRegion(const Section* section,
- FileOffset start_file_offset,
- FileOffset end_file_offset,
+ uint32_t start_file_offset,
+ uint32_t end_file_offset,
AssemblyProgram* program) WARN_UNUSED_RESULT;
#if COURGETTE_HISTOGRAM_TARGETS
void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
#endif
- // Most addresses are represented as 32-bit RVAs. The one address we can't
- // do this with is the image base address.
+ // Most addresses are represented as 32-bit RVAs. The one address we can't
+ // do this with is the image base address. 'image_base' is valid only for
+ // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
uint32_t image_base() const { return static_cast<uint32_t>(image_base_); }
const ImageDataDirectory& base_relocation_table() const {
return base_relocation_table_;
}
- // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
+ // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
std::string DescribeRVA(RVA rva) const;
- // Finds the first section at file_offset or above. Does not return sections
+ // Finds the first section at file_offset or above. Does not return sections
// that have no raw bytes in the file.
- const Section* FindNextSection(FileOffset file_offset) const;
+ const Section* FindNextSection(uint32_t file_offset) const;
+
+ // There are 2 'coordinate systems' for reasoning about executables.
+ // FileOffset - the the offset within a single .EXE or .DLL *file*.
+ // RVA - relative virtual address (offset within *loaded image*)
+ // FileOffsetToRVA and RVAToFileOffset convert between these representations.
+
+ RVA FileOffsetToRVA(uint32_t offset) const;
private:
+
bool ReadDataDirectory(int index, ImageDataDirectory* dir);
- bool incomplete_disassembly_; // true if can omit "uninteresting" bits.
+ bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
std::vector<RVA> abs32_locations_;
std::vector<RVA> rel32_locations_;
//
- // Information that is valid after ParseHeader() succeeds.
+ // Fields that are always valid.
//
- bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
+
+ //
+ // Information that is valid after successful ParseHeader.
+ //
+ bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
// Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
const uint8_t* optional_header_;
@@ -130,9 +155,9 @@ class DisassemblerWin32X86 : public Disassembler {
std::map<RVA, int> rel32_target_rvas_;
#endif
+
DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86);
};
} // namespace courgette
-
#endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_
diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc
index 3e43273..4e16464 100644
--- a/courgette/disassembler_win32_x86_unittest.cc
+++ b/courgette/disassembler_win32_x86_unittest.cc
@@ -6,9 +6,6 @@
#include <stdint.h>
-#include <string>
-#include <vector>
-
#include "base/memory/scoped_ptr.h"
#include "base/stl_util.h"
#include "courgette/base_test_unittest.h"
@@ -40,16 +37,16 @@ void DisassemblerWin32X86Test::TestExe() const {
disassembler->RVAToSection(0x00401234 - 0x00400000)),
std::string(".text"));
- EXPECT_EQ(0U, disassembler->RVAToFileOffset(0));
- EXPECT_EQ(1024U, disassembler->RVAToFileOffset(4096));
- EXPECT_EQ(46928U, disassembler->RVAToFileOffset(50000));
+ EXPECT_EQ(0, disassembler->RVAToFileOffset(0));
+ EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096));
+ EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000));
std::vector<courgette::RVA> relocs;
bool can_parse_relocs = disassembler->ParseRelocs(&relocs);
EXPECT_TRUE(can_parse_relocs);
EXPECT_TRUE(base::STLIsSorted(relocs));
- const uint8_t* offset_p = disassembler->FileOffsetToPointer(0);
+ const uint8_t* offset_p = disassembler->OffsetToPointer(0);
EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
reinterpret_cast<const void*>(offset_p));
EXPECT_EQ('M', offset_p[0]);
diff --git a/courgette/image_utils.h b/courgette/image_utils.h
index cfbfcfe..f958cc1 100644
--- a/courgette/image_utils.h
+++ b/courgette/image_utils.h
@@ -14,44 +14,8 @@
namespace courgette {
-// There are several ways to reason about addresses in an image:
-// - File Offset: Position relative to start of image.
-// - VA (Virtual Address): Virtual memory address of a loaded image. This is
-// subject to relocation by the OS.
-// - RVA (Relative Virtual Address): VA relative to some base address. This is
-// the preferred way to specify pointers in an image. Two ways to encode RVA
-// are:
-// - abs32: RVA value is encoded directly.
-// - rel32: RVA is encoded as offset from an instruction address. This is
-// commonly used for relative branch/call opcodes.
-// Courgette operates on File Offsets and RVAs only.
-
-using RVA = uint32_t;
+typedef uint32_t RVA;
const RVA kUnassignedRVA = 0xFFFFFFFFU;
-const RVA kNoRVA = 0xFFFFFFFFU;
-
-using FileOffset = size_t;
-const FileOffset kNoFileOffset = UINTPTR_MAX;
-
-// An interface for {File Offset, RVA, pointer to image data} translation.
-class AddressTranslator {
- public:
- // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent.
- virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;
-
- // Returns the file offset corresponding to |rva|, or kNoFileOffset if
- // nonexistent.
- virtual FileOffset RVAToFileOffset(RVA rva) const = 0;
-
- // Returns the pointer to the image data for |file_offset|. Assumes that
- // 0 <= |file_offset| <= image size. If |file_offset| == image, the resulting
- // pointer is an end bound for iteration that should never be dereferenced.
- virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;
-
- // Returns the pointer to the image data for |rva|, or null if |rva| is
- // invalid.
- virtual const uint8_t* RVAToPointer(RVA rva) const = 0;
-};
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
diff --git a/courgette/rel32_finder_win32_x86.cc b/courgette/rel32_finder_win32_x86.cc
index 0ed492f..171b781 100644
--- a/courgette/rel32_finder_win32_x86.cc
+++ b/courgette/rel32_finder_win32_x86.cc
@@ -8,9 +8,11 @@
namespace courgette {
-Rel32FinderWin32X86::Rel32FinderWin32X86(RVA relocs_start_rva,
- RVA relocs_end_rva)
- : relocs_start_rva_(relocs_start_rva), relocs_end_rva_(relocs_end_rva) {
+Rel32FinderWin32X86::Rel32FinderWin32X86(
+ RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
+ : relocs_start_rva_(relocs_start_rva),
+ relocs_end_rva_(relocs_end_rva),
+ image_end_rva_(image_end_rva) {
}
Rel32FinderWin32X86::~Rel32FinderWin32X86() {
@@ -26,9 +28,9 @@ void Rel32FinderWin32X86::SwapRel32TargetRVAs(std::map<RVA, int>* dest) {
}
#endif
-Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(RVA relocs_start_rva,
- RVA relocs_end_rva)
- : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva) {
+Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(
+ RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
+ : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva, image_end_rva) {
}
Rel32FinderWin32X86_Basic::~Rel32FinderWin32X86_Basic() {
@@ -49,10 +51,6 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
const uint8_t* p = start_pointer;
while (p < end_pointer) {
RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
-
- // Skip the base reloation table if we encounter it.
- // Note: We're not bothering to handle the edge case where a Rel32 pointer
- // collides with |relocs_start_rva_| by being {1, 2, 3}-bytes before it.
if (current_rva == relocs_start_rva_) {
if (relocs_start_rva_ < relocs_end_rva_) {
p += relocs_end_rva_ - relocs_start_rva_;
@@ -60,10 +58,13 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
}
}
+ //while (abs32_pos != abs32_locations.end() && *abs32_pos < current_rva)
+ // ++abs32_pos;
+
// Heuristic discovery of rel32 locations in instruction stream: are the
// next few bytes the start of an instruction containing a rel32
// addressing mode?
- const uint8_t* rel32 = nullptr;
+ const uint8_t* rel32 = NULL;
if (p + 5 <= end_pointer) {
if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
@@ -94,9 +95,10 @@ void Rel32FinderWin32X86_Basic::Find(const uint8_t* start_pointer,
}
RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
- // Valid, rel32 target must be within image, and within this section.
- // Subsumes |target_rva| != |kUnassignedRVA|.
- if (start_rva <= target_rva && target_rva < end_rva) {
+ // To be valid, rel32 target must be within image, and within this
+ // section.
+ if (IsValidRVA(target_rva) &&
+ start_rva <= target_rva && target_rva < end_rva) {
rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
++rel32_target_rvas_[target_rva];
diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h
index 98ebd98..01226ae 100644
--- a/courgette/rel32_finder_win32_x86.h
+++ b/courgette/rel32_finder_win32_x86.h
@@ -7,7 +7,9 @@
#include <stdint.h>
+#if COURGETTE_HISTOGRAM_TARGETS
#include <map>
+#endif
#include <vector>
#include "courgette/image_utils.h"
@@ -17,21 +19,25 @@ namespace courgette {
// A helper class to scan through a section of code to extract RVAs.
class Rel32FinderWin32X86 {
public:
- Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva);
+ Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva,
+ RVA image_end_rva);
virtual ~Rel32FinderWin32X86();
- // Swaps data in |rel32_locations_| with |dest|.
+ // Subsumes rva != kUnassignedRVA.
+ bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; }
+
+ // Swaps data in |rel32_locations_| to |dest|.
void SwapRel32Locations(std::vector<RVA>* dest);
#if COURGETTE_HISTOGRAM_TARGETS
- // Swaps data in |rel32_target_rvas_| with |dest|.
+ // Swaps data in |rel32_target_rvas_| to |dest|.
void SwapRel32TargetRVAs(std::map<RVA, int>* dest);
#endif
// Scans through [|start_pointer|, |end_pointer|) for rel32 addresses. Seeks
// RVAs that satisfy the following:
- // - Do not overlap with |abs32_locations| (assumed sorted).
- // - Do not overlap with [relocs_start_rva, relocs_end_rva).
+ // - Do not collide with |abs32_pos| (assumed sorted).
+ // - Do not collide with |base_relocation_table|'s RVA range,
// - Whose targets are in [|start_rva|, |end_rva|).
// The sorted results are written to |rel32_locations_|.
virtual void Find(const uint8_t* start_pointer,
@@ -43,6 +49,7 @@ class Rel32FinderWin32X86 {
protected:
const RVA relocs_start_rva_;
const RVA relocs_end_rva_;
+ const RVA image_end_rva_;
std::vector<RVA> rel32_locations_;
@@ -55,7 +62,8 @@ class Rel32FinderWin32X86 {
// (excluding JPO/JPE) disregarding instruction alignment.
class Rel32FinderWin32X86_Basic : public Rel32FinderWin32X86 {
public:
- Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva);
+ Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva,
+ RVA image_end_rva);
virtual ~Rel32FinderWin32X86_Basic();
// Rel32FinderWin32X86 implementation.
diff --git a/courgette/rel32_finder_win32_x86_unittest.cc b/courgette/rel32_finder_win32_x86_unittest.cc
index 496f0b9..aed5c13 100644
--- a/courgette/rel32_finder_win32_x86_unittest.cc
+++ b/courgette/rel32_finder_win32_x86_unittest.cc
@@ -33,7 +33,8 @@ class Rel32FinderWin32X86TestCase {
}
void RunTestBasic(std::string name) {
- Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_);
+ Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_,
+ image_end_rva_);
ASSERT_FALSE(text_data_.empty());
finder.Find(&text_data_[0], &text_data_[0] + text_data_.size(),
text_start_rva_, text_end_rva_, abs32_locations_);