summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authordgarrett@chromium.org <dgarrett@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-26 00:50:20 +0000
committerdgarrett@chromium.org <dgarrett@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-26 00:50:20 +0000
commit423a381f4fd3efd99dfd7bc932777ea596cf7b17 (patch)
treefdbf4a4bc5f2b8d73b90020da470c40a22f4cc2a /courgette
parentda1543a1a526aefd1114853cf737846eb5c29640 (diff)
downloadchromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.zip
chromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.tar.gz
chromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.tar.bz2
Further refactoring, move ImageInfo into Disassembler/DisassemblerWin32X86.
This means that all PE specific knowledge is now contained in a single class which leaves us in pretty good shape for supporting ELF 32. There are still widespread assumptions about being 32 bit, but those can be addressed at a much later date. BUG=None TEST=Unittests Review URL: http://codereview.chromium.org/8166013 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@107260 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette')
-rw-r--r--courgette/adjustment_method.cc1
-rw-r--r--courgette/adjustment_method_2.cc1
-rw-r--r--courgette/assembly_program.h2
-rw-r--r--courgette/courgette.gyp4
-rw-r--r--courgette/courgette.h14
-rw-r--r--courgette/disassembler.cc74
-rw-r--r--courgette/disassembler.h65
-rw-r--r--courgette/disassembler_win32_x86.cc480
-rw-r--r--courgette/disassembler_win32_x86.h110
-rw-r--r--courgette/disassembler_win32_x86_unittest.cc98
-rw-r--r--courgette/encoded_program.h2
-rw-r--r--courgette/ensemble.cc57
-rw-r--r--courgette/ensemble.h8
-rw-r--r--courgette/ensemble_apply.cc1
-rw-r--r--courgette/ensemble_create.cc1
-rw-r--r--courgette/image_info.cc419
-rw-r--r--courgette/image_info.h200
-rw-r--r--courgette/image_info_unittest.cc77
-rw-r--r--courgette/types_win_pe.h65
19 files changed, 838 insertions, 841 deletions
diff --git a/courgette/adjustment_method.cc b/courgette/adjustment_method.cc
index f967093..53745d7 100644
--- a/courgette/adjustment_method.cc
+++ b/courgette/adjustment_method.cc
@@ -18,7 +18,6 @@
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
-#include "courgette/image_info.h"
namespace courgette {
diff --git a/courgette/adjustment_method_2.cc b/courgette/adjustment_method_2.cc
index b039e63..961beff 100644
--- a/courgette/adjustment_method_2.cc
+++ b/courgette/adjustment_method_2.cc
@@ -20,7 +20,6 @@
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
-#include "courgette/image_info.h"
/*
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
index 0d865f5..5c6b1b1 100644
--- a/courgette/assembly_program.h
+++ b/courgette/assembly_program.h
@@ -12,7 +12,7 @@
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
-#include "courgette/image_info.h"
+#include "courgette/disassembler.h"
#include "courgette/memory_allocator.h"
namespace courgette {
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp
index 4ff0424..a4fb12f 100644
--- a/courgette/courgette.gyp
+++ b/courgette/courgette.gyp
@@ -30,8 +30,6 @@
'ensemble.h',
'ensemble_apply.cc',
'ensemble_create.cc',
- 'image_info.cc',
- 'image_info.h',
'memory_allocator.cc',
'memory_allocator.h',
'region.h',
@@ -91,10 +89,10 @@
'base_test_unittest.cc',
'base_test_unittest.h',
'difference_estimator_unittest.cc',
+ 'disassembler_win32_x86_unittest.cc',
'encoded_program_unittest.cc',
'encode_decode_unittest.cc',
'ensemble_unittest.cc',
- 'image_info_unittest.cc',
'run_all_unittests.cc',
'streams_unittest.cc',
'versioning_unittest.cc',
diff --git a/courgette/courgette.h b/courgette/courgette.h
index 127a150..2970a3f 100644
--- a/courgette/courgette.h
+++ b/courgette/courgette.h
@@ -87,9 +87,17 @@ Status ApplyEnsemblePatch(const FilePath::CharType* old_file_name,
Status GenerateEnsemblePatch(SourceStream* old, SourceStream* target,
SinkStream* patch);
-// Detects the type of an executable, and returns UNKNOWN if it cannot
-// be parsed.
-ExecutableType DetectExecutableType(const void* buffer, size_t length);
+// Detects the type of an executable file, and its length. The length
+// may be slightly smaller than some executables (like ELF), but will include
+// all bytes the courgette algorithm has special benefit for.
+// On success:
+// Fill in type and detected_length, and return C_OK.
+// On failure:
+// Fill in type with UNKNOWN, detected_length with 0, and
+// return C_INPUT_NOT_RECOGNIZED
+Status DetectExecutableType(const void* buffer, size_t length,
+ ExecutableType* type,
+ size_t* detected_length);
// Attempts to detect the type of executable, and parse it with the
// appropriate tools, storing the pointer to the AssemblyProgram in |*output|.
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
index f4ae86d..edacd4b 100644
--- a/courgette/disassembler.cc
+++ b/courgette/disassembler.cc
@@ -15,7 +15,6 @@
#include "courgette/courgette.h"
#include "courgette/disassembler_win32_x86.h"
#include "courgette/encoded_program.h"
-#include "courgette/image_info.h"
// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
// different target addresses are referenced. Purely for debugging.
@@ -25,45 +24,56 @@ namespace courgette {
////////////////////////////////////////////////////////////////////////////////
-ExecutableType DetectExecutableType(const void* buffer, size_t length) {
+Disassembler* DetectDisassembler(const void* buffer, size_t length) {
+ Disassembler* disassembler = NULL;
- bool parsed = false;
+ disassembler = new DisassemblerWin32X86(buffer, length);
+ if (disassembler->ParseHeader())
+ return disassembler;
- PEInfo* pe_info = new PEInfo();
- pe_info->Init(buffer, length);
- parsed = pe_info->ParseHeader();
- delete pe_info;
+ delete disassembler;
+
+ return NULL;
+}
- if (parsed)
- return WIN32_X86;
+Status DetectExecutableType(const void* buffer, size_t length,
+ ExecutableType* type,
+ size_t* detected_length) {
- return UNKNOWN;
+ Disassembler* disassembler = DetectDisassembler(buffer, length);
+
+ if (disassembler) {
+ *type = disassembler->kind();
+ *detected_length = disassembler->length();
+ delete disassembler;
+ return C_OK;
+ }
+
+ // We failed to detect anything
+ *type = UNKNOWN;
+ *detected_length = 0;
+ return C_INPUT_NOT_RECOGNIZED;
}
Status ParseDetectedExecutable(const void* buffer, size_t length,
AssemblyProgram** output) {
*output = NULL;
- PEInfo* pe_info = new PEInfo();
- pe_info->Init(buffer, length);
+ Disassembler* disassembler = DetectDisassembler(buffer, length);
- if (!pe_info->ParseHeader()) {
- delete pe_info;
+ if (!disassembler) {
return C_INPUT_NOT_RECOGNIZED;
}
- Disassembler* disassembler = new DisassemblerWin32X86(pe_info);
AssemblyProgram* program = new AssemblyProgram();
if (!disassembler->Disassemble(program)) {
delete program;
delete disassembler;
- delete pe_info;
return C_DISASSEMBLY_FAILED;
}
delete disassembler;
- delete pe_info;
*output = program;
return C_OK;
}
@@ -72,4 +82,34 @@ void DeleteAssemblyProgram(AssemblyProgram* program) {
delete program;
}
+Disassembler::Disassembler(const void* start, size_t length)
+ : failure_reason_("uninitialized") {
+
+ start_ = reinterpret_cast<const uint8*>(start);
+ length_ = length;
+ end_ = start_ + length_;
+};
+
+Disassembler::~Disassembler() {};
+
+const uint8* Disassembler::OffsetToPointer(size_t offset) const {
+ assert(start_ + offset <= end_);
+ return start_ + offset;
+}
+
+bool Disassembler::Good() {
+ failure_reason_ = NULL;
+ return true;
+}
+
+bool Disassembler::Bad(const char* reason) {
+ failure_reason_ = reason;
+ return false;
+}
+
+void Disassembler::ReduceLength(size_t reduced_length) {
+ if (reduced_length < length_)
+ length_ = reduced_length;
+}
+
} // namespace courgette
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index bef1a90..2b4714d 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -7,23 +7,82 @@
#include "base/basictypes.h"
+#include "courgette/courgette.h"
+
namespace courgette {
class AssemblyProgram;
-class PEInfo;
+
+// A Relative Virtual Address is the address in the image file after it is
+// loaded into memory relative to the image load address.
+typedef uint32 RVA;
class Disassembler {
public:
- virtual ~Disassembler() {}
+ virtual ~Disassembler();
+
+ virtual ExecutableType kind() { return UNKNOWN; }
+
+ // ok() may always be called but returns 'true' only after ParseHeader
+ // succeeds.
+ bool ok() const { return failure_reason_ == NULL; }
+
+ // Returns 'true' if the buffer appears to be a valid executable of the
+ // expected type. It is not required that this be called before Disassemble.
+ virtual bool ParseHeader() = 0;
// Disassembles the item passed to the factory method into the output
// parameter 'program'.
virtual bool Disassemble(AssemblyProgram* program) = 0;
+ // Returns the length of the source executable. May reduce after ParseHeader.
+ size_t length() const { return length_; }
+ const uint8* start() const { return start_; }
+ const uint8* end() const { return end_; }
+
+ // Returns a pointer into the memory copy of the file format.
+ // OffsetToPointer(0) returns a pointer to the start of the file format.
+ const uint8* OffsetToPointer(size_t offset) const;
+
protected:
- Disassembler() {}
+ Disassembler(const void* start, size_t length);
+
+ bool Good();
+ bool Bad(const char *reason);
+
+ // These helper functions avoid the need for casts in the main code.
+ uint16 ReadU16(const uint8* address, size_t offset) {
+ return *reinterpret_cast<const uint16*>(address + offset);
+ }
+
+ uint32 ReadU32(const uint8* address, size_t offset) {
+ return *reinterpret_cast<const uint32*>(address + offset);
+ }
+
+ uint64 ReadU64(const uint8* address, size_t offset) {
+ return *reinterpret_cast<const uint64*>(address + offset);
+ }
+
+ static uint32 Read32LittleEndian(const void* address) {
+ return *reinterpret_cast<const uint32*>(address);
+ }
+
+ // Reduce the length of the image in memory. Does not actually free
+ // (or realloc) any memory. Usually only called via ParseHeader()
+ void ReduceLength(size_t reduced_length);
private:
+ const char* failure_reason_;
+
+ //
+ // Basic information that is always valid after Construction, though
+ // ParseHeader may shorten the length if the executable is shorter than
+ // the total data.
+ //
+ size_t length_; // In current memory.
+ const uint8* start_; // In current memory, base for 'file offsets'.
+ const uint8* end_; // In current memory.
+
DISALLOW_COPY_AND_ASSIGN(Disassembler);
};
diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc
index fb12c22..d09d67d 100644
--- a/courgette/disassembler_win32_x86.cc
+++ b/courgette/disassembler_win32_x86.cc
@@ -14,7 +14,6 @@
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
-#include "courgette/image_info.h"
// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
// different target addresses are referenced. Purely for debugging.
@@ -22,16 +21,189 @@
namespace courgette {
-DisassemblerWin32X86::DisassemblerWin32X86(PEInfo* pe_info)
- : pe_info_(pe_info),
- incomplete_disassembly_(false) {
+DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
+ : Disassembler(start, length),
+ incomplete_disassembly_(false),
+ is_PE32_plus_(false),
+ optional_header_(NULL),
+ size_of_optional_header_(0),
+ offset_of_data_directories_(0),
+ machine_type_(0),
+ number_of_sections_(0),
+ sections_(NULL),
+ has_text_section_(false),
+ size_of_code_(0),
+ size_of_initialized_data_(0),
+ size_of_uninitialized_data_(0),
+ base_of_code_(0),
+ base_of_data_(0),
+ image_base_(0),
+ size_of_image_(0),
+ number_of_data_directories_(0) {
+}
+
+// ParseHeader attempts to match up the buffer with the Windows data
+// structures that exist within a Windows 'Portable Executable' format file.
+// Returns 'true' if the buffer matches, and 'false' if the data looks
+// suspicious. Rather than try to 'map' the buffer to the numerous windows
+// structures, we extract the information we need into the members of this
+// class.
+//
+bool DisassemblerWin32X86::ParseHeader() {
+ if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/)
+ return Bad("Too small");
+
+ // Have 'MZ' magic for a DOS header?
+ if (start()[0] != 'M' || start()[1] != 'Z')
+ return Bad("Not MZ");
+
+ // offset from DOS header to PE header is stored in DOS header.
+ uint32 offset = ReadU32(start(),
+ kOffsetOfFileAddressOfNewExeHeader);
+
+ if (offset >= length())
+ return Bad("Bad offset to PE header");
+
+ const uint8* const pe_header = OffsetToPointer(offset);
+ const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
+ if (pe_header <= start() ||
+ pe_header >= end() - kMinPEHeaderSize)
+ return Bad("Bad offset to PE header");
+
+ if (offset % 8 != 0)
+ return Bad("Misaligned PE header");
+
+ // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
+ // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx
+ //
+ // The first field of the IMAGE_NT_HEADERS is the signature.
+ if (!(pe_header[0] == 'P' &&
+ pe_header[1] == 'E' &&
+ pe_header[2] == 0 &&
+ pe_header[3] == 0))
+ return Bad("no PE signature");
+
+ // The second field of the IMAGE_NT_HEADERS is the COFF header.
+ // The COFF header is also called an IMAGE_FILE_HEADER
+ // http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx
+ const uint8* const coff_header = pe_header + 4;
+ machine_type_ = ReadU16(coff_header, 0);
+ number_of_sections_ = ReadU16(coff_header, 2);
+ size_of_optional_header_ = ReadU16(coff_header, 16);
+
+ // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64)
+ const uint8* const optional_header = coff_header + kSizeOfCoffHeader;
+ optional_header_ = optional_header;
+
+ if (optional_header + size_of_optional_header_ >= end())
+ return Bad("optional header past end of file");
+
+ // Check we can read the magic.
+ if (size_of_optional_header_ < 2)
+ return Bad("optional header no magic");
+
+ uint16 magic = ReadU16(optional_header, 0);
+
+ if (magic == kImageNtOptionalHdr32Magic) {
+ is_PE32_plus_ = false;
+ offset_of_data_directories_ =
+ kOffsetOfDataDirectoryFromImageOptionalHeader32;
+ } else if (magic == kImageNtOptionalHdr64Magic) {
+ is_PE32_plus_ = true;
+ offset_of_data_directories_ =
+ kOffsetOfDataDirectoryFromImageOptionalHeader64;
+ } else {
+ return Bad("unrecognized magic");
+ }
+
+ // Check that we can read the rest of the fixed fields. Data directories
+ // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER.
+ if (size_of_optional_header_ < offset_of_data_directories_)
+ return Bad("optional header too short");
+
+ // The optional header is either an IMAGE_OPTIONAL_HEADER32 or
+ // IMAGE_OPTIONAL_HEADER64
+ // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx
+ //
+ // Copy the fields we care about.
+ size_of_code_ = ReadU32(optional_header, 4);
+ size_of_initialized_data_ = ReadU32(optional_header, 8);
+ size_of_uninitialized_data_ = ReadU32(optional_header, 12);
+ base_of_code_ = ReadU32(optional_header, 20);
+ if (is_PE32_plus_) {
+ base_of_data_ = 0;
+ image_base_ = ReadU64(optional_header, 24);
+ } else {
+ base_of_data_ = ReadU32(optional_header, 24);
+ image_base_ = ReadU32(optional_header, 28);
+ }
+ size_of_image_ = ReadU32(optional_header, 56);
+ number_of_data_directories_ =
+ ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92));
+
+ if (size_of_code_ >= length() ||
+ size_of_initialized_data_ >= length() ||
+ size_of_code_ + size_of_initialized_data_ >= length()) {
+ // This validation fires on some perfectly fine executables.
+ // return Bad("code or initialized data too big");
+ }
+
+ // TODO(sra): we can probably get rid of most of the data directories.
+ bool b = true;
+ // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary
+ // for correctness and it compiles smaller this way.
+ b &= ReadDataDirectory(0, &export_table_);
+ b &= ReadDataDirectory(1, &import_table_);
+ b &= ReadDataDirectory(2, &resource_table_);
+ b &= ReadDataDirectory(3, &exception_table_);
+ b &= ReadDataDirectory(5, &base_relocation_table_);
+ b &= ReadDataDirectory(11, &bound_import_table_);
+ b &= ReadDataDirectory(12, &import_address_table_);
+ b &= ReadDataDirectory(13, &delay_import_descriptor_);
+ b &= ReadDataDirectory(14, &clr_runtime_header_);
+ if (!b) {
+ return Bad("malformed data directory");
+ }
+
+ // Sections follow the optional header.
+ sections_ =
+ reinterpret_cast<const Section*>(optional_header +
+ size_of_optional_header_);
+ size_t detected_length = 0;
+
+ for (int i = 0; i < number_of_sections_; ++i) {
+ const Section* section = &sections_[i];
+
+ // TODO(sra): consider using the 'characteristics' field of the section
+ // header to see if the section contains instructions.
+ if (memcmp(section->name, ".text", 6) == 0)
+ has_text_section_ = true;
+
+ uint32 section_end =
+ section->file_offset_of_raw_data + section->size_of_raw_data;
+ if (section_end > detected_length)
+ detected_length = section_end;
+ }
+
+ // Pretend our in-memory copy is only as long as our detected length.
+ ReduceLength(detected_length);
+
+ if (!is_32bit()) {
+ return Bad("64 bit executables are not yet supported");
+ }
+
+ if (!has_text_section()) {
+ return Bad("Resource-only executables are not yet supported");
+ }
+
+ return Good();
}
bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
- if (!pe_info().ok())
+ if (!ok())
return false;
- target->set_image_base(pe_info().image_base());
+ target->set_image_base(image_base());
if (!ParseAbs32Relocs())
return false;
@@ -46,13 +218,159 @@ bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
return true;
}
-static uint32 Read32LittleEndian(const void* address) {
- return *reinterpret_cast<const uint32*>(address);
+////////////////////////////////////////////////////////////////////////////////
+
+bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
+ relocs->clear();
+
+ size_t relocs_size = base_relocation_table_.size_;
+ if (relocs_size == 0)
+ return true;
+
+ // The format of the base relocation table is a sequence of variable sized
+ // IMAGE_BASE_RELOCATION blocks. Search for
+ // "The format of the base relocation data is somewhat quirky"
+ // at http://msdn.microsoft.com/en-us/library/ms809762.aspx
+
+ const uint8* relocs_start = RVAToPointer(base_relocation_table_.address_);
+ const uint8* relocs_end = relocs_start + relocs_size;
+
+ // Make sure entire base relocation table is within the buffer.
+ if (relocs_start < start() ||
+ relocs_start >= end() ||
+ relocs_end <= start() ||
+ relocs_end > end()) {
+ return Bad(".relocs outside image");
+ }
+
+ const uint8* block = relocs_start;
+
+ // Walk the variable sized blocks.
+ while (block + 8 < relocs_end) {
+ RVA page_rva = ReadU32(block, 0);
+ uint32 size = ReadU32(block, 4);
+ if (size < 8 || // Size includes header ...
+ size % 4 != 0) // ... and is word aligned.
+ return Bad("unreasonable relocs block");
+
+ const uint8* end_entries = block + size;
+
+ if (end_entries <= block ||
+ end_entries <= start() ||
+ end_entries > end())
+ return Bad(".relocs block outside image");
+
+ // Walk through the two-byte entries.
+ for (const uint8* p = block + 8; p < end_entries; p += 2) {
+ uint16 entry = ReadU16(p, 0);
+ int type = entry >> 12;
+ int offset = entry & 0xFFF;
+
+ RVA rva = page_rva + offset;
+ if (type == 3) { // IMAGE_REL_BASED_HIGHLOW
+ relocs->push_back(rva);
+ } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE
+ // Ignore, used as padding.
+ } else {
+ // Does not occur in Windows x86 executables.
+ return Bad("unknown type of reloc");
+ }
+ }
+
+ block += size;
+ }
+
+ std::sort(relocs->begin(), relocs->end());
+
+ return true;
+}
+
+const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
+ for (int i = 0; i < number_of_sections_; i++) {
+ const Section* section = &sections_[i];
+ uint32 offset = rva - section->virtual_address;
+ if (offset < section->virtual_size) {
+ return section;
+ }
+ }
+ return NULL;
+}
+
+int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ if (section) {
+ uint32 offset = rva - section->virtual_address;
+ if (offset < section->size_of_raw_data) {
+ return section->file_offset_of_raw_data + offset;
+ } else {
+ return kNoOffset; // In section but not in file (e.g. uninit data).
+ }
+ }
+
+ // Small RVA values point into the file header in the loaded image.
+ // RVA 0 is the module load address which Windows uses as the module handle.
+ // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
+ // DOS header.
+ if (rva == 0 || rva == 2)
+ return rva;
+
+ NOTREACHED();
+ return kNoOffset;
+}
+
+const uint8* DisassemblerWin32X86::RVAToPointer(RVA rva) const {
+ int file_offset = RVAToFileOffset(rva);
+ if (file_offset == kNoOffset)
+ return NULL;
+ else
+ return OffsetToPointer(file_offset);
+}
+
+std::string DisassemblerWin32X86::SectionName(const Section* section) {
+ if (section == NULL)
+ return "<none>";
+ char name[9];
+ memcpy(name, section->name, 8);
+ name[8] = '\0'; // Ensure termination.
+ return name;
+}
+
+CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
+ bool ok = true;
+ // Walk all the bytes in the file, whether or not in a section.
+ uint32 file_offset = 0;
+ while (ok && file_offset < length()) {
+ const Section* section = FindNextSection(file_offset);
+ if (section == NULL) {
+ // No more sections. There should not be extra stuff following last
+ // section.
+ // ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
+ break;
+ }
+ if (file_offset < section->file_offset_of_raw_data) {
+ uint32 section_start_offset = section->file_offset_of_raw_data;
+ ok = ParseNonSectionFileRegion(file_offset, section_start_offset,
+ program);
+ file_offset = section_start_offset;
+ }
+ if (ok) {
+ uint32 end = file_offset + section->size_of_raw_data;
+ ok = ParseFileRegion(section, file_offset, end, program);
+ file_offset = end;
+ }
+ }
+
+#if COURGETTE_HISTOGRAM_TARGETS
+ HistogramTargets("abs32 relocs", abs32_target_rvas_);
+ HistogramTargets("rel32 relocs", rel32_target_rvas_);
+#endif
+
+ return ok;
}
bool DisassemblerWin32X86::ParseAbs32Relocs() {
abs32_locations_.clear();
- if (!pe_info().ParseRelocs(&abs32_locations_))
+ if (!ParseRelocs(&abs32_locations_))
return false;
std::sort(abs32_locations_.begin(), abs32_locations_.end());
@@ -61,8 +379,8 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
- uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva));
- ++abs32_target_rvas_[target_address - pe_info().image_base()];
+ uint32 target_address = Read32LittleEndian(RVAToPointer(rva));
+ ++abs32_target_rvas_[target_address - image_base()];
}
#endif
return true;
@@ -70,8 +388,8 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
uint32 file_offset = 0;
- while (file_offset < pe_info().length()) {
- const Section* section = pe_info().FindNextSection(file_offset);
+ while (file_offset < length()) {
+ const Section* section = FindNextSection(file_offset);
if (section == NULL)
break;
if (file_offset < section->file_offset_of_raw_data)
@@ -114,12 +432,12 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
uint32 start_file_offset = section->file_offset_of_raw_data;
uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
- RVA relocs_start_rva = pe_info().base_relocation_table().address_;
+ RVA relocs_start_rva = base_relocation_table().address_;
- const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
- const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
+ const uint8* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8* end_pointer = OffsetToPointer(end_file_offset);
- RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
+ RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
// Quick way to convert from Pointer to RVA within a single Section is to
@@ -133,7 +451,7 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
while (p < end_pointer) {
RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
if (current_rva == relocs_start_rva) {
- uint32 relocs_size = pe_info().base_relocation_table().size_;
+ uint32 relocs_size = base_relocation_table().size_;
if (relocs_size) {
p += relocs_size;
continue;
@@ -179,7 +497,7 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
// To be valid, rel32 target must be within image, and within this
// section.
- if (pe_info().IsValidRVA(target_rva) &&
+ if (IsValidRVA(target_rva) &&
start_rva <= target_rva && target_rva < end_rva) {
rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
@@ -193,39 +511,6 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
}
}
-CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
- bool ok = true;
- // Walk all the bytes in the file, whether or not in a section.
- uint32 file_offset = 0;
- while (ok && file_offset < pe_info().length()) {
- const Section* section = pe_info().FindNextSection(file_offset);
- if (section == NULL) {
- // No more sections. There should not be extra stuff following last
- // section.
- // ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
- break;
- }
- if (file_offset < section->file_offset_of_raw_data) {
- uint32 section_start_offset = section->file_offset_of_raw_data;
- ok = ParseNonSectionFileRegion(file_offset, section_start_offset,
- program);
- file_offset = section_start_offset;
- }
- if (ok) {
- uint32 end = file_offset + section->size_of_raw_data;
- ok = ParseFileRegion(section, file_offset, end, program);
- file_offset = end;
- }
- }
-
-#if COURGETTE_HISTOGRAM_TARGETS
- HistogramTargets("abs32 relocs", abs32_target_rvas_);
- HistogramTargets("rel32 relocs", rel32_target_rvas_);
-#endif
-
- return ok;
-}
-
CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
uint32 start_file_offset,
uint32 end_file_offset,
@@ -233,8 +518,8 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
if (incomplete_disassembly_)
return true;
- const uint8* start = pe_info().FileOffsetToPointer(start_file_offset);
- const uint8* end = pe_info().FileOffsetToPointer(end_file_offset);
+ const uint8* start = OffsetToPointer(start_file_offset);
+ const uint8* end = OffsetToPointer(end_file_offset);
const uint8* p = start;
@@ -251,12 +536,12 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(
const Section* section,
uint32 start_file_offset, uint32 end_file_offset,
AssemblyProgram* program) {
- RVA relocs_start_rva = pe_info().base_relocation_table().address_;
+ RVA relocs_start_rva = base_relocation_table().address_;
- const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
- const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
+ const uint8* start_pointer = OffsetToPointer(start_file_offset);
+ const uint8* end_pointer = OffsetToPointer(end_file_offset);
- RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
+ RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
// Quick way to convert from Pointer to RVA within a single Section is to
@@ -280,7 +565,7 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(
ok = program->EmitMakeRelocsInstruction();
if (!ok)
break;
- uint32 relocs_size = pe_info().base_relocation_table().size_;
+ uint32 relocs_size = base_relocation_table().size_;
if (relocs_size) {
p += relocs_size;
continue;
@@ -292,7 +577,7 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(
if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
uint32 target_address = Read32LittleEndian(p);
- RVA target_rva = target_address - pe_info().image_base();
+ RVA target_rva = target_address - image_base();
// TODO(sra): target could be Label+offset. It is not clear how to guess
// which it might be. We assume offset==0.
ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva));
@@ -363,7 +648,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
std::cout << std::dec << p->first << ": " << count;
if (count <= 2) {
for (size_t i = 0; i < count; ++i)
- std::cout << " " << pe_info().DescribeRVA(p->second[i]);
+ std::cout << " " << DescribeRVA(p->second[i]);
}
std::cout << std::endl;
someSkipped = false;
@@ -374,4 +659,77 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind,
}
#endif // COURGETTE_HISTOGRAM_TARGETS
+
+// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
+// that during development I'm finding I need to call it when compiled in
+// Release mode. Hence:
+// TODO(sra): make this compile only for debug mode.
+std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const {
+ const Section* section = RVAToSection(rva);
+ std::ostringstream s;
+ s << std::hex << rva;
+ if (section) {
+ s << " (";
+ s << SectionName(section) << "+"
+ << std::hex << (rva - section->virtual_address)
+ << ")";
+ }
+ return s.str();
+}
+
+const Section* DisassemblerWin32X86::FindNextSection(uint32 fileOffset) const {
+ const Section* best = 0;
+ for (int i = 0; i < number_of_sections_; i++) {
+ const Section* section = &sections_[i];
+ if (section->size_of_raw_data > 0) { // i.e. has data in file.
+ if (fileOffset <= section->file_offset_of_raw_data) {
+ if (best == 0 ||
+ section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
+ best = section;
+ }
+ }
+ }
+ }
+ return best;
+}
+
+RVA DisassemblerWin32X86::FileOffsetToRVA(uint32 file_offset) const {
+ for (int i = 0; i < number_of_sections_; i++) {
+ const Section* section = &sections_[i];
+ uint32 offset = file_offset - section->file_offset_of_raw_data;
+ if (offset < section->size_of_raw_data) {
+ return section->virtual_address + offset;
+ }
+ }
+ return 0;
+}
+
+bool DisassemblerWin32X86::ReadDataDirectory(
+ int index,
+ ImageDataDirectory* directory) {
+
+ if (index < number_of_data_directories_) {
+ size_t offset = index * 8 + offset_of_data_directories_;
+ if (offset >= size_of_optional_header_)
+ return Bad("number of data directories inconsistent");
+ const uint8* data_directory = optional_header_ + offset;
+ if (data_directory < start() ||
+ data_directory + 8 >= end())
+ return Bad("data directory outside image");
+ RVA rva = ReadU32(data_directory, 0);
+ size_t size = ReadU32(data_directory, 4);
+ if (size > size_of_image_)
+ return Bad("data directory size too big");
+
+ // TODO(sra): validate RVA.
+ directory->address_ = rva;
+ directory->size_ = static_cast<uint32>(size);
+ return true;
+ } else {
+ directory->address_ = 0;
+ directory->size_ = 0;
+ return true;
+ }
+}
+
} // namespace courgette
diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h
index fe00b6d..733222f 100644
--- a/courgette/disassembler_win32_x86.h
+++ b/courgette/disassembler_win32_x86.h
@@ -7,8 +7,8 @@
#include "base/basictypes.h"
#include "courgette/disassembler.h"
-#include "courgette/image_info.h"
#include "courgette/memory_allocator.h"
+#include "courgette/types_win_pe.h"
namespace courgette {
@@ -16,13 +16,44 @@ class AssemblyProgram;
class DisassemblerWin32X86 : public Disassembler {
public:
- explicit DisassemblerWin32X86(PEInfo* pe_info);
+ explicit DisassemblerWin32X86(const void* start, size_t length);
+
+ virtual ExecutableType kind() { return WIN32_X86; }
+
+ // Returns 'true' if the buffer appears to point to a Windows 32 bit
+ // executable, 'false' otherwise. If ParseHeader() succeeds, other member
+ // functions may be called.
+ virtual bool ParseHeader();
virtual bool Disassemble(AssemblyProgram* target);
- protected:
- PEInfo& pe_info() { return *pe_info_; }
+ //
+ // Exposed for test purposes
+ //
+
+ bool has_text_section() const { return has_text_section_; }
+ uint32 size_of_code() const { return size_of_code_; }
+ bool is_32bit() const { return !is_PE32_plus_; }
+
+ // Returns 'true' if the base relocation table can be parsed.
+ // Output is a vector of the RVAs corresponding to locations within executable
+ // that are listed in the base relocation table.
+ bool ParseRelocs(std::vector<RVA> *addresses);
+
+ // Returns Section containing the relative virtual address, or NULL if none.
+ const Section* RVAToSection(RVA rva) const;
+
+ static const int kNoOffset = -1;
+ // Returns kNoOffset if there is no file offset corresponding to 'rva'.
+ int RVAToFileOffset(RVA rva) const;
+ // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
+ // is returned if there is no file offset corresponding to 'rva'.
+ const uint8* RVAToPointer(RVA rva) const;
+
+ static std::string SectionName(const Section* section);
+
+ protected:
CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT;
bool ParseAbs32Relocs();
void ParseRel32RelocsFromSections();
@@ -38,17 +69,86 @@ class DisassemblerWin32X86 : public Disassembler {
void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
#endif
- PEInfo* pe_info_;
+ // Most addresses are represented as 32-bit RVAs. The one address we can't
+ // do this with is the image base address, which is stored as 64 bits.
+ // 'image_base' truncates it to 32 bits, so it is valid only for 32-bit
+ // executables.
+ uint32 image_base() const { return static_cast<uint32>(image_base_); }
+
+ const ImageDataDirectory& base_relocation_table() const {
+ return base_relocation_table_;
+ }
+
+ bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
+
+ // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
+ std::string DescribeRVA(RVA rva) const;
+
+ // Finds the first section at file_offset or above. Does not return sections
+ // that have no raw bytes in the file.
+ const Section* FindNextSection(uint32 file_offset) const;
+
+ // There are 2 'coordinate systems' for reasoning about executables.
+ // FileOffset - the offset within a single .EXE or .DLL *file*.
+ // RVA - relative virtual address (offset within *loaded image*)
+ // FileOffsetToRVA and RVAToFileOffset convert between these representations.
+
+ RVA FileOffsetToRVA(uint32 offset) const;
+
+
+ private:
+
+ bool ReadDataDirectory(int index, ImageDataDirectory* dir);
+
bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits
std::vector<RVA> abs32_locations_;
std::vector<RVA> rel32_locations_;
+ //
+ // Fields that are always valid.
+ //
+
+ //
+ // Information that is valid after successful ParseHeader.
+ //
+ bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
+
+ // Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
+ const uint8 *optional_header_;
+ uint16 size_of_optional_header_;
+ uint16 offset_of_data_directories_;
+
+ uint16 machine_type_;
+ uint16 number_of_sections_;
+ const Section *sections_;
+ bool has_text_section_;
+
+ uint32 size_of_code_;
+ uint32 size_of_initialized_data_;
+ uint32 size_of_uninitialized_data_;
+ RVA base_of_code_;
+ RVA base_of_data_;
+
+ uint64 image_base_; // range limited to 32 bits for 32 bit executable
+ uint32 size_of_image_;
+ int number_of_data_directories_;
+
+ ImageDataDirectory export_table_;
+ ImageDataDirectory import_table_;
+ ImageDataDirectory resource_table_;
+ ImageDataDirectory exception_table_;
+ ImageDataDirectory base_relocation_table_;
+ ImageDataDirectory bound_import_table_;
+ ImageDataDirectory import_address_table_;
+ ImageDataDirectory delay_import_descriptor_;
+ ImageDataDirectory clr_runtime_header_;
+
#if COURGETTE_HISTOGRAM_TARGETS
std::map<RVA, int> abs32_target_rvas_;
std::map<RVA, int> rel32_target_rvas_;
#endif
+
DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86);
};
diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc
new file mode 100644
index 0000000..c310675
--- /dev/null
+++ b/courgette/disassembler_win32_x86_unittest.cc
@@ -0,0 +1,98 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/disassembler_win32_x86.h"
+
+#include "courgette/base_test_unittest.h"
+
+// Test fixture for courgette::DisassemblerWin32X86.  Each Test* member loads
+// one sample file with FileContents() (presumably inherited from BaseTest --
+// confirm) and checks what the disassembler reports about it.  The members
+// are plain methods rather than gtest cases; they are invoked from the single
+// TEST_F defined at the end of this file.
+class DisassemblerWin32X86Test : public BaseTest {
+ public:
+
+ // Checks "setup1.exe", whose header is expected to parse successfully.
+ void TestExe() const;
+ // Checks "pe-64.exe", whose header is expected to be rejected.
+ void TestExe64() const;
+ // Checks "en-US.dll", whose header is expected to be rejected.
+ void TestResourceDll() const;
+};
+
+// Checks a regular 32-bit executable: the header parses, the reported header
+// fields match the known values for "setup1.exe", RVAs map to the expected
+// file offsets, the relocation table parses, and both offset 0 and RVA 0
+// resolve to the first byte of the buffer.
+void DisassemblerWin32X86Test::TestExe() const {
+  std::string file1 = FileContents("setup1.exe");
+
+  scoped_ptr<courgette::DisassemblerWin32X86> disassembler(
+      new courgette::DisassemblerWin32X86(file1.c_str(), file1.length()));
+
+  // The other member functions may only be called once ParseHeader() has
+  // succeeded, so a failure here has to stop this scenario.
+  bool can_parse_header = disassembler->ParseHeader();
+  ASSERT_TRUE(can_parse_header);
+
+  // The executable is the whole file, not 'embedded' within the file
+  EXPECT_EQ(file1.length(), disassembler->length());
+
+  EXPECT_TRUE(disassembler->ok());
+  EXPECT_TRUE(disassembler->has_text_section());
+  EXPECT_EQ(449536U, disassembler->size_of_code());
+  EXPECT_TRUE(disassembler->is_32bit());
+  // Expected value first, like every other EXPECT_EQ in this function.
+  EXPECT_EQ(std::string(".text"),
+            courgette::DisassemblerWin32X86::SectionName(
+                disassembler->RVAToSection(0x00401234 - 0x00400000)));
+
+  EXPECT_EQ(0, disassembler->RVAToFileOffset(0));
+  EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096));
+  EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000));
+
+  std::vector<courgette::RVA> relocs;
+  bool can_parse_relocs = disassembler->ParseRelocs(&relocs);
+  EXPECT_TRUE(can_parse_relocs);
+
+  // The pointer comparisons are fatal: the reads that follow them are only
+  // safe when the pointer really is the start of the buffer.
+  const uint8* offset_p = disassembler->OffsetToPointer(0);
+  ASSERT_EQ(reinterpret_cast<const void*>(file1.c_str()),
+            reinterpret_cast<const void*>(offset_p));
+  EXPECT_EQ('M', offset_p[0]);
+  EXPECT_EQ('Z', offset_p[1]);
+
+  // RVAToPointer() returns NULL when the RVA has no file offset.
+  const uint8* rva_p = disassembler->RVAToPointer(0);
+  ASSERT_EQ(reinterpret_cast<const void*>(file1.c_str()),
+            reinterpret_cast<const void*>(rva_p));
+  EXPECT_EQ('M', rva_p[0]);
+  EXPECT_EQ('Z', rva_p[1]);
+}
+
+// Checks a 64-bit executable.  The test expects header parsing to be
+// rejected, while the fields read from the header along the way are still
+// expected to carry the values found in "pe-64.exe".
+void DisassemblerWin32X86Test::TestExe64() const {
+  const std::string image = FileContents("pe-64.exe");
+
+  scoped_ptr<courgette::DisassemblerWin32X86> parser(
+      new courgette::DisassemblerWin32X86(image.c_str(), image.length()));
+
+  // Parsing must report failure for this input.
+  EXPECT_FALSE(parser->ParseHeader());
+
+  // The executable is the whole file, not 'embedded' with the file
+  EXPECT_EQ(image.length(), parser->length());
+
+  // State observable after the rejected parse.
+  EXPECT_FALSE(parser->ok());
+  EXPECT_TRUE(parser->has_text_section());
+  EXPECT_EQ(43008U, parser->size_of_code());
+  EXPECT_FALSE(parser->is_32bit());
+}
+
+// Checks a DLL for which the test expects no text section and no code.  The
+// test expects header parsing to be rejected even though the file is
+// reported as 32-bit.
+void DisassemblerWin32X86Test::TestResourceDll() const {
+  const std::string image = FileContents("en-US.dll");
+
+  scoped_ptr<courgette::DisassemblerWin32X86> parser(
+      new courgette::DisassemblerWin32X86(image.c_str(), image.length()));
+
+  // Parsing must report failure for this input.
+  EXPECT_FALSE(parser->ParseHeader());
+
+  // The executable is the whole file, not 'embedded' with the file
+  EXPECT_EQ(image.length(), parser->length());
+
+  // State observable after the rejected parse.
+  EXPECT_FALSE(parser->ok());
+  EXPECT_FALSE(parser->has_text_section());
+  EXPECT_EQ(0U, parser->size_of_code());
+  EXPECT_TRUE(parser->is_32bit());
+}
+
+// Single gtest case that runs the three fixture scenarios one after another,
+// so their failures are all reported under DisassemblerWin32X86Test.All.
+TEST_F(DisassemblerWin32X86Test, All) {
+ TestExe();
+ TestExe64();
+ TestResourceDll();
+}
diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h
index 5acfeb6..b120353 100644
--- a/courgette/encoded_program.h
+++ b/courgette/encoded_program.h
@@ -8,7 +8,7 @@
#include <vector>
#include "base/basictypes.h"
-#include "courgette/image_info.h"
+#include "courgette/disassembler.h"
#include "courgette/memory_allocator.h"
namespace courgette {
diff --git a/courgette/ensemble.cc b/courgette/ensemble.cc
index a2bea8f..fb9b25b 100644
--- a/courgette/ensemble.cc
+++ b/courgette/ensemble.cc
@@ -7,7 +7,6 @@
#include "base/basictypes.h"
#include "base/string_number_conversions.h"
-#include "courgette/image_info.h"
#include "courgette/region.h"
#include "courgette/streams.h"
#include "courgette/simple_delta.h"
@@ -16,14 +15,11 @@ namespace courgette {
Element::Element(ExecutableType kind,
Ensemble* ensemble,
- const Region& region,
- PEInfo* info)
- : kind_(kind), ensemble_(ensemble), region_(region), info_(info) {
+ const Region& region)
+ : kind_(kind), ensemble_(ensemble), region_(region) {
}
-Element::~Element() {
- delete info_;
-}
+Element::~Element() {}
std::string Element::Name() const {
return ensemble_->name() + "("
@@ -41,41 +37,22 @@ Status Ensemble::FindEmbeddedElements() {
size_t position = 0;
while (position < length) {
- ExecutableType type = DetectExecutableType(start + position,
- length - position);
+ ExecutableType type;
+ size_t detected_length;
+
+ Status result = DetectExecutableType(start + position,
+ length - position,
+ &type, &detected_length);
- //
- // TODO(dgarrett) This switch can go away totally after two things.
- //
- // Make ImageInfo generic for all executable types.
- // Find a generic way to handle length detection for executables.
- //
- // When this switch is gone, that's one less piece of code that is
- // executable type aware.
- //
- switch (type) {
- case UNKNOWN: {
- // No Element found at current position.
- ++position;
- break;
- }
- case WIN32_X86: {
- // The Info is only created to detect the length of the executable
- courgette::PEInfo* info(new courgette::PEInfo());
- info->Init(start + position, length - position);
- if (!info->ParseHeader()) {
- delete info;
- position++;
- break;
- }
- Region region(start + position, info->length());
+ if (result == C_OK) {
+ Region region(start + position, detected_length);
- Element* element = new Element(type, this, region, info);
- owned_elements_.push_back(element);
- elements_.push_back(element);
- position += region.length();
- break;
- }
+ Element* element = new Element(type, this, region);
+ owned_elements_.push_back(element);
+ elements_.push_back(element);
+ position += region.length();
+ } else {
+ position++;
}
}
return C_OK;
diff --git a/courgette/ensemble.h b/courgette/ensemble.h
index e766782..4d26076 100644
--- a/courgette/ensemble.h
+++ b/courgette/ensemble.h
@@ -30,7 +30,6 @@ namespace courgette {
// Forward declarations:
class Ensemble;
-class PEInfo;
// An Element is a region of an Ensemble with an identifyable kind.
//
@@ -38,8 +37,7 @@ class Element {
public:
Element(ExecutableType kind,
Ensemble* ensemble,
- const Region& region,
- PEInfo*info);
+ const Region& region);
virtual ~Element();
@@ -53,14 +51,10 @@ class Element {
// containing Ensemble.
size_t offset_in_ensemble() const;
- // The ImageInfo for this executable
- virtual PEInfo* GetImageInfo() const { return info_; }
-
private:
ExecutableType kind_;
Ensemble* ensemble_;
Region region_;
- PEInfo *info_;
DISALLOW_COPY_AND_ASSIGN(Element);
};
diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc
index 499ccac..475b0a4 100644
--- a/courgette/ensemble_apply.cc
+++ b/courgette/ensemble_apply.cc
@@ -11,7 +11,6 @@
#include "base/logging.h"
#include "courgette/crc.h"
-#include "courgette/image_info.h"
#include "courgette/region.h"
#include "courgette/streams.h"
#include "courgette/simple_delta.h"
diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc
index 62105b9..07ede7e 100644
--- a/courgette/ensemble_create.cc
+++ b/courgette/ensemble_create.cc
@@ -24,7 +24,6 @@
#include "courgette/third_party/bsdiff.h"
#include "courgette/crc.h"
#include "courgette/difference_estimator.h"
-#include "courgette/image_info.h"
#include "courgette/streams.h"
#include "courgette/region.h"
#include "courgette/simple_delta.h"
diff --git a/courgette/image_info.cc b/courgette/image_info.cc
deleted file mode 100644
index ce0e0ae..0000000
--- a/courgette/image_info.cc
+++ /dev/null
@@ -1,419 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "courgette/image_info.h"
-
-#include <memory.h>
-#include <algorithm>
-#include <map>
-#include <set>
-#include <sstream>
-#include <vector>
-
-#include "base/logging.h"
-
-namespace courgette {
-
-std::string SectionName(const Section* section) {
- if (section == NULL)
- return "<none>";
- char name[9];
- memcpy(name, section->name, 8);
- name[8] = '\0'; // Ensure termination.
- return name;
-}
-
-PEInfo::PEInfo()
- : failure_reason_("uninitialized"),
- start_(0),
- end_(0),
- length_(0),
- is_PE32_plus_(false),
- file_length_(0),
- optional_header_(NULL),
- size_of_optional_header_(0),
- offset_of_data_directories_(0),
- machine_type_(0),
- number_of_sections_(0),
- sections_(NULL),
- has_text_section_(false),
- size_of_code_(0),
- size_of_initialized_data_(0),
- size_of_uninitialized_data_(0),
- base_of_code_(0),
- base_of_data_(0),
- image_base_(0),
- size_of_image_(0),
- number_of_data_directories_(0) {
-}
-
-void PEInfo::Init(const void* start, size_t length) {
- start_ = reinterpret_cast<const uint8*>(start);
- length_ = static_cast<int>(length);
- end_ = start_ + length_;
- failure_reason_ = "unparsed";
-}
-
-// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except
-// that during development I'm finding I need to call it when compiled in
-// Release mode. Hence:
-// TODO(sra): make this compile only for debug mode.
-std::string PEInfo::DescribeRVA(RVA rva) const {
- const Section* section = RVAToSection(rva);
- std::ostringstream s;
- s << std::hex << rva;
- if (section) {
- s << " (";
- s << SectionName(section) << "+"
- << std::hex << (rva - section->virtual_address)
- << ")";
- }
- return s.str();
-}
-
-const Section* PEInfo::FindNextSection(uint32 fileOffset) const {
- const Section* best = 0;
- for (int i = 0; i < number_of_sections_; i++) {
- const Section* section = &sections_[i];
- if (section->size_of_raw_data > 0) { // i.e. has data in file.
- if (fileOffset <= section->file_offset_of_raw_data) {
- if (best == 0 ||
- section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
- best = section;
- }
- }
- }
- }
- return best;
-}
-
-const Section* PEInfo::RVAToSection(RVA rva) const {
- for (int i = 0; i < number_of_sections_; i++) {
- const Section* section = &sections_[i];
- uint32 offset = rva - section->virtual_address;
- if (offset < section->virtual_size) {
- return section;
- }
- }
- return NULL;
-}
-
-int PEInfo::RVAToFileOffset(RVA rva) const {
- const Section* section = RVAToSection(rva);
- if (section) {
- uint32 offset = rva - section->virtual_address;
- if (offset < section->size_of_raw_data) {
- return section->file_offset_of_raw_data + offset;
- } else {
- return kNoOffset; // In section but not in file (e.g. uninit data).
- }
- }
-
- // Small RVA values point into the file header in the loaded image.
- // RVA 0 is the module load address which Windows uses as the module handle.
- // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
- // DOS header.
- if (rva == 0 || rva == 2)
- return rva;
-
- NOTREACHED();
- return kNoOffset;
-}
-
-const uint8* PEInfo::RVAToPointer(RVA rva) const {
- int file_offset = RVAToFileOffset(rva);
- if (file_offset == kNoOffset)
- return NULL;
- else
- return start_ + file_offset;
-}
-
-RVA PEInfo::FileOffsetToRVA(uint32 file_offset) const {
- for (int i = 0; i < number_of_sections_; i++) {
- const Section* section = &sections_[i];
- uint32 offset = file_offset - section->file_offset_of_raw_data;
- if (offset < section->size_of_raw_data) {
- return section->virtual_address + offset;
- }
- }
- return 0;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-// Constants and offsets gleaned from WINNT.H and various articles on the
-// format of Windows PE executables.
-
-// This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew):
-const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c;
-
-const uint16 kImageNtOptionalHdr32Magic = 0x10b;
-const uint16 kImageNtOptionalHdr64Magic = 0x20b;
-
-const size_t kSizeOfCoffHeader = 20;
-const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96;
-const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112;
-
-// These helper functions avoid the need for casts in the main code.
-inline uint16 ReadU16(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint16*>(address + offset);
-}
-
-inline uint32 ReadU32(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint32*>(address + offset);
-}
-
-inline uint64 ReadU64(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint64*>(address + offset);
-}
-
-} // namespace
-
-// ParseHeader attempts to match up the buffer with the Windows data
-// structures that exist within a Windows 'Portable Executable' format file.
-// Returns 'true' if the buffer matches, and 'false' if the data looks
-// suspicious. Rather than try to 'map' the buffer to the numerous windows
-// structures, we extract the information we need into the courgette::PEInfo
-// structure.
-//
-bool PEInfo::ParseHeader() {
- if (length_ < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/)
- return Bad("Too small");
-
- // Have 'MZ' magic for a DOS header?
- if (start_[0] != 'M' || start_[1] != 'Z')
- return Bad("Not MZ");
-
- // offset from DOS header to PE header is stored in DOS header.
- uint32 offset = ReadU32(start_, kOffsetOfFileAddressOfNewExeHeader);
-
- const uint8* const pe_header = start_ + offset;
- const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
- if (pe_header <= start_ || pe_header >= end_ - kMinPEHeaderSize)
- return Bad("Bad offset to PE header");
-
- if (offset % 8 != 0)
- return Bad("Misaligned PE header");
-
- // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
- // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx
- //
- // The first field of the IMAGE_NT_HEADERS is the signature.
- if (!(pe_header[0] == 'P' &&
- pe_header[1] == 'E' &&
- pe_header[2] == 0 &&
- pe_header[3] == 0))
- return Bad("no PE signature");
-
- // The second field of the IMAGE_NT_HEADERS is the COFF header.
- // The COFF header is also called an IMAGE_FILE_HEADER
- // http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx
- const uint8* const coff_header = pe_header + 4;
- machine_type_ = ReadU16(coff_header, 0);
- number_of_sections_ = ReadU16(coff_header, 2);
- size_of_optional_header_ = ReadU16(coff_header, 16);
-
- // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64)
- const uint8* const optional_header = coff_header + kSizeOfCoffHeader;
- optional_header_ = optional_header;
-
- if (optional_header + size_of_optional_header_ >= end_)
- return Bad("optional header past end of file");
-
- // Check we can read the magic.
- if (size_of_optional_header_ < 2)
- return Bad("optional header no magic");
-
- uint16 magic = ReadU16(optional_header, 0);
-
- if (magic == kImageNtOptionalHdr32Magic) {
- is_PE32_plus_ = false;
- offset_of_data_directories_ =
- kOffsetOfDataDirectoryFromImageOptionalHeader32;
- } else if (magic == kImageNtOptionalHdr64Magic) {
- is_PE32_plus_ = true;
- offset_of_data_directories_ =
- kOffsetOfDataDirectoryFromImageOptionalHeader64;
- } else {
- return Bad("unrecognized magic");
- }
-
- // Check that we can read the rest of the the fixed fields. Data directories
- // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER.
- if (size_of_optional_header_ < offset_of_data_directories_)
- return Bad("optional header too short");
-
- // The optional header is either an IMAGE_OPTIONAL_HEADER32 or
- // IMAGE_OPTIONAL_HEADER64
- // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx
- //
- // Copy the fields we care about.
- size_of_code_ = ReadU32(optional_header, 4);
- size_of_initialized_data_ = ReadU32(optional_header, 8);
- size_of_uninitialized_data_ = ReadU32(optional_header, 12);
- base_of_code_ = ReadU32(optional_header, 20);
- if (is_PE32_plus_) {
- base_of_data_ = 0;
- image_base_ = ReadU64(optional_header, 24);
- } else {
- base_of_data_ = ReadU32(optional_header, 24);
- image_base_ = ReadU32(optional_header, 28);
- }
- size_of_image_ = ReadU32(optional_header, 56);
- number_of_data_directories_ =
- ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92));
-
- if (size_of_code_ >= length_ ||
- size_of_initialized_data_ >= length_ ||
- size_of_code_ + size_of_initialized_data_ >= length_) {
- // This validation fires on some perfectly fine executables.
- // return Bad("code or initialized data too big");
- }
-
- // TODO(sra): we can probably get rid of most of the data directories.
- bool b = true;
- // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary
- // for correctness and it compiles smaller this way.
- b &= ReadDataDirectory(0, &export_table_);
- b &= ReadDataDirectory(1, &import_table_);
- b &= ReadDataDirectory(2, &resource_table_);
- b &= ReadDataDirectory(3, &exception_table_);
- b &= ReadDataDirectory(5, &base_relocation_table_);
- b &= ReadDataDirectory(11, &bound_import_table_);
- b &= ReadDataDirectory(12, &import_address_table_);
- b &= ReadDataDirectory(13, &delay_import_descriptor_);
- b &= ReadDataDirectory(14, &clr_runtime_header_);
- if (!b) {
- return Bad("malformed data directory");
- }
-
- // Sections follow the optional header.
- sections_ =
- reinterpret_cast<const Section*>(optional_header +
- size_of_optional_header_);
- file_length_ = 0;
-
- for (int i = 0; i < number_of_sections_; ++i) {
- const Section* section = &sections_[i];
-
- // TODO(sra): consider using the 'characteristics' field of the section
- // header to see if the section contains instructions.
- if (memcmp(section->name, ".text", 6) == 0)
- has_text_section_ = true;
-
- uint32 section_end =
- section->file_offset_of_raw_data + section->size_of_raw_data;
- if (section_end > file_length_)
- file_length_ = section_end;
- }
-
- if (!is_32bit()) {
- return Bad("64 bit executables are not yet supported");
- }
-
- if (!has_text_section()) {
- return Bad("Resource-only executables are not yet supported");
- }
-
- failure_reason_ = NULL;
- return true;
-}
-
-bool PEInfo::ReadDataDirectory(int index, ImageDataDirectory* directory) {
- if (index < number_of_data_directories_) {
- size_t offset = index * 8 + offset_of_data_directories_;
- if (offset >= size_of_optional_header_)
- return Bad("number of data directories inconsistent");
- const uint8* data_directory = optional_header_ + offset;
- if (data_directory < start_ || data_directory + 8 >= end_)
- return Bad("data directory outside image");
- RVA rva = ReadU32(data_directory, 0);
- size_t size = ReadU32(data_directory, 4);
- if (size > size_of_image_)
- return Bad("data directory size too big");
-
- // TODO(sra): validate RVA.
- directory->address_ = rva;
- directory->size_ = static_cast<uint32>(size);
- return true;
- } else {
- directory->address_ = 0;
- directory->size_ = 0;
- return true;
- }
-}
-
-bool PEInfo::Bad(const char* reason) {
- failure_reason_ = reason;
- return false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-bool PEInfo::ParseRelocs(std::vector<RVA> *relocs) {
- relocs->clear();
-
- size_t relocs_size = base_relocation_table_.size_;
- if (relocs_size == 0)
- return true;
-
- // The format of the base relocation table is a sequence of variable sized
- // IMAGE_BASE_RELOCATION blocks. Search for
- // "The format of the base relocation data is somewhat quirky"
- // at http://msdn.microsoft.com/en-us/library/ms809762.aspx
-
- const uint8* start = RVAToPointer(base_relocation_table_.address_);
- const uint8* end = start + relocs_size;
-
- // Make sure entire base relocation table is within the buffer.
- if (start < start_ ||
- start >= end_ ||
- end <= start_ ||
- end > end_) {
- return Bad(".relocs outside image");
- }
-
- const uint8* block = start;
-
- // Walk the variable sized blocks.
- while (block + 8 < end) {
- RVA page_rva = ReadU32(block, 0);
- uint32 size = ReadU32(block, 4);
- if (size < 8 || // Size includes header ...
- size % 4 != 0) // ... and is word aligned.
- return Bad("unreasonable relocs block");
-
- const uint8* end_entries = block + size;
-
- if (end_entries <= block || end_entries <= start_ || end_entries > end_)
- return Bad(".relocs block outside image");
-
- // Walk through the two-byte entries.
- for (const uint8* p = block + 8; p < end_entries; p += 2) {
- uint16 entry = ReadU16(p, 0);
- int type = entry >> 12;
- int offset = entry & 0xFFF;
-
- RVA rva = page_rva + offset;
- if (type == 3) { // IMAGE_REL_BASED_HIGHLOW
- relocs->push_back(rva);
- } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE
- // Ignore, used as padding.
- } else {
- // Does not occur in Windows x86 executables.
- return Bad("unknown type of reloc");
- }
- }
-
- block += size;
- }
-
- std::sort(relocs->begin(), relocs->end());
-
- return true;
-}
-
-} // namespace courgette
diff --git a/courgette/image_info.h b/courgette/image_info.h
deleted file mode 100644
index 17936e1..0000000
--- a/courgette/image_info.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef COURGETTE_IMAGE_INFO_H_
-#define COURGETTE_IMAGE_INFO_H_
-
-#include <string>
-#include <vector>
-
-#include "base/basictypes.h"
-
-namespace courgette {
-
-// A Relative Virtual Address is the address in the image file after it is
-// loaded into memory relative to the image load address.
-typedef uint32 RVA;
-
-// PE file section header. This struct has the same layout as the
-// IMAGE_SECTION_HEADER structure from WINNT.H
-// http://msdn.microsoft.com/en-us/library/ms680341(VS.85).aspx
-//
-#pragma pack(push, 1) // Supported by MSVC and GCC. Ensures no gaps in packing.
-struct Section {
- char name[8];
- uint32 virtual_size;
- uint32 virtual_address;
- uint32 size_of_raw_data;
- uint32 file_offset_of_raw_data;
- uint32 pointer_to_relocations; // Always zero in an image.
- uint32 pointer_to_line_numbers; // Always zero in an image.
- uint16 number_of_relocations; // Always zero in an image.
- uint16 number_of_line_numbers; // Always zero in an image.
- uint32 characteristics;
-};
-#pragma pack(pop)
-
-COMPILE_ASSERT(sizeof(Section) == 40, section_is_40_bytes);
-
-// Returns the name of a section, solving the problem that the name is not
-// always properly NUL-terminated. Used only for debugging.
-std::string SectionName(const Section* section);
-
-// ImageDataDirectory has same layout as IMAGE_DATA_DIRECTORY structure from
-// WINNT.H
-// http://msdn.microsoft.com/en-us/library/ms680305(VS.85).aspx
-//
-class ImageDataDirectory {
- public:
- ImageDataDirectory() : address_(0), size_(0) {}
- RVA address_;
- uint32 size_;
-};
-
-COMPILE_ASSERT(sizeof(ImageDataDirectory) == 8,
- image_data_directory_is_8_bytes);
-
-//
-// PEInfo holds information about a single Windows 'Portable Executable' format
-// file in the on-disk format.
-//
-// Imagine you had concatenated a bunch of 'original' files into one 'big'
-// file and read the big file into memory. You could find the executables
-// from the original files by calling PEInfo::Init with different addresses.
-// If PEInfo::TryParseHeader returns true, then Init was passed the address
-// of the first byte of one of the original executables, and PEIinfo::length
-// will tell how long the file was.
-//
-class PEInfo {
- public:
- PEInfo();
-
- // ok() may always be called but returns 'true' only after ParseHeader
- // succeeds.
- bool ok() const { return failure_reason_ == NULL; }
-
- // Initialize with buffer. This just sets up the region of memory that
- // potentially contains the bytes from an executable file. The caller
- // continues to own 'start'.
- void Init(const void* start, size_t length);
-
- // Returns 'true' if the buffer appears to point to a Windows 32 bit
- // executable, 'false' otherwise. If ParseHeader() succeeds, other member
- // functions may be called.
- bool ParseHeader();
-
- // Returns 'true' if the base relocation table can be parsed.
- // Output is a vector of the RVAs corresponding to locations within executable
- // that are listed in the base relocation table.
- bool ParseRelocs(std::vector<RVA> *addresses);
-
- // Returns the length of the image. Valid only if ParseHeader succeeded.
- uint32 length() const { return file_length_; }
-
- bool has_text_section() const { return has_text_section_; }
-
- uint32 size_of_code() const { return size_of_code_; }
-
- bool is_32bit() const { return !is_PE32_plus_; }
-
- // Most addresses are represented as 32-bit RVAs. The one address we can't
- // do this with is the image base address. 'image_base' is valid only for
- // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
- uint32 image_base() const { return static_cast<uint32>(image_base_); }
- uint64 image_base_64() const { return image_base_; }
-
- const ImageDataDirectory& base_relocation_table() const {
- return base_relocation_table_;
- }
-
- bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
-
- // Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
- std::string DescribeRVA(RVA rva) const;
-
- // Returns a pointer into the memory copy of the file format.
- // FileOffsetToPointer(0) returns a pointer to the start of the file format.
- const uint8* FileOffsetToPointer(uint32 offset) const {
- return start_ + offset;
- }
-
- // Finds the first section at file_offset or above. Does not return sections
- // that have no raw bytes in the file.
- const Section* FindNextSection(uint32 file_offset) const;
- // Returns Section containing the relative virtual address, or NULL if none.
- const Section* RVAToSection(RVA rva) const;
-
- // There are 2 'coordinate systems' for reasoning about executables.
- // FileOffset - the the offset within a single .EXE or .DLL *file*.
- // RVA - relative virtual address (offset within *loaded image*)
- // FileOffsetToRVA and RVAToFileOffset convert between these representations.
-
- RVA FileOffsetToRVA(uint32 offset) const;
-
- static const int kNoOffset = -1;
- // Returns kNoOffset if there is no file offset corresponding to 'rva'.
- int RVAToFileOffset(RVA rva) const;
-
- // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
- // is returned if there is no file offset corresponding to 'rva'.
- const uint8* RVAToPointer(RVA rva) const;
-
- protected:
- //
- // Fields that are always valid.
- //
- const char* failure_reason_;
-
- //
- // Basic information that is always valid after Init.
- //
- const uint8* start_; // In current memory, base for 'file offsets'.
- const uint8* end_; // In current memory.
- unsigned int length_; // In current memory.
-
- //
- // Information that is valid after successful ParseHeader.
- //
- bool is_PE32_plus_; // PE32_plus is for 64 bit executables.
- uint32 file_length_;
-
- // Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
- const uint8 *optional_header_;
- uint16 size_of_optional_header_;
- uint16 offset_of_data_directories_;
-
- uint16 machine_type_;
- uint16 number_of_sections_;
- const Section *sections_;
- bool has_text_section_;
-
- uint32 size_of_code_;
- uint32 size_of_initialized_data_;
- uint32 size_of_uninitialized_data_;
- RVA base_of_code_;
- RVA base_of_data_;
-
- uint64 image_base_; // range limited to 32 bits for 32 bit executable
- uint32 size_of_image_;
- int number_of_data_directories_;
-
- ImageDataDirectory export_table_;
- ImageDataDirectory import_table_;
- ImageDataDirectory resource_table_;
- ImageDataDirectory exception_table_;
- ImageDataDirectory base_relocation_table_;
- ImageDataDirectory bound_import_table_;
- ImageDataDirectory import_address_table_;
- ImageDataDirectory delay_import_descriptor_;
- ImageDataDirectory clr_runtime_header_;
-
- private:
- bool ReadDataDirectory(int index, ImageDataDirectory* dir);
- bool Bad(const char *reason);
-
- DISALLOW_COPY_AND_ASSIGN(PEInfo);
-};
-
-} // namespace
-#endif // COURGETTE_IMAGE_INFO_H_
diff --git a/courgette/image_info_unittest.cc b/courgette/image_info_unittest.cc
deleted file mode 100644
index e0cac7d..0000000
--- a/courgette/image_info_unittest.cc
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "courgette/base_test_unittest.h"
-#include "courgette/image_info.h"
-
-class ImageInfoTest : public BaseTest {
- public:
-
- void TestExe() const;
- void TestResourceDll() const;
-
- private:
- void ExpectExecutable(courgette::PEInfo* info) const;
-
-};
-
-void ImageInfoTest::ExpectExecutable(courgette::PEInfo* info) const {
- EXPECT_TRUE(info->ok());
- EXPECT_TRUE(info->has_text_section());
-}
-
-void ImageInfoTest::TestExe() const {
- std::string file1 = FileContents("setup1.exe");
-
- scoped_ptr<courgette::PEInfo> info(new courgette::PEInfo());
- info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length());
-
- bool can_parse_header = info->ParseHeader();
- EXPECT_TRUE(can_parse_header);
-
- // The executable is the whole file, not 'embedded' with the file
- EXPECT_EQ(file1.length(), info->length());
-
- ExpectExecutable(info.get());
- EXPECT_EQ(449536U, info->size_of_code());
- EXPECT_EQ(SectionName(info->RVAToSection(0x00401234 - 0x00400000)),
- std::string(".text"));
-
- EXPECT_EQ(0, info->RVAToFileOffset(0));
- EXPECT_EQ(1024, info->RVAToFileOffset(4096));
- EXPECT_EQ(46928, info->RVAToFileOffset(50000));
-
- std::vector<courgette::RVA> relocs;
- bool can_parse_relocs = info->ParseRelocs(&relocs);
- EXPECT_TRUE(can_parse_relocs);
-
- const uint8* p = info->RVAToPointer(0);
- EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
- reinterpret_cast<const void*>(p));
- EXPECT_EQ('M', p[0]);
- EXPECT_EQ('Z', p[1]);
-}
-
-void ImageInfoTest::TestResourceDll() const {
- std::string file1 = FileContents("en-US.dll");
-
- scoped_ptr<courgette::PEInfo> info(new courgette::PEInfo());
- info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length());
-
- // This is expected to fail, since we don't really support them yet.
- bool can_parse_header = info->ParseHeader();
- EXPECT_FALSE(can_parse_header);
-
- // The executable is the whole file, not 'embedded' with the file
- EXPECT_EQ(file1.length(), info->length());
-
- EXPECT_FALSE(info->ok());
- EXPECT_FALSE(info->has_text_section());
- EXPECT_EQ(0U, info->size_of_code());
-}
-
-TEST_F(ImageInfoTest, All) {
- TestExe();
- TestResourceDll();
-}
diff --git a/courgette/types_win_pe.h b/courgette/types_win_pe.h
new file mode 100644
index 0000000..64fd541
--- /dev/null
+++ b/courgette/types_win_pe.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TYPES_WIN_PE_H_
+#define TYPES_WIN_PE_H_
+
+#include "base/basictypes.h"
+
+
+namespace courgette {
+
+// PE file section header. This struct has the same layout as the
+// IMAGE_SECTION_HEADER structure from WINNT.H
+// http://msdn.microsoft.com/en-us/library/ms680341(VS.85).aspx
+//
+#pragma pack(push, 1) // Supported by MSVC and GCC. Ensures no gaps in packing.
+struct Section {
+ char name[8]; // Name in WINNT.H; may lack a terminating NUL when 8 chars long.
+ uint32 virtual_size; // Misc.VirtualSize in WINNT.H.
+ uint32 virtual_address; // VirtualAddress in WINNT.H.
+ uint32 size_of_raw_data; // SizeOfRawData in WINNT.H.
+ uint32 file_offset_of_raw_data; // PointerToRawData in WINNT.H.
+ uint32 pointer_to_relocations; // Always zero in an image.
+ uint32 pointer_to_line_numbers; // Always zero in an image.
+ uint16 number_of_relocations; // Always zero in an image.
+ uint16 number_of_line_numbers; // Always zero in an image.
+ uint32 characteristics; // Characteristics in WINNT.H.
+};
+#pragma pack(pop)
+
+COMPILE_ASSERT(sizeof(Section) == 40, section_is_40_bytes);
+
+// ImageDataDirectory has the same layout as the IMAGE_DATA_DIRECTORY structure
+// from WINNT.H.
+// http://msdn.microsoft.com/en-us/library/ms680305(VS.85).aspx
+// NOTE(review): RVA is not declared here; presumably by the includer - confirm.
+class ImageDataDirectory {
+ public:
+ ImageDataDirectory() : address_(0), size_(0) {} // Both fields start at zero.
+ RVA address_; // VirtualAddress in WINNT.H.
+ uint32 size_; // Size in WINNT.H.
+};
+
+COMPILE_ASSERT(sizeof(ImageDataDirectory) == 8,
+ image_data_directory_is_8_bytes);
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Constants and offsets gleaned from WINNT.H and various articles on the
+// format of Windows PE executables.
+
+// This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew):
+const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c;
+
+const uint16 kImageNtOptionalHdr32Magic = 0x10b; // PE32 optional header.
+const uint16 kImageNtOptionalHdr64Magic = 0x20b; // PE32+ optional header.
+
+const size_t kSizeOfCoffHeader = 20; // sizeof(IMAGE_FILE_HEADER).
+const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96;
+const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112;
+
+} // namespace courgette
+#endif // TYPES_WIN_PE_H_