diff options
author | dgarrett@chromium.org <dgarrett@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-26 00:50:20 +0000 |
---|---|---|
committer | dgarrett@chromium.org <dgarrett@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-26 00:50:20 +0000 |
commit | 423a381f4fd3efd99dfd7bc932777ea596cf7b17 (patch) | |
tree | fdbf4a4bc5f2b8d73b90020da470c40a22f4cc2a /courgette | |
parent | da1543a1a526aefd1114853cf737846eb5c29640 (diff) | |
download | chromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.zip chromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.tar.gz chromium_src-423a381f4fd3efd99dfd7bc932777ea596cf7b17.tar.bz2 |
Further refactoring, move ImageInfo into Disassembler/DisassemblerWin32X86.
This means that all PE specific knowledge is now contained in a single class
which leaves us in pretty good shape for supporting ELF 32.
There are still widespread assumptions about being 32 bit, but those can be
addressed at a much later date.
BUG=None
TEST=Unittests
Review URL: http://codereview.chromium.org/8166013
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@107260 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette')
-rw-r--r-- | courgette/adjustment_method.cc | 1 | ||||
-rw-r--r-- | courgette/adjustment_method_2.cc | 1 | ||||
-rw-r--r-- | courgette/assembly_program.h | 2 | ||||
-rw-r--r-- | courgette/courgette.gyp | 4 | ||||
-rw-r--r-- | courgette/courgette.h | 14 | ||||
-rw-r--r-- | courgette/disassembler.cc | 74 | ||||
-rw-r--r-- | courgette/disassembler.h | 65 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 480 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.h | 110 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86_unittest.cc | 98 | ||||
-rw-r--r-- | courgette/encoded_program.h | 2 | ||||
-rw-r--r-- | courgette/ensemble.cc | 57 | ||||
-rw-r--r-- | courgette/ensemble.h | 8 | ||||
-rw-r--r-- | courgette/ensemble_apply.cc | 1 | ||||
-rw-r--r-- | courgette/ensemble_create.cc | 1 | ||||
-rw-r--r-- | courgette/image_info.cc | 419 | ||||
-rw-r--r-- | courgette/image_info.h | 200 | ||||
-rw-r--r-- | courgette/image_info_unittest.cc | 77 | ||||
-rw-r--r-- | courgette/types_win_pe.h | 65 |
19 files changed, 838 insertions, 841 deletions
diff --git a/courgette/adjustment_method.cc b/courgette/adjustment_method.cc index f967093..53745d7 100644 --- a/courgette/adjustment_method.cc +++ b/courgette/adjustment_method.cc @@ -18,7 +18,6 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" #include "courgette/encoded_program.h" -#include "courgette/image_info.h" namespace courgette { diff --git a/courgette/adjustment_method_2.cc b/courgette/adjustment_method_2.cc index b039e63..961beff 100644 --- a/courgette/adjustment_method_2.cc +++ b/courgette/adjustment_method_2.cc @@ -20,7 +20,6 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" #include "courgette/encoded_program.h" -#include "courgette/image_info.h" /* diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h index 0d865f5..5c6b1b1 100644 --- a/courgette/assembly_program.h +++ b/courgette/assembly_program.h @@ -12,7 +12,7 @@ #include "base/basictypes.h" #include "base/memory/scoped_ptr.h" -#include "courgette/image_info.h" +#include "courgette/disassembler.h" #include "courgette/memory_allocator.h" namespace courgette { diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp index 4ff0424..a4fb12f 100644 --- a/courgette/courgette.gyp +++ b/courgette/courgette.gyp @@ -30,8 +30,6 @@ 'ensemble.h', 'ensemble_apply.cc', 'ensemble_create.cc', - 'image_info.cc', - 'image_info.h', 'memory_allocator.cc', 'memory_allocator.h', 'region.h', @@ -91,10 +89,10 @@ 'base_test_unittest.cc', 'base_test_unittest.h', 'difference_estimator_unittest.cc', + 'disassembler_win32_x86_unittest.cc', 'encoded_program_unittest.cc', 'encode_decode_unittest.cc', 'ensemble_unittest.cc', - 'image_info_unittest.cc', 'run_all_unittests.cc', 'streams_unittest.cc', 'versioning_unittest.cc', diff --git a/courgette/courgette.h b/courgette/courgette.h index 127a150..2970a3f 100644 --- a/courgette/courgette.h +++ b/courgette/courgette.h @@ -87,9 +87,17 @@ Status ApplyEnsemblePatch(const FilePath::CharType* old_file_name, 
Status GenerateEnsemblePatch(SourceStream* old, SourceStream* target, SinkStream* patch); -// Detects the type of an executable, and returns UNKNOWN if it cannot -// be parsed. -ExecutableType DetectExecutableType(const void* buffer, size_t length); +// Detects the type of an executable file, and it's length. The length +// may be slightly smaller than some executables (like ELF), but will include +// all bytes the courgette algorithm has special benefit for. +// On sucess: +// Fill in type and detected_length, and return C_OK. +// On failure: +// Fill in type with UNKNOWN, detected_length with 0, and +// return C_INPUT_NOT_RECOGNIZED +Status DetectExecutableType(const void* buffer, size_t length, + ExecutableType* type, + size_t* detected_length); // Attempts to detect the type of executable, and parse it with the // appropriate tools, storing the pointer to the AssemblyProgram in |*output|. diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index f4ae86d..edacd4b 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -15,7 +15,6 @@ #include "courgette/courgette.h" #include "courgette/disassembler_win32_x86.h" #include "courgette/encoded_program.h" -#include "courgette/image_info.h" // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently // different target addresses are referenced. Purely for debugging. 
@@ -25,45 +24,56 @@ namespace courgette { //////////////////////////////////////////////////////////////////////////////// -ExecutableType DetectExecutableType(const void* buffer, size_t length) { +Disassembler* DetectDisassembler(const void* buffer, size_t length) { + Disassembler* disassembler = NULL; - bool parsed = false; + disassembler = new DisassemblerWin32X86(buffer, length); + if (disassembler->ParseHeader()) + return disassembler; - PEInfo* pe_info = new PEInfo(); - pe_info->Init(buffer, length); - parsed = pe_info->ParseHeader(); - delete pe_info; + delete disassembler; + + return NULL; +} - if (parsed) - return WIN32_X86; +Status DetectExecutableType(const void* buffer, size_t length, + ExecutableType* type, + size_t* detected_length) { - return UNKNOWN; + Disassembler* disassembler = DetectDisassembler(buffer, length); + + if (disassembler) { + *type = disassembler->kind(); + *detected_length = disassembler->length(); + delete disassembler; + return C_OK; + } + + // We failed to detect anything + *type = UNKNOWN; + *detected_length = 0; + return C_INPUT_NOT_RECOGNIZED; } Status ParseDetectedExecutable(const void* buffer, size_t length, AssemblyProgram** output) { *output = NULL; - PEInfo* pe_info = new PEInfo(); - pe_info->Init(buffer, length); + Disassembler* disassembler = DetectDisassembler(buffer, length); - if (!pe_info->ParseHeader()) { - delete pe_info; + if (!disassembler) { return C_INPUT_NOT_RECOGNIZED; } - Disassembler* disassembler = new DisassemblerWin32X86(pe_info); AssemblyProgram* program = new AssemblyProgram(); if (!disassembler->Disassemble(program)) { delete program; delete disassembler; - delete pe_info; return C_DISASSEMBLY_FAILED; } delete disassembler; - delete pe_info; *output = program; return C_OK; } @@ -72,4 +82,34 @@ void DeleteAssemblyProgram(AssemblyProgram* program) { delete program; } +Disassembler::Disassembler(const void* start, size_t length) + : failure_reason_("uninitialized") { + + start_ = reinterpret_cast<const 
uint8*>(start); + length_ = length; + end_ = start_ + length_; +}; + +Disassembler::~Disassembler() {}; + +const uint8* Disassembler::OffsetToPointer(size_t offset) const { + assert(start_ + offset <= end_); + return start_ + offset; +} + +bool Disassembler::Good() { + failure_reason_ = NULL; + return true; +} + +bool Disassembler::Bad(const char* reason) { + failure_reason_ = reason; + return false; +} + +void Disassembler::ReduceLength(size_t reduced_length) { + if (reduced_length < length_) + length_ = reduced_length; +} + } // namespace courgette diff --git a/courgette/disassembler.h b/courgette/disassembler.h index bef1a90..2b4714d 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -7,23 +7,82 @@ #include "base/basictypes.h" +#include "courgette/courgette.h" + namespace courgette { class AssemblyProgram; -class PEInfo; + +// A Relative Virtual Address is the address in the image file after it is +// loaded into memory relative to the image load address. +typedef uint32 RVA; class Disassembler { public: - virtual ~Disassembler() {} + virtual ~Disassembler(); + + virtual ExecutableType kind() { return UNKNOWN; } + + // ok() may always be called but returns 'true' only after ParseHeader + // succeeds. + bool ok() const { return failure_reason_ == NULL; } + + // Returns 'true' if the buffer appears to be a valid executable of the + // expected type. It is not required that this be called before Disassemble. + virtual bool ParseHeader() = 0; // Disassembles the item passed to the factory method into the output // parameter 'program'. virtual bool Disassemble(AssemblyProgram* program) = 0; + // Returns the length of the source executable. May reduce after ParseHeader. + size_t length() const { return length_; } + const uint8* start() const { return start_; } + const uint8* end() const { return end_; } + + // Returns a pointer into the memory copy of the file format. + // FileOffsetToPointer(0) returns a pointer to the start of the file format. 
+ const uint8* OffsetToPointer(size_t offset) const; + protected: - Disassembler() {} + Disassembler(const void* start, size_t length); + + bool Good(); + bool Bad(const char *reason); + + // These helper functions avoid the need for casts in the main code. + uint16 ReadU16(const uint8* address, size_t offset) { + return *reinterpret_cast<const uint16*>(address + offset); + } + + uint32 ReadU32(const uint8* address, size_t offset) { + return *reinterpret_cast<const uint32*>(address + offset); + } + + uint64 ReadU64(const uint8* address, size_t offset) { + return *reinterpret_cast<const uint64*>(address + offset); + } + + static uint32 Read32LittleEndian(const void* address) { + return *reinterpret_cast<const uint32*>(address); + } + + // Reduce the length of the image in memory. Does not actually free + // (or realloc) any memory. Unusally only called via ParseHeader() + void ReduceLength(size_t reduced_length); private: + const char* failure_reason_; + + // + // Basic information that is always valid after Construction, though + // ParseHeader may shorten the length if the executable is shorter than + // the total data. + // + size_t length_; // In current memory. + const uint8* start_; // In current memory, base for 'file offsets'. + const uint8* end_; // In current memory. + DISALLOW_COPY_AND_ASSIGN(Disassembler); }; diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc index fb12c22..d09d67d 100644 --- a/courgette/disassembler_win32_x86.cc +++ b/courgette/disassembler_win32_x86.cc @@ -14,7 +14,6 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" #include "courgette/encoded_program.h" -#include "courgette/image_info.h" // COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently // different target addresses are referenced. Purely for debugging. 
@@ -22,16 +21,189 @@ namespace courgette { -DisassemblerWin32X86::DisassemblerWin32X86(PEInfo* pe_info) - : pe_info_(pe_info), - incomplete_disassembly_(false) { +DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length) + : Disassembler(start, length), + incomplete_disassembly_(false), + is_PE32_plus_(false), + optional_header_(NULL), + size_of_optional_header_(0), + offset_of_data_directories_(0), + machine_type_(0), + number_of_sections_(0), + sections_(NULL), + has_text_section_(false), + size_of_code_(0), + size_of_initialized_data_(0), + size_of_uninitialized_data_(0), + base_of_code_(0), + base_of_data_(0), + image_base_(0), + size_of_image_(0), + number_of_data_directories_(0) { +} + +// ParseHeader attempts to match up the buffer with the Windows data +// structures that exist within a Windows 'Portable Executable' format file. +// Returns 'true' if the buffer matches, and 'false' if the data looks +// suspicious. Rather than try to 'map' the buffer to the numerous windows +// structures, we extract the information we need into the courgette::PEInfo +// structure. +// +bool DisassemblerWin32X86::ParseHeader() { + if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) + return Bad("Too small"); + + // Have 'MZ' magic for a DOS header? + if (start()[0] != 'M' || start()[1] != 'Z') + return Bad("Not MZ"); + + // offset from DOS header to PE header is stored in DOS header. + uint32 offset = ReadU32(start(), + kOffsetOfFileAddressOfNewExeHeader); + + if (offset >= length()) + return Bad("Bad offset to PE header"); + + const uint8* const pe_header = OffsetToPointer(offset); + const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; + if (pe_header <= start() || + pe_header >= end() - kMinPEHeaderSize) + return Bad("Bad offset to PE header"); + + if (offset % 8 != 0) + return Bad("Misaligned PE header"); + + // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. 
+ // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx + // + // The first field of the IMAGE_NT_HEADERS is the signature. + if (!(pe_header[0] == 'P' && + pe_header[1] == 'E' && + pe_header[2] == 0 && + pe_header[3] == 0)) + return Bad("no PE signature"); + + // The second field of the IMAGE_NT_HEADERS is the COFF header. + // The COFF header is also called an IMAGE_FILE_HEADER + // http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx + const uint8* const coff_header = pe_header + 4; + machine_type_ = ReadU16(coff_header, 0); + number_of_sections_ = ReadU16(coff_header, 2); + size_of_optional_header_ = ReadU16(coff_header, 16); + + // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64) + const uint8* const optional_header = coff_header + kSizeOfCoffHeader; + optional_header_ = optional_header; + + if (optional_header + size_of_optional_header_ >= end()) + return Bad("optional header past end of file"); + + // Check we can read the magic. + if (size_of_optional_header_ < 2) + return Bad("optional header no magic"); + + uint16 magic = ReadU16(optional_header, 0); + + if (magic == kImageNtOptionalHdr32Magic) { + is_PE32_plus_ = false; + offset_of_data_directories_ = + kOffsetOfDataDirectoryFromImageOptionalHeader32; + } else if (magic == kImageNtOptionalHdr64Magic) { + is_PE32_plus_ = true; + offset_of_data_directories_ = + kOffsetOfDataDirectoryFromImageOptionalHeader64; + } else { + return Bad("unrecognized magic"); + } + + // Check that we can read the rest of the the fixed fields. Data directories + // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER. + if (size_of_optional_header_ < offset_of_data_directories_) + return Bad("optional header too short"); + + // The optional header is either an IMAGE_OPTIONAL_HEADER32 or + // IMAGE_OPTIONAL_HEADER64 + // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx + // + // Copy the fields we care about. 
+ size_of_code_ = ReadU32(optional_header, 4); + size_of_initialized_data_ = ReadU32(optional_header, 8); + size_of_uninitialized_data_ = ReadU32(optional_header, 12); + base_of_code_ = ReadU32(optional_header, 20); + if (is_PE32_plus_) { + base_of_data_ = 0; + image_base_ = ReadU64(optional_header, 24); + } else { + base_of_data_ = ReadU32(optional_header, 24); + image_base_ = ReadU32(optional_header, 28); + } + size_of_image_ = ReadU32(optional_header, 56); + number_of_data_directories_ = + ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92)); + + if (size_of_code_ >= length() || + size_of_initialized_data_ >= length() || + size_of_code_ + size_of_initialized_data_ >= length()) { + // This validation fires on some perfectly fine executables. + // return Bad("code or initialized data too big"); + } + + // TODO(sra): we can probably get rid of most of the data directories. + bool b = true; + // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary + // for correctness and it compiles smaller this way. + b &= ReadDataDirectory(0, &export_table_); + b &= ReadDataDirectory(1, &import_table_); + b &= ReadDataDirectory(2, &resource_table_); + b &= ReadDataDirectory(3, &exception_table_); + b &= ReadDataDirectory(5, &base_relocation_table_); + b &= ReadDataDirectory(11, &bound_import_table_); + b &= ReadDataDirectory(12, &import_address_table_); + b &= ReadDataDirectory(13, &delay_import_descriptor_); + b &= ReadDataDirectory(14, &clr_runtime_header_); + if (!b) { + return Bad("malformed data directory"); + } + + // Sections follow the optional header. + sections_ = + reinterpret_cast<const Section*>(optional_header + + size_of_optional_header_); + size_t detected_length = 0; + + for (int i = 0; i < number_of_sections_; ++i) { + const Section* section = &sections_[i]; + + // TODO(sra): consider using the 'characteristics' field of the section + // header to see if the section contains instructions. 
+ if (memcmp(section->name, ".text", 6) == 0) + has_text_section_ = true; + + uint32 section_end = + section->file_offset_of_raw_data + section->size_of_raw_data; + if (section_end > detected_length) + detected_length = section_end; + } + + // Pretend our in-memory copy is only as long as our detected length. + ReduceLength(detected_length); + + if (!is_32bit()) { + return Bad("64 bit executables are not yet supported"); + } + + if (!has_text_section()) { + return Bad("Resource-only executables are not yet supported"); + } + + return Good(); } bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { - if (!pe_info().ok()) + if (!ok()) return false; - target->set_image_base(pe_info().image_base()); + target->set_image_base(image_base()); if (!ParseAbs32Relocs()) return false; @@ -46,13 +218,159 @@ bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { return true; } -static uint32 Read32LittleEndian(const void* address) { - return *reinterpret_cast<const uint32*>(address); +//////////////////////////////////////////////////////////////////////////////// + +bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) { + relocs->clear(); + + size_t relocs_size = base_relocation_table_.size_; + if (relocs_size == 0) + return true; + + // The format of the base relocation table is a sequence of variable sized + // IMAGE_BASE_RELOCATION blocks. Search for + // "The format of the base relocation data is somewhat quirky" + // at http://msdn.microsoft.com/en-us/library/ms809762.aspx + + const uint8* relocs_start = RVAToPointer(base_relocation_table_.address_); + const uint8* relocs_end = relocs_start + relocs_size; + + // Make sure entire base relocation table is within the buffer. + if (relocs_start < start() || + relocs_start >= end() || + relocs_end <= start() || + relocs_end > end()) { + return Bad(".relocs outside image"); + } + + const uint8* block = relocs_start; + + // Walk the variable sized blocks. 
+ while (block + 8 < relocs_end) { + RVA page_rva = ReadU32(block, 0); + uint32 size = ReadU32(block, 4); + if (size < 8 || // Size includes header ... + size % 4 != 0) // ... and is word aligned. + return Bad("unreasonable relocs block"); + + const uint8* end_entries = block + size; + + if (end_entries <= block || + end_entries <= start() || + end_entries > end()) + return Bad(".relocs block outside image"); + + // Walk through the two-byte entries. + for (const uint8* p = block + 8; p < end_entries; p += 2) { + uint16 entry = ReadU16(p, 0); + int type = entry >> 12; + int offset = entry & 0xFFF; + + RVA rva = page_rva + offset; + if (type == 3) { // IMAGE_REL_BASED_HIGHLOW + relocs->push_back(rva); + } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE + // Ignore, used as padding. + } else { + // Does not occur in Windows x86 executables. + return Bad("unknown type of reloc"); + } + } + + block += size; + } + + std::sort(relocs->begin(), relocs->end()); + + return true; +} + +const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const { + for (int i = 0; i < number_of_sections_; i++) { + const Section* section = &sections_[i]; + uint32 offset = rva - section->virtual_address; + if (offset < section->virtual_size) { + return section; + } + } + return NULL; +} + +int DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { + const Section* section = RVAToSection(rva); + if (section) { + uint32 offset = rva - section->virtual_address; + if (offset < section->size_of_raw_data) { + return section->file_offset_of_raw_data + offset; + } else { + return kNoOffset; // In section but not in file (e.g. uninit data). + } + } + + // Small RVA values point into the file header in the loaded image. + // RVA 0 is the module load address which Windows uses as the module handle. + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the + // DOS header. 
+ if (rva == 0 || rva == 2) + return rva; + + NOTREACHED(); + return kNoOffset; +} + +const uint8* DisassemblerWin32X86::RVAToPointer(RVA rva) const { + int file_offset = RVAToFileOffset(rva); + if (file_offset == kNoOffset) + return NULL; + else + return OffsetToPointer(file_offset); +} + +std::string DisassemblerWin32X86::SectionName(const Section* section) { + if (section == NULL) + return "<none>"; + char name[9]; + memcpy(name, section->name, 8); + name[8] = '\0'; // Ensure termination. + return name; +} + +CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { + bool ok = true; + // Walk all the bytes in the file, whether or not in a section. + uint32 file_offset = 0; + while (ok && file_offset < length()) { + const Section* section = FindNextSection(file_offset); + if (section == NULL) { + // No more sections. There should not be extra stuff following last + // section. + // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); + break; + } + if (file_offset < section->file_offset_of_raw_data) { + uint32 section_start_offset = section->file_offset_of_raw_data; + ok = ParseNonSectionFileRegion(file_offset, section_start_offset, + program); + file_offset = section_start_offset; + } + if (ok) { + uint32 end = file_offset + section->size_of_raw_data; + ok = ParseFileRegion(section, file_offset, end, program); + file_offset = end; + } + } + +#if COURGETTE_HISTOGRAM_TARGETS + HistogramTargets("abs32 relocs", abs32_target_rvas_); + HistogramTargets("rel32 relocs", rel32_target_rvas_); +#endif + + return ok; } bool DisassemblerWin32X86::ParseAbs32Relocs() { abs32_locations_.clear(); - if (!pe_info().ParseRelocs(&abs32_locations_)) + if (!ParseRelocs(&abs32_locations_)) return false; std::sort(abs32_locations_.begin(), abs32_locations_.end()); @@ -61,8 +379,8 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a 
reference to some address. - uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); - ++abs32_target_rvas_[target_address - pe_info().image_base()]; + uint32 target_address = Read32LittleEndian(RVAToPointer(rva)); + ++abs32_target_rvas_[target_address - image_base()]; } #endif return true; @@ -70,8 +388,8 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { void DisassemblerWin32X86::ParseRel32RelocsFromSections() { uint32 file_offset = 0; - while (file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); + while (file_offset < length()) { + const Section* section = FindNextSection(file_offset); if (section == NULL) break; if (file_offset < section->file_offset_of_raw_data) @@ -114,12 +432,12 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { uint32 start_file_offset = section->file_offset_of_raw_data; uint32 end_file_offset = start_file_offset + section->size_of_raw_data; - RVA relocs_start_rva = pe_info().base_relocation_table().address_; + RVA relocs_start_rva = base_relocation_table().address_; - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + const uint8* start_pointer = OffsetToPointer(start_file_offset); + const uint8* end_pointer = OffsetToPointer(end_file_offset); - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; // Quick way to convert from Pointer to RVA within a single Section is to @@ -133,7 +451,7 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { while (p < end_pointer) { RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); if (current_rva == relocs_start_rva) { - uint32 relocs_size = pe_info().base_relocation_table().size_; + uint32 relocs_size = base_relocation_table().size_; if 
(relocs_size) { p += relocs_size; continue; @@ -179,7 +497,7 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); // To be valid, rel32 target must be within image, and within this // section. - if (pe_info().IsValidRVA(target_rva) && + if (IsValidRVA(target_rva) && start_rva <= target_rva && target_rva < end_rva) { rel32_locations_.push_back(rel32_rva); #if COURGETTE_HISTOGRAM_TARGETS @@ -193,39 +511,6 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { } } -CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { - bool ok = true; - // Walk all the bytes in the file, whether or not in a section. - uint32 file_offset = 0; - while (ok && file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); - if (section == NULL) { - // No more sections. There should not be extra stuff following last - // section. - // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); - break; - } - if (file_offset < section->file_offset_of_raw_data) { - uint32 section_start_offset = section->file_offset_of_raw_data; - ok = ParseNonSectionFileRegion(file_offset, section_start_offset, - program); - file_offset = section_start_offset; - } - if (ok) { - uint32 end = file_offset + section->size_of_raw_data; - ok = ParseFileRegion(section, file_offset, end, program); - file_offset = end; - } - } - -#if COURGETTE_HISTOGRAM_TARGETS - HistogramTargets("abs32 relocs", abs32_target_rvas_); - HistogramTargets("rel32 relocs", rel32_target_rvas_); -#endif - - return ok; -} - CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( uint32 start_file_offset, uint32 end_file_offset, @@ -233,8 +518,8 @@ CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( if (incomplete_disassembly_) return true; - const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end = 
pe_info().FileOffsetToPointer(end_file_offset); + const uint8* start = OffsetToPointer(start_file_offset); + const uint8* end = OffsetToPointer(end_file_offset); const uint8* p = start; @@ -251,12 +536,12 @@ CheckBool DisassemblerWin32X86::ParseFileRegion( const Section* section, uint32 start_file_offset, uint32 end_file_offset, AssemblyProgram* program) { - RVA relocs_start_rva = pe_info().base_relocation_table().address_; + RVA relocs_start_rva = base_relocation_table().address_; - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + const uint8* start_pointer = OffsetToPointer(start_file_offset); + const uint8* end_pointer = OffsetToPointer(end_file_offset); - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA start_rva = FileOffsetToRVA(start_file_offset); RVA end_rva = start_rva + section->virtual_size; // Quick way to convert from Pointer to RVA within a single Section is to @@ -280,7 +565,7 @@ CheckBool DisassemblerWin32X86::ParseFileRegion( ok = program->EmitMakeRelocsInstruction(); if (!ok) break; - uint32 relocs_size = pe_info().base_relocation_table().size_; + uint32 relocs_size = base_relocation_table().size_; if (relocs_size) { p += relocs_size; continue; @@ -292,7 +577,7 @@ CheckBool DisassemblerWin32X86::ParseFileRegion( if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { uint32 target_address = Read32LittleEndian(p); - RVA target_rva = target_address - pe_info().image_base(); + RVA target_rva = target_address - image_base(); // TODO(sra): target could be Label+offset. It is not clear how to guess // which it might be. We assume offset==0. 
ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); @@ -363,7 +648,7 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, std::cout << std::dec << p->first << ": " << count; if (count <= 2) { for (size_t i = 0; i < count; ++i) - std::cout << " " << pe_info().DescribeRVA(p->second[i]); + std::cout << " " << DescribeRVA(p->second[i]); } std::cout << std::endl; someSkipped = false; @@ -374,4 +659,77 @@ void DisassemblerWin32X86::HistogramTargets(const char* kind, } #endif // COURGETTE_HISTOGRAM_TARGETS + +// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except +// that during development I'm finding I need to call it when compiled in +// Release mode. Hence: +// TODO(sra): make this compile only for debug mode. +std::string DisassemblerWin32X86::DescribeRVA(RVA rva) const { + const Section* section = RVAToSection(rva); + std::ostringstream s; + s << std::hex << rva; + if (section) { + s << " ("; + s << SectionName(section) << "+" + << std::hex << (rva - section->virtual_address) + << ")"; + } + return s.str(); +} + +const Section* DisassemblerWin32X86::FindNextSection(uint32 fileOffset) const { + const Section* best = 0; + for (int i = 0; i < number_of_sections_; i++) { + const Section* section = &sections_[i]; + if (section->size_of_raw_data > 0) { // i.e. has data in file. 
+ if (fileOffset <= section->file_offset_of_raw_data) { + if (best == 0 || + section->file_offset_of_raw_data < best->file_offset_of_raw_data) { + best = section; + } + } + } + } + return best; +} + +RVA DisassemblerWin32X86::FileOffsetToRVA(uint32 file_offset) const { + for (int i = 0; i < number_of_sections_; i++) { + const Section* section = &sections_[i]; + uint32 offset = file_offset - section->file_offset_of_raw_data; + if (offset < section->size_of_raw_data) { + return section->virtual_address + offset; + } + } + return 0; +} + +bool DisassemblerWin32X86::ReadDataDirectory( + int index, + ImageDataDirectory* directory) { + + if (index < number_of_data_directories_) { + size_t offset = index * 8 + offset_of_data_directories_; + if (offset >= size_of_optional_header_) + return Bad("number of data directories inconsistent"); + const uint8* data_directory = optional_header_ + offset; + if (data_directory < start() || + data_directory + 8 >= end()) + return Bad("data directory outside image"); + RVA rva = ReadU32(data_directory, 0); + size_t size = ReadU32(data_directory, 4); + if (size > size_of_image_) + return Bad("data directory size too big"); + + // TODO(sra): validate RVA. 
+ directory->address_ = rva; + directory->size_ = static_cast<uint32>(size); + return true; + } else { + directory->address_ = 0; + directory->size_ = 0; + return true; + } +} + } // namespace courgette diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h index fe00b6d..733222f 100644 --- a/courgette/disassembler_win32_x86.h +++ b/courgette/disassembler_win32_x86.h @@ -7,8 +7,8 @@ #include "base/basictypes.h" #include "courgette/disassembler.h" -#include "courgette/image_info.h" #include "courgette/memory_allocator.h" +#include "courgette/types_win_pe.h" namespace courgette { @@ -16,13 +16,44 @@ class AssemblyProgram; class DisassemblerWin32X86 : public Disassembler { public: - explicit DisassemblerWin32X86(PEInfo* pe_info); + explicit DisassemblerWin32X86(const void* start, size_t length); + + virtual ExecutableType kind() { return WIN32_X86; } + + // Returns 'true' if the buffer appears to point to a Windows 32 bit + // executable, 'false' otherwise. If ParseHeader() succeeds, other member + // functions may be called. + virtual bool ParseHeader(); virtual bool Disassemble(AssemblyProgram* target); - protected: - PEInfo& pe_info() { return *pe_info_; } + // + // Exposed for test purposes + // + + bool has_text_section() const { return has_text_section_; } + uint32 size_of_code() const { return size_of_code_; } + bool is_32bit() const { return !is_PE32_plus_; } + + // Returns 'true' if the base relocation table can be parsed. + // Output is a vector of the RVAs corresponding to locations within executable + // that are listed in the base relocation table. + bool ParseRelocs(std::vector<RVA> *addresses); + + // Returns Section containing the relative virtual address, or NULL if none. + const Section* RVAToSection(RVA rva) const; + + static const int kNoOffset = -1; + // Returns kNoOffset if there is no file offset corresponding to 'rva'. 
+ int RVAToFileOffset(RVA rva) const; + // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL + // is returned if there is no file offset corresponding to 'rva'. + const uint8* RVAToPointer(RVA rva) const; + + static std::string SectionName(const Section* section); + + protected: CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; bool ParseAbs32Relocs(); void ParseRel32RelocsFromSections(); @@ -38,17 +69,86 @@ class DisassemblerWin32X86 : public Disassembler { void HistogramTargets(const char* kind, const std::map<RVA, int>& map); #endif - PEInfo* pe_info_; + // Most addresses are represented as 32-bit RVAs. The one address we can't + // do this with is the image base address. 'image_base' is valid only for + // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. + uint32 image_base() const { return static_cast<uint32>(image_base_); } + + const ImageDataDirectory& base_relocation_table() const { + return base_relocation_table_; + } + + bool IsValidRVA(RVA rva) const { return rva < size_of_image_; } + + // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. + std::string DescribeRVA(RVA rva) const; + + // Finds the first section at file_offset or above. Does not return sections + // that have no raw bytes in the file. + const Section* FindNextSection(uint32 file_offset) const; + + // There are 2 'coordinate systems' for reasoning about executables. + // FileOffset - the the offset within a single .EXE or .DLL *file*. + // RVA - relative virtual address (offset within *loaded image*) + // FileOffsetToRVA and RVAToFileOffset convert between these representations. + + RVA FileOffsetToRVA(uint32 offset) const; + + + private: + + bool ReadDataDirectory(int index, ImageDataDirectory* dir); + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits std::vector<RVA> abs32_locations_; std::vector<RVA> rel32_locations_; + // + // Fields that are always valid. 
+ // + + // + // Information that is valid after successful ParseHeader. + // + bool is_PE32_plus_; // PE32_plus is for 64 bit executables. + + // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. + const uint8 *optional_header_; + uint16 size_of_optional_header_; + uint16 offset_of_data_directories_; + + uint16 machine_type_; + uint16 number_of_sections_; + const Section *sections_; + bool has_text_section_; + + uint32 size_of_code_; + uint32 size_of_initialized_data_; + uint32 size_of_uninitialized_data_; + RVA base_of_code_; + RVA base_of_data_; + + uint64 image_base_; // range limited to 32 bits for 32 bit executable + uint32 size_of_image_; + int number_of_data_directories_; + + ImageDataDirectory export_table_; + ImageDataDirectory import_table_; + ImageDataDirectory resource_table_; + ImageDataDirectory exception_table_; + ImageDataDirectory base_relocation_table_; + ImageDataDirectory bound_import_table_; + ImageDataDirectory import_address_table_; + ImageDataDirectory delay_import_descriptor_; + ImageDataDirectory clr_runtime_header_; + #if COURGETTE_HISTOGRAM_TARGETS std::map<RVA, int> abs32_target_rvas_; std::map<RVA, int> rel32_target_rvas_; #endif + DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86); }; diff --git a/courgette/disassembler_win32_x86_unittest.cc b/courgette/disassembler_win32_x86_unittest.cc new file mode 100644 index 0000000..c310675 --- /dev/null +++ b/courgette/disassembler_win32_x86_unittest.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "courgette/disassembler_win32_x86.h" + +#include "courgette/base_test_unittest.h" + +class DisassemblerWin32X86Test : public BaseTest { + public: + + void TestExe() const; + void TestExe64() const; + void TestResourceDll() const; +}; + +void DisassemblerWin32X86Test::TestExe() const { + std::string file1 = FileContents("setup1.exe"); + + scoped_ptr<courgette::DisassemblerWin32X86> disassembler( + new courgette::DisassemblerWin32X86(file1.c_str(), file1.length())); + + bool can_parse_header = disassembler->ParseHeader(); + EXPECT_TRUE(can_parse_header); + + // The executable is the whole file, not 'embedded' with the file + EXPECT_EQ(file1.length(), disassembler->length()); + + EXPECT_TRUE(disassembler->ok()); + EXPECT_TRUE(disassembler->has_text_section()); + EXPECT_EQ(449536U, disassembler->size_of_code()); + EXPECT_TRUE(disassembler->is_32bit()); + EXPECT_EQ(courgette::DisassemblerWin32X86::SectionName( + disassembler->RVAToSection(0x00401234 - 0x00400000)), + std::string(".text")); + + EXPECT_EQ(0, disassembler->RVAToFileOffset(0)); + EXPECT_EQ(1024, disassembler->RVAToFileOffset(4096)); + EXPECT_EQ(46928, disassembler->RVAToFileOffset(50000)); + + std::vector<courgette::RVA> relocs; + bool can_parse_relocs = disassembler->ParseRelocs(&relocs); + EXPECT_TRUE(can_parse_relocs); + + const uint8* offset_p = disassembler->OffsetToPointer(0); + EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), + reinterpret_cast<const void*>(offset_p)); + EXPECT_EQ('M', offset_p[0]); + EXPECT_EQ('Z', offset_p[1]); + + const uint8* rva_p = disassembler->RVAToPointer(0); + EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), + reinterpret_cast<const void*>(rva_p)); + EXPECT_EQ('M', rva_p[0]); + EXPECT_EQ('Z', rva_p[1]); +} + +void DisassemblerWin32X86Test::TestExe64() const { + std::string file1 = FileContents("pe-64.exe"); + + scoped_ptr<courgette::DisassemblerWin32X86> disassembler( + new courgette::DisassemblerWin32X86(file1.c_str(), file1.length())); + + bool 
can_parse_header = disassembler->ParseHeader(); + EXPECT_FALSE(can_parse_header); + + // The executable is the whole file, not 'embedded' with the file + EXPECT_EQ(file1.length(), disassembler->length()); + + EXPECT_FALSE(disassembler->ok()); + EXPECT_TRUE(disassembler->has_text_section()); + EXPECT_EQ(43008U, disassembler->size_of_code()); + EXPECT_FALSE(disassembler->is_32bit()); +} + +void DisassemblerWin32X86Test::TestResourceDll() const { + std::string file1 = FileContents("en-US.dll"); + + scoped_ptr<courgette::DisassemblerWin32X86> disassembler( + new courgette::DisassemblerWin32X86(file1.c_str(), file1.length())); + + bool can_parse_header = disassembler->ParseHeader(); + EXPECT_FALSE(can_parse_header); + + // The executable is the whole file, not 'embedded' with the file + EXPECT_EQ(file1.length(), disassembler->length()); + + EXPECT_FALSE(disassembler->ok()); + EXPECT_FALSE(disassembler->has_text_section()); + EXPECT_EQ(0U, disassembler->size_of_code()); + EXPECT_TRUE(disassembler->is_32bit()); +} + +TEST_F(DisassemblerWin32X86Test, All) { + TestExe(); + TestExe64(); + TestResourceDll(); +} diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h index 5acfeb6..b120353 100644 --- a/courgette/encoded_program.h +++ b/courgette/encoded_program.h @@ -8,7 +8,7 @@ #include <vector> #include "base/basictypes.h" -#include "courgette/image_info.h" +#include "courgette/disassembler.h" #include "courgette/memory_allocator.h" namespace courgette { diff --git a/courgette/ensemble.cc b/courgette/ensemble.cc index a2bea8f..fb9b25b 100644 --- a/courgette/ensemble.cc +++ b/courgette/ensemble.cc @@ -7,7 +7,6 @@ #include "base/basictypes.h" #include "base/string_number_conversions.h" -#include "courgette/image_info.h" #include "courgette/region.h" #include "courgette/streams.h" #include "courgette/simple_delta.h" @@ -16,14 +15,11 @@ namespace courgette { Element::Element(ExecutableType kind, Ensemble* ensemble, - const Region& region, - PEInfo* info) - : 
kind_(kind), ensemble_(ensemble), region_(region), info_(info) { + const Region& region) + : kind_(kind), ensemble_(ensemble), region_(region) { } -Element::~Element() { - delete info_; -} +Element::~Element() {} std::string Element::Name() const { return ensemble_->name() + "(" @@ -41,41 +37,22 @@ Status Ensemble::FindEmbeddedElements() { size_t position = 0; while (position < length) { - ExecutableType type = DetectExecutableType(start + position, - length - position); + ExecutableType type; + size_t detected_length; + + Status result = DetectExecutableType(start + position, + length - position, + &type, &detected_length); - // - // TODO(dgarrett) This switch can go away totally after two things. - // - // Make ImageInfo generic for all executable types. - // Find a generic way to handle length detection for executables. - // - // When this switch is gone, that's one less piece of code that is - // executable type aware. - // - switch (type) { - case UNKNOWN: { - // No Element found at current position. 
- ++position; - break; - } - case WIN32_X86: { - // The Info is only created to detect the length of the executable - courgette::PEInfo* info(new courgette::PEInfo()); - info->Init(start + position, length - position); - if (!info->ParseHeader()) { - delete info; - position++; - break; - } - Region region(start + position, info->length()); + if (result == C_OK) { + Region region(start + position, detected_length); - Element* element = new Element(type, this, region, info); - owned_elements_.push_back(element); - elements_.push_back(element); - position += region.length(); - break; - } + Element* element = new Element(type, this, region); + owned_elements_.push_back(element); + elements_.push_back(element); + position += region.length(); + } else { + position++; } } return C_OK; diff --git a/courgette/ensemble.h b/courgette/ensemble.h index e766782..4d26076 100644 --- a/courgette/ensemble.h +++ b/courgette/ensemble.h @@ -30,7 +30,6 @@ namespace courgette { // Forward declarations: class Ensemble; -class PEInfo; // An Element is a region of an Ensemble with an identifyable kind. // @@ -38,8 +37,7 @@ class Element { public: Element(ExecutableType kind, Ensemble* ensemble, - const Region& region, - PEInfo*info); + const Region& region); virtual ~Element(); @@ -53,14 +51,10 @@ class Element { // containing Ensemble. 
size_t offset_in_ensemble() const; - // The ImageInfo for this executable - virtual PEInfo* GetImageInfo() const { return info_; } - private: ExecutableType kind_; Ensemble* ensemble_; Region region_; - PEInfo *info_; DISALLOW_COPY_AND_ASSIGN(Element); }; diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc index 499ccac..475b0a4 100644 --- a/courgette/ensemble_apply.cc +++ b/courgette/ensemble_apply.cc @@ -11,7 +11,6 @@ #include "base/logging.h" #include "courgette/crc.h" -#include "courgette/image_info.h" #include "courgette/region.h" #include "courgette/streams.h" #include "courgette/simple_delta.h" diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc index 62105b9..07ede7e 100644 --- a/courgette/ensemble_create.cc +++ b/courgette/ensemble_create.cc @@ -24,7 +24,6 @@ #include "courgette/third_party/bsdiff.h" #include "courgette/crc.h" #include "courgette/difference_estimator.h" -#include "courgette/image_info.h" #include "courgette/streams.h" #include "courgette/region.h" #include "courgette/simple_delta.h" diff --git a/courgette/image_info.cc b/courgette/image_info.cc deleted file mode 100644 index ce0e0ae..0000000 --- a/courgette/image_info.cc +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "courgette/image_info.h" - -#include <memory.h> -#include <algorithm> -#include <map> -#include <set> -#include <sstream> -#include <vector> - -#include "base/logging.h" - -namespace courgette { - -std::string SectionName(const Section* section) { - if (section == NULL) - return "<none>"; - char name[9]; - memcpy(name, section->name, 8); - name[8] = '\0'; // Ensure termination. 
- return name; -} - -PEInfo::PEInfo() - : failure_reason_("uninitialized"), - start_(0), - end_(0), - length_(0), - is_PE32_plus_(false), - file_length_(0), - optional_header_(NULL), - size_of_optional_header_(0), - offset_of_data_directories_(0), - machine_type_(0), - number_of_sections_(0), - sections_(NULL), - has_text_section_(false), - size_of_code_(0), - size_of_initialized_data_(0), - size_of_uninitialized_data_(0), - base_of_code_(0), - base_of_data_(0), - image_base_(0), - size_of_image_(0), - number_of_data_directories_(0) { -} - -void PEInfo::Init(const void* start, size_t length) { - start_ = reinterpret_cast<const uint8*>(start); - length_ = static_cast<int>(length); - end_ = start_ + length_; - failure_reason_ = "unparsed"; -} - -// DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except -// that during development I'm finding I need to call it when compiled in -// Release mode. Hence: -// TODO(sra): make this compile only for debug mode. -std::string PEInfo::DescribeRVA(RVA rva) const { - const Section* section = RVAToSection(rva); - std::ostringstream s; - s << std::hex << rva; - if (section) { - s << " ("; - s << SectionName(section) << "+" - << std::hex << (rva - section->virtual_address) - << ")"; - } - return s.str(); -} - -const Section* PEInfo::FindNextSection(uint32 fileOffset) const { - const Section* best = 0; - for (int i = 0; i < number_of_sections_; i++) { - const Section* section = &sections_[i]; - if (section->size_of_raw_data > 0) { // i.e. has data in file. 
- if (fileOffset <= section->file_offset_of_raw_data) { - if (best == 0 || - section->file_offset_of_raw_data < best->file_offset_of_raw_data) { - best = section; - } - } - } - } - return best; -} - -const Section* PEInfo::RVAToSection(RVA rva) const { - for (int i = 0; i < number_of_sections_; i++) { - const Section* section = &sections_[i]; - uint32 offset = rva - section->virtual_address; - if (offset < section->virtual_size) { - return section; - } - } - return NULL; -} - -int PEInfo::RVAToFileOffset(RVA rva) const { - const Section* section = RVAToSection(rva); - if (section) { - uint32 offset = rva - section->virtual_address; - if (offset < section->size_of_raw_data) { - return section->file_offset_of_raw_data + offset; - } else { - return kNoOffset; // In section but not in file (e.g. uninit data). - } - } - - // Small RVA values point into the file header in the loaded image. - // RVA 0 is the module load address which Windows uses as the module handle. - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the - // DOS header. - if (rva == 0 || rva == 2) - return rva; - - NOTREACHED(); - return kNoOffset; -} - -const uint8* PEInfo::RVAToPointer(RVA rva) const { - int file_offset = RVAToFileOffset(rva); - if (file_offset == kNoOffset) - return NULL; - else - return start_ + file_offset; -} - -RVA PEInfo::FileOffsetToRVA(uint32 file_offset) const { - for (int i = 0; i < number_of_sections_; i++) { - const Section* section = &sections_[i]; - uint32 offset = file_offset - section->file_offset_of_raw_data; - if (offset < section->size_of_raw_data) { - return section->virtual_address + offset; - } - } - return 0; -} - -//////////////////////////////////////////////////////////////////////////////// - -namespace { - -// Constants and offsets gleaned from WINNT.H and various articles on the -// format of Windows PE executables. 
- -// This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew): -const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c; - -const uint16 kImageNtOptionalHdr32Magic = 0x10b; -const uint16 kImageNtOptionalHdr64Magic = 0x20b; - -const size_t kSizeOfCoffHeader = 20; -const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96; -const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112; - -// These helper functions avoid the need for casts in the main code. -inline uint16 ReadU16(const uint8* address, size_t offset) { - return *reinterpret_cast<const uint16*>(address + offset); -} - -inline uint32 ReadU32(const uint8* address, size_t offset) { - return *reinterpret_cast<const uint32*>(address + offset); -} - -inline uint64 ReadU64(const uint8* address, size_t offset) { - return *reinterpret_cast<const uint64*>(address + offset); -} - -} // namespace - -// ParseHeader attempts to match up the buffer with the Windows data -// structures that exist within a Windows 'Portable Executable' format file. -// Returns 'true' if the buffer matches, and 'false' if the data looks -// suspicious. Rather than try to 'map' the buffer to the numerous windows -// structures, we extract the information we need into the courgette::PEInfo -// structure. -// -bool PEInfo::ParseHeader() { - if (length_ < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) - return Bad("Too small"); - - // Have 'MZ' magic for a DOS header? - if (start_[0] != 'M' || start_[1] != 'Z') - return Bad("Not MZ"); - - // offset from DOS header to PE header is stored in DOS header. 
- uint32 offset = ReadU32(start_, kOffsetOfFileAddressOfNewExeHeader); - - const uint8* const pe_header = start_ + offset; - const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; - if (pe_header <= start_ || pe_header >= end_ - kMinPEHeaderSize) - return Bad("Bad offset to PE header"); - - if (offset % 8 != 0) - return Bad("Misaligned PE header"); - - // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. - // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx - // - // The first field of the IMAGE_NT_HEADERS is the signature. - if (!(pe_header[0] == 'P' && - pe_header[1] == 'E' && - pe_header[2] == 0 && - pe_header[3] == 0)) - return Bad("no PE signature"); - - // The second field of the IMAGE_NT_HEADERS is the COFF header. - // The COFF header is also called an IMAGE_FILE_HEADER - // http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx - const uint8* const coff_header = pe_header + 4; - machine_type_ = ReadU16(coff_header, 0); - number_of_sections_ = ReadU16(coff_header, 2); - size_of_optional_header_ = ReadU16(coff_header, 16); - - // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64) - const uint8* const optional_header = coff_header + kSizeOfCoffHeader; - optional_header_ = optional_header; - - if (optional_header + size_of_optional_header_ >= end_) - return Bad("optional header past end of file"); - - // Check we can read the magic. - if (size_of_optional_header_ < 2) - return Bad("optional header no magic"); - - uint16 magic = ReadU16(optional_header, 0); - - if (magic == kImageNtOptionalHdr32Magic) { - is_PE32_plus_ = false; - offset_of_data_directories_ = - kOffsetOfDataDirectoryFromImageOptionalHeader32; - } else if (magic == kImageNtOptionalHdr64Magic) { - is_PE32_plus_ = true; - offset_of_data_directories_ = - kOffsetOfDataDirectoryFromImageOptionalHeader64; - } else { - return Bad("unrecognized magic"); - } - - // Check that we can read the rest of the the fixed fields. 
Data directories - // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER. - if (size_of_optional_header_ < offset_of_data_directories_) - return Bad("optional header too short"); - - // The optional header is either an IMAGE_OPTIONAL_HEADER32 or - // IMAGE_OPTIONAL_HEADER64 - // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx - // - // Copy the fields we care about. - size_of_code_ = ReadU32(optional_header, 4); - size_of_initialized_data_ = ReadU32(optional_header, 8); - size_of_uninitialized_data_ = ReadU32(optional_header, 12); - base_of_code_ = ReadU32(optional_header, 20); - if (is_PE32_plus_) { - base_of_data_ = 0; - image_base_ = ReadU64(optional_header, 24); - } else { - base_of_data_ = ReadU32(optional_header, 24); - image_base_ = ReadU32(optional_header, 28); - } - size_of_image_ = ReadU32(optional_header, 56); - number_of_data_directories_ = - ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92)); - - if (size_of_code_ >= length_ || - size_of_initialized_data_ >= length_ || - size_of_code_ + size_of_initialized_data_ >= length_) { - // This validation fires on some perfectly fine executables. - // return Bad("code or initialized data too big"); - } - - // TODO(sra): we can probably get rid of most of the data directories. - bool b = true; - // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary - // for correctness and it compiles smaller this way. - b &= ReadDataDirectory(0, &export_table_); - b &= ReadDataDirectory(1, &import_table_); - b &= ReadDataDirectory(2, &resource_table_); - b &= ReadDataDirectory(3, &exception_table_); - b &= ReadDataDirectory(5, &base_relocation_table_); - b &= ReadDataDirectory(11, &bound_import_table_); - b &= ReadDataDirectory(12, &import_address_table_); - b &= ReadDataDirectory(13, &delay_import_descriptor_); - b &= ReadDataDirectory(14, &clr_runtime_header_); - if (!b) { - return Bad("malformed data directory"); - } - - // Sections follow the optional header. 
- sections_ = - reinterpret_cast<const Section*>(optional_header + - size_of_optional_header_); - file_length_ = 0; - - for (int i = 0; i < number_of_sections_; ++i) { - const Section* section = &sections_[i]; - - // TODO(sra): consider using the 'characteristics' field of the section - // header to see if the section contains instructions. - if (memcmp(section->name, ".text", 6) == 0) - has_text_section_ = true; - - uint32 section_end = - section->file_offset_of_raw_data + section->size_of_raw_data; - if (section_end > file_length_) - file_length_ = section_end; - } - - if (!is_32bit()) { - return Bad("64 bit executables are not yet supported"); - } - - if (!has_text_section()) { - return Bad("Resource-only executables are not yet supported"); - } - - failure_reason_ = NULL; - return true; -} - -bool PEInfo::ReadDataDirectory(int index, ImageDataDirectory* directory) { - if (index < number_of_data_directories_) { - size_t offset = index * 8 + offset_of_data_directories_; - if (offset >= size_of_optional_header_) - return Bad("number of data directories inconsistent"); - const uint8* data_directory = optional_header_ + offset; - if (data_directory < start_ || data_directory + 8 >= end_) - return Bad("data directory outside image"); - RVA rva = ReadU32(data_directory, 0); - size_t size = ReadU32(data_directory, 4); - if (size > size_of_image_) - return Bad("data directory size too big"); - - // TODO(sra): validate RVA. 
- directory->address_ = rva; - directory->size_ = static_cast<uint32>(size); - return true; - } else { - directory->address_ = 0; - directory->size_ = 0; - return true; - } -} - -bool PEInfo::Bad(const char* reason) { - failure_reason_ = reason; - return false; -} - -//////////////////////////////////////////////////////////////////////////////// - -bool PEInfo::ParseRelocs(std::vector<RVA> *relocs) { - relocs->clear(); - - size_t relocs_size = base_relocation_table_.size_; - if (relocs_size == 0) - return true; - - // The format of the base relocation table is a sequence of variable sized - // IMAGE_BASE_RELOCATION blocks. Search for - // "The format of the base relocation data is somewhat quirky" - // at http://msdn.microsoft.com/en-us/library/ms809762.aspx - - const uint8* start = RVAToPointer(base_relocation_table_.address_); - const uint8* end = start + relocs_size; - - // Make sure entire base relocation table is within the buffer. - if (start < start_ || - start >= end_ || - end <= start_ || - end > end_) { - return Bad(".relocs outside image"); - } - - const uint8* block = start; - - // Walk the variable sized blocks. - while (block + 8 < end) { - RVA page_rva = ReadU32(block, 0); - uint32 size = ReadU32(block, 4); - if (size < 8 || // Size includes header ... - size % 4 != 0) // ... and is word aligned. - return Bad("unreasonable relocs block"); - - const uint8* end_entries = block + size; - - if (end_entries <= block || end_entries <= start_ || end_entries > end_) - return Bad(".relocs block outside image"); - - // Walk through the two-byte entries. - for (const uint8* p = block + 8; p < end_entries; p += 2) { - uint16 entry = ReadU16(p, 0); - int type = entry >> 12; - int offset = entry & 0xFFF; - - RVA rva = page_rva + offset; - if (type == 3) { // IMAGE_REL_BASED_HIGHLOW - relocs->push_back(rva); - } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE - // Ignore, used as padding. - } else { - // Does not occur in Windows x86 executables. 
- return Bad("unknown type of reloc"); - } - } - - block += size; - } - - std::sort(relocs->begin(), relocs->end()); - - return true; -} - -} // namespace courgette diff --git a/courgette/image_info.h b/courgette/image_info.h deleted file mode 100644 index 17936e1..0000000 --- a/courgette/image_info.h +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef COURGETTE_IMAGE_INFO_H_ -#define COURGETTE_IMAGE_INFO_H_ - -#include <string> -#include <vector> - -#include "base/basictypes.h" - -namespace courgette { - -// A Relative Virtual Address is the address in the image file after it is -// loaded into memory relative to the image load address. -typedef uint32 RVA; - -// PE file section header. This struct has the same layout as the -// IMAGE_SECTION_HEADER structure from WINNT.H -// http://msdn.microsoft.com/en-us/library/ms680341(VS.85).aspx -// -#pragma pack(push, 1) // Supported by MSVC and GCC. Ensures no gaps in packing. -struct Section { - char name[8]; - uint32 virtual_size; - uint32 virtual_address; - uint32 size_of_raw_data; - uint32 file_offset_of_raw_data; - uint32 pointer_to_relocations; // Always zero in an image. - uint32 pointer_to_line_numbers; // Always zero in an image. - uint16 number_of_relocations; // Always zero in an image. - uint16 number_of_line_numbers; // Always zero in an image. - uint32 characteristics; -}; -#pragma pack(pop) - -COMPILE_ASSERT(sizeof(Section) == 40, section_is_40_bytes); - -// Returns the name of a section, solving the problem that the name is not -// always properly NUL-terminated. Used only for debugging. 
-std::string SectionName(const Section* section); - -// ImageDataDirectory has same layout as IMAGE_DATA_DIRECTORY structure from -// WINNT.H -// http://msdn.microsoft.com/en-us/library/ms680305(VS.85).aspx -// -class ImageDataDirectory { - public: - ImageDataDirectory() : address_(0), size_(0) {} - RVA address_; - uint32 size_; -}; - -COMPILE_ASSERT(sizeof(ImageDataDirectory) == 8, - image_data_directory_is_8_bytes); - -// -// PEInfo holds information about a single Windows 'Portable Executable' format -// file in the on-disk format. -// -// Imagine you had concatenated a bunch of 'original' files into one 'big' -// file and read the big file into memory. You could find the executables -// from the original files by calling PEInfo::Init with different addresses. -// If PEInfo::TryParseHeader returns true, then Init was passed the address -// of the first byte of one of the original executables, and PEIinfo::length -// will tell how long the file was. -// -class PEInfo { - public: - PEInfo(); - - // ok() may always be called but returns 'true' only after ParseHeader - // succeeds. - bool ok() const { return failure_reason_ == NULL; } - - // Initialize with buffer. This just sets up the region of memory that - // potentially contains the bytes from an executable file. The caller - // continues to own 'start'. - void Init(const void* start, size_t length); - - // Returns 'true' if the buffer appears to point to a Windows 32 bit - // executable, 'false' otherwise. If ParseHeader() succeeds, other member - // functions may be called. - bool ParseHeader(); - - // Returns 'true' if the base relocation table can be parsed. - // Output is a vector of the RVAs corresponding to locations within executable - // that are listed in the base relocation table. - bool ParseRelocs(std::vector<RVA> *addresses); - - // Returns the length of the image. Valid only if ParseHeader succeeded. 
- uint32 length() const { return file_length_; } - - bool has_text_section() const { return has_text_section_; } - - uint32 size_of_code() const { return size_of_code_; } - - bool is_32bit() const { return !is_PE32_plus_; } - - // Most addresses are represented as 32-bit RVAs. The one address we can't - // do this with is the image base address. 'image_base' is valid only for - // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable. - uint32 image_base() const { return static_cast<uint32>(image_base_); } - uint64 image_base_64() const { return image_base_; } - - const ImageDataDirectory& base_relocation_table() const { - return base_relocation_table_; - } - - bool IsValidRVA(RVA rva) const { return rva < size_of_image_; } - - // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. - std::string DescribeRVA(RVA rva) const; - - // Returns a pointer into the memory copy of the file format. - // FileOffsetToPointer(0) returns a pointer to the start of the file format. - const uint8* FileOffsetToPointer(uint32 offset) const { - return start_ + offset; - } - - // Finds the first section at file_offset or above. Does not return sections - // that have no raw bytes in the file. - const Section* FindNextSection(uint32 file_offset) const; - // Returns Section containing the relative virtual address, or NULL if none. - const Section* RVAToSection(RVA rva) const; - - // There are 2 'coordinate systems' for reasoning about executables. - // FileOffset - the the offset within a single .EXE or .DLL *file*. - // RVA - relative virtual address (offset within *loaded image*) - // FileOffsetToRVA and RVAToFileOffset convert between these representations. - - RVA FileOffsetToRVA(uint32 offset) const; - - static const int kNoOffset = -1; - // Returns kNoOffset if there is no file offset corresponding to 'rva'. 
- int RVAToFileOffset(RVA rva) const; - - // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL - // is returned if there is no file offset corresponding to 'rva'. - const uint8* RVAToPointer(RVA rva) const; - - protected: - // - // Fields that are always valid. - // - const char* failure_reason_; - - // - // Basic information that is always valid after Init. - // - const uint8* start_; // In current memory, base for 'file offsets'. - const uint8* end_; // In current memory. - unsigned int length_; // In current memory. - - // - // Information that is valid after successful ParseHeader. - // - bool is_PE32_plus_; // PE32_plus is for 64 bit executables. - uint32 file_length_; - - // Location and size of IMAGE_OPTIONAL_HEADER in the buffer. - const uint8 *optional_header_; - uint16 size_of_optional_header_; - uint16 offset_of_data_directories_; - - uint16 machine_type_; - uint16 number_of_sections_; - const Section *sections_; - bool has_text_section_; - - uint32 size_of_code_; - uint32 size_of_initialized_data_; - uint32 size_of_uninitialized_data_; - RVA base_of_code_; - RVA base_of_data_; - - uint64 image_base_; // range limited to 32 bits for 32 bit executable - uint32 size_of_image_; - int number_of_data_directories_; - - ImageDataDirectory export_table_; - ImageDataDirectory import_table_; - ImageDataDirectory resource_table_; - ImageDataDirectory exception_table_; - ImageDataDirectory base_relocation_table_; - ImageDataDirectory bound_import_table_; - ImageDataDirectory import_address_table_; - ImageDataDirectory delay_import_descriptor_; - ImageDataDirectory clr_runtime_header_; - - private: - bool ReadDataDirectory(int index, ImageDataDirectory* dir); - bool Bad(const char *reason); - - DISALLOW_COPY_AND_ASSIGN(PEInfo); -}; - -} // namespace -#endif // COURGETTE_IMAGE_INFO_H_ diff --git a/courgette/image_info_unittest.cc b/courgette/image_info_unittest.cc deleted file mode 100644 index e0cac7d..0000000 --- 
a/courgette/image_info_unittest.cc +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "courgette/base_test_unittest.h" -#include "courgette/image_info.h" - -class ImageInfoTest : public BaseTest { - public: - - void TestExe() const; - void TestResourceDll() const; - - private: - void ExpectExecutable(courgette::PEInfo* info) const; - -}; - -void ImageInfoTest::ExpectExecutable(courgette::PEInfo* info) const { - EXPECT_TRUE(info->ok()); - EXPECT_TRUE(info->has_text_section()); -} - -void ImageInfoTest::TestExe() const { - std::string file1 = FileContents("setup1.exe"); - - scoped_ptr<courgette::PEInfo> info(new courgette::PEInfo()); - info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length()); - - bool can_parse_header = info->ParseHeader(); - EXPECT_TRUE(can_parse_header); - - // The executable is the whole file, not 'embedded' with the file - EXPECT_EQ(file1.length(), info->length()); - - ExpectExecutable(info.get()); - EXPECT_EQ(449536U, info->size_of_code()); - EXPECT_EQ(SectionName(info->RVAToSection(0x00401234 - 0x00400000)), - std::string(".text")); - - EXPECT_EQ(0, info->RVAToFileOffset(0)); - EXPECT_EQ(1024, info->RVAToFileOffset(4096)); - EXPECT_EQ(46928, info->RVAToFileOffset(50000)); - - std::vector<courgette::RVA> relocs; - bool can_parse_relocs = info->ParseRelocs(&relocs); - EXPECT_TRUE(can_parse_relocs); - - const uint8* p = info->RVAToPointer(0); - EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()), - reinterpret_cast<const void*>(p)); - EXPECT_EQ('M', p[0]); - EXPECT_EQ('Z', p[1]); -} - -void ImageInfoTest::TestResourceDll() const { - std::string file1 = FileContents("en-US.dll"); - - scoped_ptr<courgette::PEInfo> info(new courgette::PEInfo()); - info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length()); - - // This is expected to fail, since we don't 
really support them yet. - bool can_parse_header = info->ParseHeader(); - EXPECT_FALSE(can_parse_header); - - // The executable is the whole file, not 'embedded' with the file - EXPECT_EQ(file1.length(), info->length()); - - EXPECT_FALSE(info->ok()); - EXPECT_FALSE(info->has_text_section()); - EXPECT_EQ(0U, info->size_of_code()); -} - -TEST_F(ImageInfoTest, All) { - TestExe(); - TestResourceDll(); -} diff --git a/courgette/types_win_pe.h b/courgette/types_win_pe.h new file mode 100644 index 0000000..64fd541 --- /dev/null +++ b/courgette/types_win_pe.h @@ -0,0 +1,65 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TYPES_WIN_PE_H_ +#define TYPES_WIN_PE_H_ + +#include "base/basictypes.h" + + +namespace courgette { + +// PE file section header. This struct has the same layout as the +// IMAGE_SECTION_HEADER structure from WINNT.H +// http://msdn.microsoft.com/en-us/library/ms680341(VS.85).aspx +// +#pragma pack(push, 1) // Supported by MSVC and GCC. Ensures no gaps in packing. +struct Section { + char name[8]; + uint32 virtual_size; + uint32 virtual_address; + uint32 size_of_raw_data; + uint32 file_offset_of_raw_data; + uint32 pointer_to_relocations; // Always zero in an image. + uint32 pointer_to_line_numbers; // Always zero in an image. + uint16 number_of_relocations; // Always zero in an image. + uint16 number_of_line_numbers; // Always zero in an image. 
+ uint32 characteristics; +}; +#pragma pack(pop) + +COMPILE_ASSERT(sizeof(Section) == 40, section_is_40_bytes); + +// ImageDataDirectory has same layout as IMAGE_DATA_DIRECTORY structure from +// WINNT.H +// http://msdn.microsoft.com/en-us/library/ms680305(VS.85).aspx +// +class ImageDataDirectory { + public: + ImageDataDirectory() : address_(0), size_(0) {} + RVA address_; + uint32 size_; +}; + +COMPILE_ASSERT(sizeof(ImageDataDirectory) == 8, + image_data_directory_is_8_bytes); + + +//////////////////////////////////////////////////////////////////////////////// + +// Constants and offsets gleaned from WINNT.H and various articles on the +// format of Windows PE executables. + +// This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew): +const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c; + +const uint16 kImageNtOptionalHdr32Magic = 0x10b; +const uint16 kImageNtOptionalHdr64Magic = 0x20b; + +const size_t kSizeOfCoffHeader = 20; +const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96; +const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112; + +} // namespace +#endif // TYPES_WIN_PE_H_ |