diff options
-rw-r--r-- | courgette/courgette.gyp | 4 | ||||
-rw-r--r-- | courgette/courgette.h | 19 | ||||
-rw-r--r-- | courgette/courgette_tool.cc | 27 | ||||
-rw-r--r-- | courgette/disassembler.cc | 408 | ||||
-rw-r--r-- | courgette/disassembler.h | 11 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 377 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.h | 56 | ||||
-rw-r--r-- | courgette/encode_decode_unittest.cc | 3 | ||||
-rw-r--r-- | courgette/encoded_program_fuzz_unittest.cc | 5 | ||||
-rw-r--r-- | courgette/ensemble.cc | 99 | ||||
-rw-r--r-- | courgette/ensemble.h | 24 | ||||
-rw-r--r-- | courgette/ensemble_apply.cc | 16 | ||||
-rw-r--r-- | courgette/ensemble_create.cc | 24 | ||||
-rw-r--r-- | courgette/win32_x86_generator.h | 12 | ||||
-rw-r--r-- | courgette/win32_x86_patcher.h | 6 |
15 files changed, 512 insertions, 579 deletions
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp index 877faf5..e9fa170 100644 --- a/courgette/courgette.gyp +++ b/courgette/courgette.gyp @@ -1,4 +1,4 @@ -# Copyright (c) 2011 The Chromium Authors. All rights reserved. +# Copyright (c) 2009 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -22,8 +22,6 @@ 'difference_estimator.h', 'disassembler.cc', 'disassembler.h', - 'disassembler_win32_x86.cc', - 'disassembler_win32_x86.h', 'encoded_program.cc', 'encoded_program.h', 'ensemble.cc', diff --git a/courgette/courgette.h b/courgette/courgette.h index 70a6436..521053e 100644 --- a/courgette/courgette.h +++ b/courgette/courgette.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -50,13 +50,6 @@ enum Status { C_ADJUSTMENT_FAILED = 27, // }; -// What type of executable is something -// Generally corresponds to CourgettePatchFile::TransformationMethodId -enum ExecutableType { - UNKNOWN, - WIN32_X86 -}; - class SinkStream; class SinkStreamSet; class SourceStream; @@ -91,14 +84,8 @@ Status GenerateEnsemblePatch(SourceStream* old, SourceStream* target, // storing the pointer to the AssemblyProgram in |*output|. // Returns C_OK if successful, otherwise returns an error status and sets // |*output| to NULL. -ExecutableType DetectExecutableType(const void* buffer, size_t length); - -// Attempts to detect the type of executable, and parse it with the -// appropriate tools, storing the pointer to the AssemblyProgram in |*output|. -// Returns C_OK if successful, otherwise returns an error status and sets -// |*output| to NULL. -Status ParseDetectedExecutable(const void* buffer, size_t length, - AssemblyProgram** output); +Status ParseWin32X86PE(const void* buffer, size_t length, + AssemblyProgram** output); // Converts |program| into encoded form, returning it as |*output|. // Returns C_OK if succeeded, otherwise returns an error status and diff --git a/courgette/courgette_tool.cc b/courgette/courgette_tool.cc index 6b2e14e..d36f4f1 100644 --- a/courgette/courgette_tool.cc +++ b/courgette/courgette_tool.cc @@ -85,8 +85,7 @@ void Disassemble(const std::wstring& input_file, courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseDetectedExecutable(buffer.c_str(), buffer.length(), - &program); + courgette::ParseWin32X86PE(buffer.c_str(), buffer.length(), &program); if (parse_status != courgette::C_OK) Problem("Can't parse input."); @@ -123,17 +122,17 @@ void DisassembleAndAdjust(const std::wstring& program_file, courgette::AssemblyProgram* program = NULL; const courgette::Status parse_program_status = - courgette::ParseDetectedExecutable(program_buffer.c_str(), - program_buffer.length(), - &program); + courgette::ParseWin32X86PE(program_buffer.c_str(), + program_buffer.length(), + &program); if (parse_program_status != courgette::C_OK) Problem("Can't parse program input."); courgette::AssemblyProgram* model = NULL; const courgette::Status parse_model_status = - courgette::ParseDetectedExecutable(model_buffer.c_str(), - model_buffer.length(), - &model); + courgette::ParseWin32X86PE(model_buffer.c_str(), + model_buffer.length(), + &model); if (parse_model_status != courgette::C_OK) Problem("Can't parse model input."); @@ -179,17 +178,17 @@ void DisassembleAdjustDiff(const std::wstring& model_file, courgette::AssemblyProgram* model = NULL; const courgette::Status parse_model_status = - courgette::ParseDetectedExecutable(model_buffer.c_str(), - model_buffer.length(), - &model); + courgette::ParseWin32X86PE(model_buffer.c_str(), + model_buffer.length(), + &model); if (parse_model_status != courgette::C_OK) Problem("Can't parse model input."); courgette::AssemblyProgram* program = NULL; const courgette::Status parse_program_status = - courgette::ParseDetectedExecutable(program_buffer.c_str(), - program_buffer.length(), - &program); + courgette::ParseWin32X86PE(program_buffer.c_str(), + program_buffer.length(), + &program); if (parse_program_status != courgette::C_OK) Problem("Can't parse program input."); diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index f4ae86d..e3dd71a 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -13,7 +13,6 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/disassembler_win32_x86.h" #include "courgette/encoded_program.h" #include "courgette/image_info.h" @@ -23,25 +22,402 @@ namespace courgette { -//////////////////////////////////////////////////////////////////////////////// +class DisassemblerWin32X86 : public Disassembler { + public: + explicit DisassemblerWin32X86(PEInfo* pe_info) + : pe_info_(pe_info), + incomplete_disassembly_(false) { + } -ExecutableType DetectExecutableType(const void* buffer, size_t length) { + virtual bool Disassemble(AssemblyProgram* target); - bool parsed = false; + virtual void Destroy() { delete this; } - PEInfo* pe_info = new PEInfo(); - pe_info->Init(buffer, length); - parsed = pe_info->ParseHeader(); - delete pe_info; + protected: + PEInfo& pe_info() { return *pe_info_; } + + CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; + bool ParseAbs32Relocs(); + void ParseRel32RelocsFromSections(); + void ParseRel32RelocsFromSection(const Section* section); + + CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, + uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; + CheckBool ParseFileRegion(const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) WARN_UNUSED_RESULT; + +#if COURGETTE_HISTOGRAM_TARGETS + void HistogramTargets(const char* kind, const std::map<RVA, int>& map); +#endif + + PEInfo* pe_info_; + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits + + std::vector<RVA> abs32_locations_; + std::vector<RVA> rel32_locations_; + +#if COURGETTE_HISTOGRAM_TARGETS + std::map<RVA, int> abs32_target_rvas_; + std::map<RVA, int> rel32_target_rvas_; +#endif +}; + +bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { + if (!pe_info().ok()) + return false; + + target->set_image_base(pe_info().image_base()); + + if (!ParseAbs32Relocs()) + return false; + + ParseRel32RelocsFromSections(); + + if (!ParseFile(target)) + return false; + + target->DefaultAssignIndexes(); + + return true; +} + +static uint32 Read32LittleEndian(const void* address) { + return *reinterpret_cast<const uint32*>(address); +} - if (parsed) - return WIN32_X86; +bool DisassemblerWin32X86::ParseAbs32Relocs() { + abs32_locations_.clear(); + if (!pe_info().ParseRelocs(&abs32_locations_)) + return false; - return UNKNOWN; + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + for (size_t i = 0; i < abs32_locations_.size(); ++i) { + RVA rva = abs32_locations_[i]; + // The 4 bytes at the relocation are a reference to some address. + uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); + ++abs32_target_rvas_[target_address - pe_info().image_base()]; + } +#endif + return true; } -Status ParseDetectedExecutable(const void* buffer, size_t length, - AssemblyProgram** output) { +void DisassemblerWin32X86::ParseRel32RelocsFromSections() { + uint32 file_offset = 0; + while (file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) + break; + if (file_offset < section->file_offset_of_raw_data) + file_offset = section->file_offset_of_raw_data; + ParseRel32RelocsFromSection(section); + file_offset += section->size_of_raw_data; + } + std::sort(rel32_locations_.begin(), rel32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + VLOG(1) << "abs32_locations_ " << abs32_locations_.size() + << "\nrel32_locations_ " << rel32_locations_.size() + << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() + << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); + + int common = 0; + std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); + std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); + while (abs32_iter != abs32_target_rvas_.end() && + rel32_iter != rel32_target_rvas_.end()) { + if (abs32_iter->first < rel32_iter->first) + ++abs32_iter; + else if (rel32_iter->first < abs32_iter->first) + ++rel32_iter; + else { + ++common; + ++abs32_iter; + ++rel32_iter; + } + } + VLOG(1) << "common " << common; +#endif +} + +void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { + // TODO(sra): use characteristic. + bool isCode = strcmp(section->name, ".text") == 0; + if (!isCode) + return; + + uint32 start_file_offset = section->file_offset_of_raw_data; + uint32 end_file_offset = start_file_offset + section->size_of_raw_data; + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + // Find the rel32 relocations. + const uint8* p = start_pointer; + while (p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + if (current_rva == relocs_start_rva) { + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + // ++abs32_pos; + + // Heuristic discovery of rel32 locations in instruction stream: are the + // next few bytes the start of an instruction containing a rel32 + // addressing mode? + const uint8* rel32 = NULL; + + if (p + 5 < end_pointer) { + if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 + rel32 = p + 1; + } + } + if (p + 6 < end_pointer) { + if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form + if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely + rel32 = p + 2; + } + } + if (rel32) { + RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); + + // Is there an abs32 reloc overlapping the candidate? + while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) + ++abs32_pos; + // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte + // region that could overlap rel32_rva. + if (abs32_pos != abs32_locations_.end()) { + if (*abs32_pos < rel32_rva + 4) { + // Beginning of abs32 reloc is before end of rel32 reloc so they + // overlap. Skip four bytes past the abs32 reloc. + p += (*abs32_pos + 4) - current_rva; + continue; + } + } + + RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); + // To be valid, rel32 target must be within image, and within this + // section. + if (pe_info().IsValidRVA(target_rva) && + start_rva <= target_rva && target_rva < end_rva) { + rel32_locations_.push_back(rel32_rva); +#if COURGETTE_HISTOGRAM_TARGETS + ++rel32_target_rvas_[target_rva]; +#endif + p += 4; + continue; + } + } + p += 1; + } +} + +CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { + bool ok = true; + // Walk all the bytes in the file, whether or not in a section. + uint32 file_offset = 0; + while (ok && file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) { + // No more sections. There should not be extra stuff following last + // section. + // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); + break; + } + if (file_offset < section->file_offset_of_raw_data) { + uint32 section_start_offset = section->file_offset_of_raw_data; + ok = ParseNonSectionFileRegion(file_offset, section_start_offset, + program); + file_offset = section_start_offset; + } + if (ok) { + uint32 end = file_offset + section->size_of_raw_data; + ok = ParseFileRegion(section, file_offset, end, program); + file_offset = end; + } + } + +#if COURGETTE_HISTOGRAM_TARGETS + HistogramTargets("abs32 relocs", abs32_target_rvas_); + HistogramTargets("rel32 relocs", rel32_target_rvas_); +#endif + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( + uint32 start_file_offset, + uint32 end_file_offset, + AssemblyProgram* program) { + if (incomplete_disassembly_) + return true; + + const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); + + const uint8* p = start; + + bool ok = true; + while (p < end && ok) { + ok = program->EmitByteInstruction(*p); + ++p; + } + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseFileRegion( + const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) { + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + bool ok = program->EmitOriginInstruction(start_rva); + + const uint8* p = start_pointer; + + while (ok && p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + + // The base relocation table is usually in the .relocs section, but it could + // actually be anywhere. Make sure we skip it because we will regenerate it + // during assembly. + if (current_rva == relocs_start_rva) { + ok = program->EmitMakeRelocsInstruction(); + if (!ok) + break; + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + ++abs32_pos; + + if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { + uint32 target_address = Read32LittleEndian(p); + RVA target_rva = target_address - pe_info().image_base(); + // TODO(sra): target could be Label+offset. It is not clear how to guess + // which it might be. We assume offset==0. + ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); + if (!ok) + break; + p += 4; + continue; + } + + while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) + ++rel32_pos; + + if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { + RVA target_rva = current_rva + 4 + Read32LittleEndian(p); + ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); + p += 4; + continue; + } + + if (incomplete_disassembly_) { + if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && + (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && + (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { + // No more relocs in this section, don't bother encoding bytes. + break; + } + } + + ok = program->EmitByteInstruction(*p); + p += 1; + } + + return ok; +} + +#if COURGETTE_HISTOGRAM_TARGETS +// Histogram is printed to std::cout. It is purely for debugging the algorithm +// and is only enabled manually in 'exploration' builds. I don't want to add +// command-line configuration for this feature because this code has to be +// small, which means compiled-out. +void DisassemblerWin32X86::HistogramTargets(const char* kind, + const std::map<RVA, int>& map) { + int total = 0; + std::map<int, std::vector<RVA> > h; + for (std::map<RVA, int>::const_iterator p = map.begin(); + p != map.end(); + ++p) { + h[p->second].push_back(p->first); + total += p->second; + } + + std::cout << total << " " << kind << " to " + << map.size() << " unique targets" << std::endl; + + std::cout << "indegree: #targets-with-indegree (example)" << std::endl; + const int kFirstN = 15; + bool someSkipped = false; + int index = 0; + for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin(); + p != h.rend(); + ++p) { + ++index; + if (index <= kFirstN || p->first <= 3) { + if (someSkipped) { + std::cout << "..." << std::endl; + } + size_t count = p->second.size(); + std::cout << std::dec << p->first << ": " << count; + if (count <= 2) { + for (size_t i = 0; i < count; ++i) + std::cout << " " << pe_info().DescribeRVA(p->second[i]); + } + std::cout << std::endl; + someSkipped = false; + } else { + someSkipped = true; + } + } +} +#endif // COURGETTE_HISTOGRAM_TARGETS + +Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) { + return new DisassemblerWin32X86(pe_info); +} + +//////////////////////////////////////////////////////////////////////////////// + +Status ParseWin32X86PE(const void* buffer, size_t length, + AssemblyProgram** output) { *output = NULL; PEInfo* pe_info = new PEInfo(); @@ -52,17 +428,17 @@ Status ParseDetectedExecutable(const void* buffer, size_t length, return C_INPUT_NOT_RECOGNIZED; } - Disassembler* disassembler = new DisassemblerWin32X86(pe_info); + Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); AssemblyProgram* program = new AssemblyProgram(); if (!disassembler->Disassemble(program)) { delete program; - delete disassembler; + disassembler->Destroy(); delete pe_info; return C_DISASSEMBLY_FAILED; } - delete disassembler; + disassembler->Destroy(); delete pe_info; *output = program; return C_OK; diff --git a/courgette/disassembler.h b/courgette/disassembler.h index bef1a90..fa7c908 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,14 +14,21 @@ class PEInfo; class Disassembler { public: - virtual ~Disassembler() {} + // Factory methods for making disassemblers for various kinds of executables. + // We have only one so far. + + static Disassembler* MakeDisassemberWin32X86(PEInfo* pe_info); // Disassembles the item passed to the factory method into the output // parameter 'program'. virtual bool Disassemble(AssemblyProgram* program) = 0; + // Deletes 'this' disassembler. + virtual void Destroy() = 0; + protected: Disassembler() {} + virtual ~Disassembler() {} private: DISALLOW_COPY_AND_ASSIGN(Disassembler); diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc deleted file mode 100644 index fb12c22..0000000 --- a/courgette/disassembler_win32_x86.cc +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "courgette/disassembler_win32_x86.h" - -#include <algorithm> -#include <string> -#include <vector> - -#include "base/basictypes.h" -#include "base/logging.h" - -#include "courgette/assembly_program.h" -#include "courgette/courgette.h" -#include "courgette/encoded_program.h" -#include "courgette/image_info.h" - -// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently -// different target addresses are referenced. Purely for debugging. -#define COURGETTE_HISTOGRAM_TARGETS 0 - -namespace courgette { - -DisassemblerWin32X86::DisassemblerWin32X86(PEInfo* pe_info) - : pe_info_(pe_info), - incomplete_disassembly_(false) { -} - -bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { - if (!pe_info().ok()) - return false; - - target->set_image_base(pe_info().image_base()); - - if (!ParseAbs32Relocs()) - return false; - - ParseRel32RelocsFromSections(); - - if (!ParseFile(target)) - return false; - - target->DefaultAssignIndexes(); - - return true; -} - -static uint32 Read32LittleEndian(const void* address) { - return *reinterpret_cast<const uint32*>(address); -} - -bool DisassemblerWin32X86::ParseAbs32Relocs() { - abs32_locations_.clear(); - if (!pe_info().ParseRelocs(&abs32_locations_)) - return false; - - std::sort(abs32_locations_.begin(), abs32_locations_.end()); - -#if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { - RVA rva = abs32_locations_[i]; - // The 4 bytes at the relocation are a reference to some address. - uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); - ++abs32_target_rvas_[target_address - pe_info().image_base()]; - } -#endif - return true; -} - -void DisassemblerWin32X86::ParseRel32RelocsFromSections() { - uint32 file_offset = 0; - while (file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); - if (section == NULL) - break; - if (file_offset < section->file_offset_of_raw_data) - file_offset = section->file_offset_of_raw_data; - ParseRel32RelocsFromSection(section); - file_offset += section->size_of_raw_data; - } - std::sort(rel32_locations_.begin(), rel32_locations_.end()); - -#if COURGETTE_HISTOGRAM_TARGETS - VLOG(1) << "abs32_locations_ " << abs32_locations_.size() - << "\nrel32_locations_ " << rel32_locations_.size() - << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() - << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); - - int common = 0; - std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); - std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); - while (abs32_iter != abs32_target_rvas_.end() && - rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) - ++abs32_iter; - else if (rel32_iter->first < abs32_iter->first) - ++rel32_iter; - else { - ++common; - ++abs32_iter; - ++rel32_iter; - } - } - VLOG(1) << "common " << common; -#endif -} - -void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { - // TODO(sra): use characteristic. - bool isCode = strcmp(section->name, ".text") == 0; - if (!isCode) - return; - - uint32 start_file_offset = section->file_offset_of_raw_data; - uint32 end_file_offset = start_file_offset + section->size_of_raw_data; - RVA relocs_start_rva = pe_info().base_relocation_table().address_; - - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); - - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); - RVA end_rva = start_rva + section->virtual_size; - - // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. - const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; - - std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); - - // Find the rel32 relocations. - const uint8* p = start_pointer; - while (p < end_pointer) { - RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); - if (current_rva == relocs_start_rva) { - uint32 relocs_size = pe_info().base_relocation_table().size_; - if (relocs_size) { - p += relocs_size; - continue; - } - } - - //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) - // ++abs32_pos; - - // Heuristic discovery of rel32 locations in instruction stream: are the - // next few bytes the start of an instruction containing a rel32 - // addressing mode? - const uint8* rel32 = NULL; - - if (p + 5 < end_pointer) { - if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 - rel32 = p + 1; - } - } - if (p + 6 < end_pointer) { - if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form - if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely - rel32 = p + 2; - } - } - if (rel32) { - RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); - - // Is there an abs32 reloc overlapping the candidate? - while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) - ++abs32_pos; - // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte - // region that could overlap rel32_rva. - if (abs32_pos != abs32_locations_.end()) { - if (*abs32_pos < rel32_rva + 4) { - // Beginning of abs32 reloc is before end of rel32 reloc so they - // overlap. Skip four bytes past the abs32 reloc. - p += (*abs32_pos + 4) - current_rva; - continue; - } - } - - RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); - // To be valid, rel32 target must be within image, and within this - // section. - if (pe_info().IsValidRVA(target_rva) && - start_rva <= target_rva && target_rva < end_rva) { - rel32_locations_.push_back(rel32_rva); -#if COURGETTE_HISTOGRAM_TARGETS - ++rel32_target_rvas_[target_rva]; -#endif - p += 4; - continue; - } - } - p += 1; - } -} - -CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { - bool ok = true; - // Walk all the bytes in the file, whether or not in a section. - uint32 file_offset = 0; - while (ok && file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); - if (section == NULL) { - // No more sections. There should not be extra stuff following last - // section. - // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); - break; - } - if (file_offset < section->file_offset_of_raw_data) { - uint32 section_start_offset = section->file_offset_of_raw_data; - ok = ParseNonSectionFileRegion(file_offset, section_start_offset, - program); - file_offset = section_start_offset; - } - if (ok) { - uint32 end = file_offset + section->size_of_raw_data; - ok = ParseFileRegion(section, file_offset, end, program); - file_offset = end; - } - } - -#if COURGETTE_HISTOGRAM_TARGETS - HistogramTargets("abs32 relocs", abs32_target_rvas_); - HistogramTargets("rel32 relocs", rel32_target_rvas_); -#endif - - return ok; -} - -CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( - uint32 start_file_offset, - uint32 end_file_offset, - AssemblyProgram* program) { - if (incomplete_disassembly_) - return true; - - const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); - - const uint8* p = start; - - bool ok = true; - while (p < end && ok) { - ok = program->EmitByteInstruction(*p); - ++p; - } - - return ok; -} - -CheckBool DisassemblerWin32X86::ParseFileRegion( - const Section* section, - uint32 start_file_offset, uint32 end_file_offset, - AssemblyProgram* program) { - RVA relocs_start_rva = pe_info().base_relocation_table().address_; - - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); - - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); - RVA end_rva = start_rva + section->virtual_size; - - // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. - const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; - - std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); - std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); - - bool ok = program->EmitOriginInstruction(start_rva); - - const uint8* p = start_pointer; - - while (ok && p < end_pointer) { - RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); - - // The base relocation table is usually in the .relocs section, but it could - // actually be anywhere. Make sure we skip it because we will regenerate it - // during assembly. - if (current_rva == relocs_start_rva) { - ok = program->EmitMakeRelocsInstruction(); - if (!ok) - break; - uint32 relocs_size = pe_info().base_relocation_table().size_; - if (relocs_size) { - p += relocs_size; - continue; - } - } - - while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) - ++abs32_pos; - - if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { - uint32 target_address = Read32LittleEndian(p); - RVA target_rva = target_address - pe_info().image_base(); - // TODO(sra): target could be Label+offset. It is not clear how to guess - // which it might be. We assume offset==0. - ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); - if (!ok) - break; - p += 4; - continue; - } - - while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) - ++rel32_pos; - - if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { - RVA target_rva = current_rva + 4 + Read32LittleEndian(p); - ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); - p += 4; - continue; - } - - if (incomplete_disassembly_) { - if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && - (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && - (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { - // No more relocs in this section, don't bother encoding bytes. - break; - } - } - - ok = program->EmitByteInstruction(*p); - p += 1; - } - - return ok; -} - -#if COURGETTE_HISTOGRAM_TARGETS -// Histogram is printed to std::cout. It is purely for debugging the algorithm -// and is only enabled manually in 'exploration' builds. I don't want to add -// command-line configuration for this feature because this code has to be -// small, which means compiled-out. -void DisassemblerWin32X86::HistogramTargets(const char* kind, - const std::map<RVA, int>& map) { - int total = 0; - std::map<int, std::vector<RVA> > h; - for (std::map<RVA, int>::const_iterator p = map.begin(); - p != map.end(); - ++p) { - h[p->second].push_back(p->first); - total += p->second; - } - - std::cout << total << " " << kind << " to " - << map.size() << " unique targets" << std::endl; - - std::cout << "indegree: #targets-with-indegree (example)" << std::endl; - const int kFirstN = 15; - bool someSkipped = false; - int index = 0; - for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin(); - p != h.rend(); - ++p) { - ++index; - if (index <= kFirstN || p->first <= 3) { - if (someSkipped) { - std::cout << "..." << std::endl; - } - size_t count = p->second.size(); - std::cout << std::dec << p->first << ": " << count; - if (count <= 2) { - for (size_t i = 0; i < count; ++i) - std::cout << " " << pe_info().DescribeRVA(p->second[i]); - } - std::cout << std::endl; - someSkipped = false; - } else { - someSkipped = true; - } - } -} -#endif // COURGETTE_HISTOGRAM_TARGETS - -} // namespace courgette diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h deleted file mode 100644 index fe00b6d..0000000 --- a/courgette/disassembler_win32_x86.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef COURGETTE_DISASSEMBLER_WIN32_X86_H_ -#define COURGETTE_DISASSEMBLER_WIN32_X86_H_ - -#include "base/basictypes.h" -#include "courgette/disassembler.h" -#include "courgette/image_info.h" -#include "courgette/memory_allocator.h" - -namespace courgette { - -class AssemblyProgram; - -class DisassemblerWin32X86 : public Disassembler { - public: - explicit DisassemblerWin32X86(PEInfo* pe_info); - - virtual bool Disassemble(AssemblyProgram* target); - - protected: - PEInfo& pe_info() { return *pe_info_; } - - CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; - bool ParseAbs32Relocs(); - void ParseRel32RelocsFromSections(); - void ParseRel32RelocsFromSection(const Section* section); - - CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, - uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; - CheckBool ParseFileRegion(const Section* section, - uint32 start_file_offset, uint32 end_file_offset, - AssemblyProgram* program) WARN_UNUSED_RESULT; - -#if COURGETTE_HISTOGRAM_TARGETS - void HistogramTargets(const char* kind, const std::map<RVA, int>& map); -#endif - - PEInfo* pe_info_; - bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits - - std::vector<RVA> abs32_locations_; - std::vector<RVA> rel32_locations_; - -#if COURGETTE_HISTOGRAM_TARGETS - std::map<RVA, int> abs32_target_rvas_; - std::map<RVA, int> rel32_target_rvas_; -#endif - - DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86); -}; - -} // namespace courgette -#endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_ diff --git a/courgette/encode_decode_unittest.cc b/courgette/encode_decode_unittest.cc index 21e2e65..c14dc9f 100644 --- a/courgette/encode_decode_unittest.cc +++ b/courgette/encode_decode_unittest.cc @@ -53,8 +53,7 @@ void EncodeDecodeTest::TestExe(const char* file_name) const { courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseDetectedExecutable(original_buffer, original_length, - &program); + courgette::ParseWin32X86PE(original_buffer, original_length, &program); EXPECT_EQ(courgette::C_OK, parse_status); courgette::EncodedProgram* encoded = NULL; diff --git a/courgette/encoded_program_fuzz_unittest.cc b/courgette/encoded_program_fuzz_unittest.cc index 1625fd0..a869bc2 100644 --- a/courgette/encoded_program_fuzz_unittest.cc +++ b/courgette/encoded_program_fuzz_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -70,8 +70,7 @@ void DecodeFuzzTest::FuzzExe(const char* file_name) const { courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseDetectedExecutable(original_buffer, original_length, - &program); + courgette::ParseWin32X86PE(original_buffer, original_length, &program); EXPECT_EQ(courgette::C_OK, parse_status); courgette::EncodedProgram* encoded = NULL; diff --git a/courgette/ensemble.cc b/courgette/ensemble.cc index a2bea8f..69e07a7 100644 --- a/courgette/ensemble.cc +++ b/courgette/ensemble.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,15 +14,8 @@ namespace courgette { -Element::Element(ExecutableType kind, - Ensemble* ensemble, - const Region& region, - PEInfo* info) - : kind_(kind), ensemble_(ensemble), region_(region), info_(info) { -} - -Element::~Element() { - delete info_; +Element::Element(Kind kind, Ensemble* ensemble, const Region& region) + : kind_(kind), ensemble_(ensemble), region_(region) { } std::string Element::Name() const { @@ -32,51 +25,71 @@ std::string Element::Name() const { + base::Uint64ToString(region().length()) + ")"; } +// A subclass of Element that has a PEInfo. +class ElementWinPE : public Element { + public: + ElementWinPE(Kind kind, Ensemble* ensemble, const Region& region, + PEInfo* info) + : Element(kind, ensemble, region), + pe_info_(info) { + } + + virtual PEInfo* GetPEInfo() const { return pe_info_; } + + protected: + ~ElementWinPE() { delete pe_info_; } + + private: + PEInfo* pe_info_; // Owned by |this|. +}; + // Scans the Ensemble's region, sniffing out Elements. We assume that the // elements do not overlap. Status Ensemble::FindEmbeddedElements() { - size_t length = region_.length(); const uint8* start = region_.start(); size_t position = 0; while (position < length) { - ExecutableType type = DetectExecutableType(start + position, - length - position); - - // - // TODO(dgarrett) This switch can go away totally after two things. - // - // Make ImageInfo generic for all executable types. - // Find a generic way to handle length detection for executables. - // - // When this switch is gone, that's one less piece of code that is - // executable type aware. - // - switch (type) { - case UNKNOWN: { - // No Element found at current position. - ++position; - break; - } - case WIN32_X86: { - // The Info is only created to detect the length of the executable - courgette::PEInfo* info(new courgette::PEInfo()); - info->Init(start + position, length - position); - if (!info->ParseHeader()) { - delete info; - position++; - break; - } + // Quick test; Windows executables begin with 'MZ'. + if (start[position] == 'M' && + position + 1 < length && start[position + 1] == 'Z') { + courgette::PEInfo *info = new courgette::PEInfo(); + info->Init(start + position, length - position); + if (info->ParseHeader()) { Region region(start + position, info->length()); - Element* element = new Element(type, this, region, info); - owned_elements_.push_back(element); - elements_.push_back(element); - position += region.length(); - break; + if (info->has_text_section()) { + if (info->is_32bit()) { + Element* element = new ElementWinPE(Element::WIN32_X86_WITH_CODE, + this, region, info); + owned_elements_.push_back(element); + elements_.push_back(element); + position += region.length(); + continue; + } + // TODO(sra): Extend to 64-bit executables. + } + + // If we had a clever transformation for resource-only executables we + // should identify the suitable elements here: + if (!info->has_text_section() && false) { + Element* element = new ElementWinPE(Element::WIN32_NOCODE, + this, region, info); + owned_elements_.push_back(element); + elements_.push_back(element); + position += region.length(); + continue; + } } + delete info; } + + // This is where to add new formats, e.g. Linux executables, Dalvik + // executables etc. + + // No Element found at current position. + ++position; } return C_OK; } diff --git a/courgette/ensemble.h b/courgette/ensemble.h index e766782..f907f9d 100644 --- a/courgette/ensemble.h +++ b/courgette/ensemble.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -36,14 +36,11 @@ class PEInfo; // class Element { public: - Element(ExecutableType kind, - Ensemble* ensemble, - const Region& region, - PEInfo*info); + enum Kind { WIN32_X86_WITH_CODE, WIN32_NOCODE }; - virtual ~Element(); + virtual ~Element() {} - ExecutableType kind() const { return kind_; } + Kind kind() const { return kind_; } const Region& region() const { return region_; } // The name is used only for debugging and logging. @@ -53,14 +50,16 @@ class Element { // containing Ensemble. size_t offset_in_ensemble() const; - // The ImageInfo for this executable - virtual PEInfo* GetImageInfo() const { return info_; } + // Some subclasses of Element might have a PEInfo. + virtual PEInfo* GetPEInfo() const { return NULL; } + + protected: + Element(Kind kind, Ensemble* ensemble, const Region& region); private: - ExecutableType kind_; + Kind kind_; Ensemble* ensemble_; Region region_; - PEInfo *info_; DISALLOW_COPY_AND_ASSIGN(Element); }; @@ -140,8 +139,7 @@ struct CourgettePatchFile { static const uint32 kVersion = 20110216; - // Transformation method IDs. These are embedded in generated files, so - // never remove or change an existing id. + // Transformation method IDs. enum TransformationMethodId { T_COURGETTE_WIN32_X86 = 1, // Windows 32 bit 'Portable Executable' x86. }; diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc index 499ccac..6efbc40 100644 --- a/courgette/ensemble_apply.cc +++ b/courgette/ensemble_apply.cc @@ -136,19 +136,13 @@ Status EnsemblePatchApplication::ReadInitialParameters( if (!transformation_parameters->ReadVarint32(&kind)) return C_BAD_ENSEMBLE_HEADER; - TransformationPatcher* patcher = NULL; - - switch (kind) - { - case CourgettePatchFile::T_COURGETTE_WIN32_X86: - patcher = new CourgetteWin32X86Patcher(base_region_); - break; - } - - if (patcher) + if (kind == CourgettePatchFile::T_COURGETTE_WIN32_X86) { + TransformationPatcher* patcher = + new CourgetteWin32X86Patcher(base_region_); patchers_.push_back(patcher); - else + } else { return C_BAD_ENSEMBLE_HEADER; + } } for (size_t i = 0; i < patchers_.size(); ++i) { diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc index 62105b9..ec33689 100644 --- a/courgette/ensemble_create.cc +++ b/courgette/ensemble_create.cc @@ -65,21 +65,17 @@ Status TransformationPatchGenerator::Reform( // Element kind. TransformationPatchGenerator* MakeGenerator(Element* old_element, Element* new_element) { - switch (new_element->kind()) { - case UNKNOWN: - break; - case WIN32_X86: { - TransformationPatchGenerator* generator = - new CourgetteWin32X86PatchGenerator( - old_element, - new_element, - new CourgetteWin32X86Patcher(old_element->region())); - return generator; - } + if (new_element->kind() == Element::WIN32_X86_WITH_CODE) { + CourgetteWin32X86PatchGenerator* generator = + new CourgetteWin32X86PatchGenerator( + old_element, + new_element, + new CourgetteWin32X86Patcher(old_element->region())); + return generator; + } else { + LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); + return NULL; } - - LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); - return NULL; } // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element diff --git a/courgette/win32_x86_generator.h b/courgette/win32_x86_generator.h index e77819f..496a2ce 100644 --- a/courgette/win32_x86_generator.h +++ b/courgette/win32_x86_generator.h @@ -61,9 +61,9 @@ class CourgetteWin32X86PatchGenerator : public TransformationPatchGenerator { // TODO(sra): refactor to use same code from patcher_. AssemblyProgram* old_program = NULL; Status old_parse_status = - ParseDetectedExecutable(old_element_->region().start(), - old_element_->region().length(), - &old_program); + ParseWin32X86PE(old_element_->region().start(), + old_element_->region().length(), + &old_program); if (old_parse_status != C_OK) { LOG(ERROR) << "Cannot parse as Win32X86PE " << old_element_->Name(); return old_parse_status; @@ -71,9 +71,9 @@ class CourgetteWin32X86PatchGenerator : public TransformationPatchGenerator { AssemblyProgram* new_program = NULL; Status new_parse_status = - ParseDetectedExecutable(new_element_->region().start(), - new_element_->region().length(), - &new_program); + ParseWin32X86PE(new_element_->region().start(), + new_element_->region().length(), + &new_program); if (new_parse_status != C_OK) { DeleteAssemblyProgram(old_program); LOG(ERROR) << "Cannot parse as Win32X86PE " << new_element_->Name(); diff --git a/courgette/win32_x86_patcher.h b/courgette/win32_x86_patcher.h index f1aad9d..6b85021 100644 --- a/courgette/win32_x86_patcher.h +++ b/courgette/win32_x86_patcher.h @@ -46,9 +46,9 @@ class CourgetteWin32X86Patcher : public TransformationPatcher { return C_GENERAL_ERROR; // Don't expect any corrected parameters. AssemblyProgram* program = NULL; - status = ParseDetectedExecutable(ensemble_region_.start() + base_offset_, - base_length_, - &program); + status = ParseWin32X86PE(ensemble_region_.start() + base_offset_, + base_length_, + &program); if (status != C_OK) return status; |