diff options
-rw-r--r-- | courgette/courgette.gyp | 4 | ||||
-rw-r--r-- | courgette/courgette.h | 19 | ||||
-rw-r--r-- | courgette/courgette_tool.cc | 27 | ||||
-rw-r--r-- | courgette/disassembler.cc | 408 | ||||
-rw-r--r-- | courgette/disassembler.h | 11 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 377 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.h | 56 | ||||
-rw-r--r-- | courgette/encode_decode_unittest.cc | 3 | ||||
-rw-r--r-- | courgette/encoded_program_fuzz_unittest.cc | 5 | ||||
-rw-r--r-- | courgette/ensemble.cc | 99 | ||||
-rw-r--r-- | courgette/ensemble.h | 24 | ||||
-rw-r--r-- | courgette/ensemble_apply.cc | 16 | ||||
-rw-r--r-- | courgette/ensemble_create.cc | 24 | ||||
-rw-r--r-- | courgette/win32_x86_generator.h | 12 | ||||
-rw-r--r-- | courgette/win32_x86_patcher.h | 6 |
15 files changed, 579 insertions, 512 deletions
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp index e9fa170..877faf5 100644 --- a/courgette/courgette.gyp +++ b/courgette/courgette.gyp @@ -1,4 +1,4 @@ -# Copyright (c) 2009 The Chromium Authors. All rights reserved. +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -22,6 +22,8 @@ 'difference_estimator.h', 'disassembler.cc', 'disassembler.h', + 'disassembler_win32_x86.cc', + 'disassembler_win32_x86.h', 'encoded_program.cc', 'encoded_program.h', 'ensemble.cc', diff --git a/courgette/courgette.h b/courgette/courgette.h index 521053e..70a6436 100644 --- a/courgette/courgette.h +++ b/courgette/courgette.h @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -50,6 +50,13 @@ enum Status { C_ADJUSTMENT_FAILED = 27, // }; +// What type of executable is something +// Generally corresponds to CourgettePatchFile::TransformationMethodId +enum ExecutableType { + UNKNOWN, + WIN32_X86 +}; + class SinkStream; class SinkStreamSet; class SourceStream; @@ -84,8 +91,14 @@ Status GenerateEnsemblePatch(SourceStream* old, SourceStream* target, // storing the pointer to the AssemblyProgram in |*output|. // Returns C_OK if successful, otherwise returns an error status and sets // |*output| to NULL. -Status ParseWin32X86PE(const void* buffer, size_t length, - AssemblyProgram** output); +ExecutableType DetectExecutableType(const void* buffer, size_t length); + +// Attempts to detect the type of executable, and parse it with the +// appropriate tools, storing the pointer to the AssemblyProgram in |*output|. +// Returns C_OK if successful, otherwise returns an error status and sets +// |*output| to NULL. +Status ParseDetectedExecutable(const void* buffer, size_t length, + AssemblyProgram** output); // Converts |program| into encoded form, returning it as |*output|. // Returns C_OK if succeeded, otherwise returns an error status and diff --git a/courgette/courgette_tool.cc b/courgette/courgette_tool.cc index d36f4f1..6b2e14e 100644 --- a/courgette/courgette_tool.cc +++ b/courgette/courgette_tool.cc @@ -85,7 +85,8 @@ void Disassemble(const std::wstring& input_file, courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseWin32X86PE(buffer.c_str(), buffer.length(), &program); + courgette::ParseDetectedExecutable(buffer.c_str(), buffer.length(), + &program); if (parse_status != courgette::C_OK) Problem("Can't parse input."); @@ -122,17 +123,17 @@ void DisassembleAndAdjust(const std::wstring& program_file, courgette::AssemblyProgram* program = NULL; const courgette::Status parse_program_status = - courgette::ParseWin32X86PE(program_buffer.c_str(), - program_buffer.length(), - &program); + courgette::ParseDetectedExecutable(program_buffer.c_str(), + program_buffer.length(), + &program); if (parse_program_status != courgette::C_OK) Problem("Can't parse program input."); courgette::AssemblyProgram* model = NULL; const courgette::Status parse_model_status = - courgette::ParseWin32X86PE(model_buffer.c_str(), - model_buffer.length(), - &model); + courgette::ParseDetectedExecutable(model_buffer.c_str(), + model_buffer.length(), + &model); if (parse_model_status != courgette::C_OK) Problem("Can't parse model input."); @@ -178,17 +179,17 @@ void DisassembleAdjustDiff(const std::wstring& model_file, courgette::AssemblyProgram* model = NULL; const courgette::Status parse_model_status = - courgette::ParseWin32X86PE(model_buffer.c_str(), - model_buffer.length(), - &model); + courgette::ParseDetectedExecutable(model_buffer.c_str(), + model_buffer.length(), + &model); if (parse_model_status != courgette::C_OK) Problem("Can't parse model input."); courgette::AssemblyProgram* program = NULL; const courgette::Status parse_program_status = - courgette::ParseWin32X86PE(program_buffer.c_str(), - program_buffer.length(), - &program); + courgette::ParseDetectedExecutable(program_buffer.c_str(), + program_buffer.length(), + &program); if (parse_program_status != courgette::C_OK) Problem("Can't parse program input."); diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index e3dd71a..f4ae86d 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -13,6 +13,7 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" +#include "courgette/disassembler_win32_x86.h" #include "courgette/encoded_program.h" #include "courgette/image_info.h" @@ -22,402 +23,25 @@ namespace courgette { -class DisassemblerWin32X86 : public Disassembler { - public: - explicit DisassemblerWin32X86(PEInfo* pe_info) - : pe_info_(pe_info), - incomplete_disassembly_(false) { - } - - virtual bool Disassemble(AssemblyProgram* target); - - virtual void Destroy() { delete this; } - - protected: - PEInfo& pe_info() { return *pe_info_; } - - CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; - bool ParseAbs32Relocs(); - void ParseRel32RelocsFromSections(); - void ParseRel32RelocsFromSection(const Section* section); - - CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, - uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; - CheckBool ParseFileRegion(const Section* section, - uint32 start_file_offset, uint32 end_file_offset, - AssemblyProgram* program) WARN_UNUSED_RESULT; - -#if COURGETTE_HISTOGRAM_TARGETS - void HistogramTargets(const char* kind, const std::map<RVA, int>& map); -#endif - - PEInfo* pe_info_; - bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits - - std::vector<RVA> abs32_locations_; - std::vector<RVA> rel32_locations_; - -#if COURGETTE_HISTOGRAM_TARGETS - std::map<RVA, int> abs32_target_rvas_; - std::map<RVA, int> rel32_target_rvas_; -#endif -}; - -bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { - if (!pe_info().ok()) - return false; - - target->set_image_base(pe_info().image_base()); - - if (!ParseAbs32Relocs()) - return false; - - ParseRel32RelocsFromSections(); - - if (!ParseFile(target)) - return false; - - target->DefaultAssignIndexes(); - - return true; -} - -static uint32 Read32LittleEndian(const void* address) { - return *reinterpret_cast<const uint32*>(address); -} - -bool DisassemblerWin32X86::ParseAbs32Relocs() { - abs32_locations_.clear(); - if (!pe_info().ParseRelocs(&abs32_locations_)) - return false; - - std::sort(abs32_locations_.begin(), abs32_locations_.end()); - -#if COURGETTE_HISTOGRAM_TARGETS - for (size_t i = 0; i < abs32_locations_.size(); ++i) { - RVA rva = abs32_locations_[i]; - // The 4 bytes at the relocation are a reference to some address. - uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); - ++abs32_target_rvas_[target_address - pe_info().image_base()]; - } -#endif - return true; -} - -void DisassemblerWin32X86::ParseRel32RelocsFromSections() { - uint32 file_offset = 0; - while (file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); - if (section == NULL) - break; - if (file_offset < section->file_offset_of_raw_data) - file_offset = section->file_offset_of_raw_data; - ParseRel32RelocsFromSection(section); - file_offset += section->size_of_raw_data; - } - std::sort(rel32_locations_.begin(), rel32_locations_.end()); - -#if COURGETTE_HISTOGRAM_TARGETS - VLOG(1) << "abs32_locations_ " << abs32_locations_.size() - << "\nrel32_locations_ " << rel32_locations_.size() - << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() - << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); - - int common = 0; - std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); - std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); - while (abs32_iter != abs32_target_rvas_.end() && - rel32_iter != rel32_target_rvas_.end()) { - if (abs32_iter->first < rel32_iter->first) - ++abs32_iter; - else if (rel32_iter->first < abs32_iter->first) - ++rel32_iter; - else { - ++common; - ++abs32_iter; - ++rel32_iter; - } - } - VLOG(1) << "common " << common; -#endif -} - -void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { - // TODO(sra): use characteristic. - bool isCode = strcmp(section->name, ".text") == 0; - if (!isCode) - return; - - uint32 start_file_offset = section->file_offset_of_raw_data; - uint32 end_file_offset = start_file_offset + section->size_of_raw_data; - RVA relocs_start_rva = pe_info().base_relocation_table().address_; - - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); - - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); - RVA end_rva = start_rva + section->virtual_size; - - // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. - const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; - - std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); - - // Find the rel32 relocations. - const uint8* p = start_pointer; - while (p < end_pointer) { - RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); - if (current_rva == relocs_start_rva) { - uint32 relocs_size = pe_info().base_relocation_table().size_; - if (relocs_size) { - p += relocs_size; - continue; - } - } - - //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) - // ++abs32_pos; - - // Heuristic discovery of rel32 locations in instruction stream: are the - // next few bytes the start of an instruction containing a rel32 - // addressing mode? - const uint8* rel32 = NULL; - - if (p + 5 < end_pointer) { - if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 - rel32 = p + 1; - } - } - if (p + 6 < end_pointer) { - if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form - if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely - rel32 = p + 2; - } - } - if (rel32) { - RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); - - // Is there an abs32 reloc overlapping the candidate? - while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) - ++abs32_pos; - // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte - // region that could overlap rel32_rva. - if (abs32_pos != abs32_locations_.end()) { - if (*abs32_pos < rel32_rva + 4) { - // Beginning of abs32 reloc is before end of rel32 reloc so they - // overlap. Skip four bytes past the abs32 reloc. - p += (*abs32_pos + 4) - current_rva; - continue; - } - } - - RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); - // To be valid, rel32 target must be within image, and within this - // section. - if (pe_info().IsValidRVA(target_rva) && - start_rva <= target_rva && target_rva < end_rva) { - rel32_locations_.push_back(rel32_rva); -#if COURGETTE_HISTOGRAM_TARGETS - ++rel32_target_rvas_[target_rva]; -#endif - p += 4; - continue; - } - } - p += 1; - } -} - -CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { - bool ok = true; - // Walk all the bytes in the file, whether or not in a section. - uint32 file_offset = 0; - while (ok && file_offset < pe_info().length()) { - const Section* section = pe_info().FindNextSection(file_offset); - if (section == NULL) { - // No more sections. There should not be extra stuff following last - // section. - // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); - break; - } - if (file_offset < section->file_offset_of_raw_data) { - uint32 section_start_offset = section->file_offset_of_raw_data; - ok = ParseNonSectionFileRegion(file_offset, section_start_offset, - program); - file_offset = section_start_offset; - } - if (ok) { - uint32 end = file_offset + section->size_of_raw_data; - ok = ParseFileRegion(section, file_offset, end, program); - file_offset = end; - } - } - -#if COURGETTE_HISTOGRAM_TARGETS - HistogramTargets("abs32 relocs", abs32_target_rvas_); - HistogramTargets("rel32 relocs", rel32_target_rvas_); -#endif - - return ok; -} - -CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( - uint32 start_file_offset, - uint32 end_file_offset, - AssemblyProgram* program) { - if (incomplete_disassembly_) - return true; - - const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); - - const uint8* p = start; - - bool ok = true; - while (p < end && ok) { - ok = program->EmitByteInstruction(*p); - ++p; - } - - return ok; -} - -CheckBool DisassemblerWin32X86::ParseFileRegion( - const Section* section, - uint32 start_file_offset, uint32 end_file_offset, - AssemblyProgram* program) { - RVA relocs_start_rva = pe_info().base_relocation_table().address_; - - const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); - const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); - - RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); - RVA end_rva = start_rva + section->virtual_size; - - // Quick way to convert from Pointer to RVA within a single Section is to - // subtract 'pointer_to_rva'. - const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; - - std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); - std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); - - bool ok = program->EmitOriginInstruction(start_rva); - - const uint8* p = start_pointer; - - while (ok && p < end_pointer) { - RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); - - // The base relocation table is usually in the .relocs section, but it could - // actually be anywhere. Make sure we skip it because we will regenerate it - // during assembly. - if (current_rva == relocs_start_rva) { - ok = program->EmitMakeRelocsInstruction(); - if (!ok) - break; - uint32 relocs_size = pe_info().base_relocation_table().size_; - if (relocs_size) { - p += relocs_size; - continue; - } - } - - while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) - ++abs32_pos; - - if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { - uint32 target_address = Read32LittleEndian(p); - RVA target_rva = target_address - pe_info().image_base(); - // TODO(sra): target could be Label+offset. It is not clear how to guess - // which it might be. We assume offset==0. - ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); - if (!ok) - break; - p += 4; - continue; - } - - while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) - ++rel32_pos; - - if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { - RVA target_rva = current_rva + 4 + Read32LittleEndian(p); - ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); - p += 4; - continue; - } - - if (incomplete_disassembly_) { - if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && - (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && - (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { - // No more relocs in this section, don't bother encoding bytes. - break; - } - } - - ok = program->EmitByteInstruction(*p); - p += 1; - } +//////////////////////////////////////////////////////////////////////////////// - return ok; -} +ExecutableType DetectExecutableType(const void* buffer, size_t length) { -#if COURGETTE_HISTOGRAM_TARGETS -// Histogram is printed to std::cout. It is purely for debugging the algorithm -// and is only enabled manually in 'exploration' builds. I don't want to add -// command-line configuration for this feature because this code has to be -// small, which means compiled-out. -void DisassemblerWin32X86::HistogramTargets(const char* kind, - const std::map<RVA, int>& map) { - int total = 0; - std::map<int, std::vector<RVA> > h; - for (std::map<RVA, int>::const_iterator p = map.begin(); - p != map.end(); - ++p) { - h[p->second].push_back(p->first); - total += p->second; - } + bool parsed = false; - std::cout << total << " " << kind << " to " - << map.size() << " unique targets" << std::endl; + PEInfo* pe_info = new PEInfo(); + pe_info->Init(buffer, length); + parsed = pe_info->ParseHeader(); + delete pe_info; - std::cout << "indegree: #targets-with-indegree (example)" << std::endl; - const int kFirstN = 15; - bool someSkipped = false; - int index = 0; - for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin(); - p != h.rend(); - ++p) { - ++index; - if (index <= kFirstN || p->first <= 3) { - if (someSkipped) { - std::cout << "..." << std::endl; - } - size_t count = p->second.size(); - std::cout << std::dec << p->first << ": " << count; - if (count <= 2) { - for (size_t i = 0; i < count; ++i) - std::cout << " " << pe_info().DescribeRVA(p->second[i]); - } - std::cout << std::endl; - someSkipped = false; - } else { - someSkipped = true; - } - } -} -#endif // COURGETTE_HISTOGRAM_TARGETS + if (parsed) + return WIN32_X86; -Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) { - return new DisassemblerWin32X86(pe_info); + return UNKNOWN; } -//////////////////////////////////////////////////////////////////////////////// - -Status ParseWin32X86PE(const void* buffer, size_t length, - AssemblyProgram** output) { +Status ParseDetectedExecutable(const void* buffer, size_t length, + AssemblyProgram** output) { *output = NULL; PEInfo* pe_info = new PEInfo(); @@ -428,17 +52,17 @@ Status ParseWin32X86PE(const void* buffer, size_t length, return C_INPUT_NOT_RECOGNIZED; } - Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); + Disassembler* disassembler = new DisassemblerWin32X86(pe_info); AssemblyProgram* program = new AssemblyProgram(); if (!disassembler->Disassemble(program)) { delete program; - disassembler->Destroy(); + delete disassembler; delete pe_info; return C_DISASSEMBLY_FAILED; } - disassembler->Destroy(); + delete disassembler; delete pe_info; *output = program; return C_OK; diff --git a/courgette/disassembler.h b/courgette/disassembler.h index fa7c908..bef1a90 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,21 +14,14 @@ class PEInfo; class Disassembler { public: - // Factory methods for making disassemblers for various kinds of executables. - // We have only one so far. - - static Disassembler* MakeDisassemberWin32X86(PEInfo* pe_info); + virtual ~Disassembler() {} // Disassembles the item passed to the factory method into the output // parameter 'program'. virtual bool Disassemble(AssemblyProgram* program) = 0; - // Deletes 'this' disassembler. - virtual void Destroy() = 0; - protected: Disassembler() {} - virtual ~Disassembler() {} private: DISALLOW_COPY_AND_ASSIGN(Disassembler); diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc new file mode 100644 index 0000000..fb12c22 --- /dev/null +++ b/courgette/disassembler_win32_x86.cc @@ -0,0 +1,377 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "courgette/disassembler_win32_x86.h" + +#include <algorithm> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/logging.h" + +#include "courgette/assembly_program.h" +#include "courgette/courgette.h" +#include "courgette/encoded_program.h" +#include "courgette/image_info.h" + +// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently +// different target addresses are referenced. Purely for debugging. +#define COURGETTE_HISTOGRAM_TARGETS 0 + +namespace courgette { + +DisassemblerWin32X86::DisassemblerWin32X86(PEInfo* pe_info) + : pe_info_(pe_info), + incomplete_disassembly_(false) { +} + +bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { + if (!pe_info().ok()) + return false; + + target->set_image_base(pe_info().image_base()); + + if (!ParseAbs32Relocs()) + return false; + + ParseRel32RelocsFromSections(); + + if (!ParseFile(target)) + return false; + + target->DefaultAssignIndexes(); + + return true; +} + +static uint32 Read32LittleEndian(const void* address) { + return *reinterpret_cast<const uint32*>(address); +} + +bool DisassemblerWin32X86::ParseAbs32Relocs() { + abs32_locations_.clear(); + if (!pe_info().ParseRelocs(&abs32_locations_)) + return false; + + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + for (size_t i = 0; i < abs32_locations_.size(); ++i) { + RVA rva = abs32_locations_[i]; + // The 4 bytes at the relocation are a reference to some address. + uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); + ++abs32_target_rvas_[target_address - pe_info().image_base()]; + } +#endif + return true; +} + +void DisassemblerWin32X86::ParseRel32RelocsFromSections() { + uint32 file_offset = 0; + while (file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) + break; + if (file_offset < section->file_offset_of_raw_data) + file_offset = section->file_offset_of_raw_data; + ParseRel32RelocsFromSection(section); + file_offset += section->size_of_raw_data; + } + std::sort(rel32_locations_.begin(), rel32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + VLOG(1) << "abs32_locations_ " << abs32_locations_.size() + << "\nrel32_locations_ " << rel32_locations_.size() + << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() + << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); + + int common = 0; + std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); + std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); + while (abs32_iter != abs32_target_rvas_.end() && + rel32_iter != rel32_target_rvas_.end()) { + if (abs32_iter->first < rel32_iter->first) + ++abs32_iter; + else if (rel32_iter->first < abs32_iter->first) + ++rel32_iter; + else { + ++common; + ++abs32_iter; + ++rel32_iter; + } + } + VLOG(1) << "common " << common; +#endif +} + +void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { + // TODO(sra): use characteristic. + bool isCode = strcmp(section->name, ".text") == 0; + if (!isCode) + return; + + uint32 start_file_offset = section->file_offset_of_raw_data; + uint32 end_file_offset = start_file_offset + section->size_of_raw_data; + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + // Find the rel32 relocations. + const uint8* p = start_pointer; + while (p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + if (current_rva == relocs_start_rva) { + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + // ++abs32_pos; + + // Heuristic discovery of rel32 locations in instruction stream: are the + // next few bytes the start of an instruction containing a rel32 + // addressing mode? + const uint8* rel32 = NULL; + + if (p + 5 < end_pointer) { + if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 + rel32 = p + 1; + } + } + if (p + 6 < end_pointer) { + if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form + if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely + rel32 = p + 2; + } + } + if (rel32) { + RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); + + // Is there an abs32 reloc overlapping the candidate? + while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) + ++abs32_pos; + // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte + // region that could overlap rel32_rva. + if (abs32_pos != abs32_locations_.end()) { + if (*abs32_pos < rel32_rva + 4) { + // Beginning of abs32 reloc is before end of rel32 reloc so they + // overlap. Skip four bytes past the abs32 reloc. + p += (*abs32_pos + 4) - current_rva; + continue; + } + } + + RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); + // To be valid, rel32 target must be within image, and within this + // section. + if (pe_info().IsValidRVA(target_rva) && + start_rva <= target_rva && target_rva < end_rva) { + rel32_locations_.push_back(rel32_rva); +#if COURGETTE_HISTOGRAM_TARGETS + ++rel32_target_rvas_[target_rva]; +#endif + p += 4; + continue; + } + } + p += 1; + } +} + +CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { + bool ok = true; + // Walk all the bytes in the file, whether or not in a section. + uint32 file_offset = 0; + while (ok && file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) { + // No more sections. There should not be extra stuff following last + // section. + // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); + break; + } + if (file_offset < section->file_offset_of_raw_data) { + uint32 section_start_offset = section->file_offset_of_raw_data; + ok = ParseNonSectionFileRegion(file_offset, section_start_offset, + program); + file_offset = section_start_offset; + } + if (ok) { + uint32 end = file_offset + section->size_of_raw_data; + ok = ParseFileRegion(section, file_offset, end, program); + file_offset = end; + } + } + +#if COURGETTE_HISTOGRAM_TARGETS + HistogramTargets("abs32 relocs", abs32_target_rvas_); + HistogramTargets("rel32 relocs", rel32_target_rvas_); +#endif + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( + uint32 start_file_offset, + uint32 end_file_offset, + AssemblyProgram* program) { + if (incomplete_disassembly_) + return true; + + const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); + + const uint8* p = start; + + bool ok = true; + while (p < end && ok) { + ok = program->EmitByteInstruction(*p); + ++p; + } + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseFileRegion( + const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) { + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + bool ok = program->EmitOriginInstruction(start_rva); + + const uint8* p = start_pointer; + + while (ok && p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + + // The base relocation table is usually in the .relocs section, but it could + // actually be anywhere. Make sure we skip it because we will regenerate it + // during assembly. + if (current_rva == relocs_start_rva) { + ok = program->EmitMakeRelocsInstruction(); + if (!ok) + break; + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + ++abs32_pos; + + if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { + uint32 target_address = Read32LittleEndian(p); + RVA target_rva = target_address - pe_info().image_base(); + // TODO(sra): target could be Label+offset. It is not clear how to guess + // which it might be. We assume offset==0. + ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); + if (!ok) + break; + p += 4; + continue; + } + + while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) + ++rel32_pos; + + if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { + RVA target_rva = current_rva + 4 + Read32LittleEndian(p); + ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); + p += 4; + continue; + } + + if (incomplete_disassembly_) { + if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && + (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && + (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { + // No more relocs in this section, don't bother encoding bytes. + break; + } + } + + ok = program->EmitByteInstruction(*p); + p += 1; + } + + return ok; +} + +#if COURGETTE_HISTOGRAM_TARGETS +// Histogram is printed to std::cout. It is purely for debugging the algorithm +// and is only enabled manually in 'exploration' builds. I don't want to add +// command-line configuration for this feature because this code has to be +// small, which means compiled-out. +void DisassemblerWin32X86::HistogramTargets(const char* kind, + const std::map<RVA, int>& map) { + int total = 0; + std::map<int, std::vector<RVA> > h; + for (std::map<RVA, int>::const_iterator p = map.begin(); + p != map.end(); + ++p) { + h[p->second].push_back(p->first); + total += p->second; + } + + std::cout << total << " " << kind << " to " + << map.size() << " unique targets" << std::endl; + + std::cout << "indegree: #targets-with-indegree (example)" << std::endl; + const int kFirstN = 15; + bool someSkipped = false; + int index = 0; + for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin(); + p != h.rend(); + ++p) { + ++index; + if (index <= kFirstN || p->first <= 3) { + if (someSkipped) { + std::cout << "..." << std::endl; + } + size_t count = p->second.size(); + std::cout << std::dec << p->first << ": " << count; + if (count <= 2) { + for (size_t i = 0; i < count; ++i) + std::cout << " " << pe_info().DescribeRVA(p->second[i]); + } + std::cout << std::endl; + someSkipped = false; + } else { + someSkipped = true; + } + } +} +#endif // COURGETTE_HISTOGRAM_TARGETS + +} // namespace courgette diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h new file mode 100644 index 0000000..fe00b6d --- /dev/null +++ b/courgette/disassembler_win32_x86.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COURGETTE_DISASSEMBLER_WIN32_X86_H_ +#define COURGETTE_DISASSEMBLER_WIN32_X86_H_ + +#include "base/basictypes.h" +#include "courgette/disassembler.h" +#include "courgette/image_info.h" +#include "courgette/memory_allocator.h" + +namespace courgette { + +class AssemblyProgram; + +class DisassemblerWin32X86 : public Disassembler { + public: + explicit DisassemblerWin32X86(PEInfo* pe_info); + + virtual bool Disassemble(AssemblyProgram* target); + + protected: + PEInfo& pe_info() { return *pe_info_; } + + CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; + bool ParseAbs32Relocs(); + void ParseRel32RelocsFromSections(); + void ParseRel32RelocsFromSection(const Section* section); + + CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, + uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; + CheckBool ParseFileRegion(const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) WARN_UNUSED_RESULT; + +#if COURGETTE_HISTOGRAM_TARGETS + void HistogramTargets(const char* kind, const std::map<RVA, int>& map); +#endif + + PEInfo* pe_info_; + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits + + std::vector<RVA> abs32_locations_; + std::vector<RVA> rel32_locations_; + +#if COURGETTE_HISTOGRAM_TARGETS + std::map<RVA, int> abs32_target_rvas_; + std::map<RVA, int> rel32_target_rvas_; +#endif + + DISALLOW_COPY_AND_ASSIGN(DisassemblerWin32X86); +}; + +} // namespace courgette +#endif // COURGETTE_DISASSEMBLER_WIN32_X86_H_ diff --git a/courgette/encode_decode_unittest.cc b/courgette/encode_decode_unittest.cc index c14dc9f..21e2e65 100644 --- a/courgette/encode_decode_unittest.cc +++ b/courgette/encode_decode_unittest.cc @@ -53,7 +53,8 @@ void EncodeDecodeTest::TestExe(const char* file_name) const { courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseWin32X86PE(original_buffer, original_length, &program); + courgette::ParseDetectedExecutable(original_buffer, original_length, + &program); EXPECT_EQ(courgette::C_OK, parse_status); courgette::EncodedProgram* encoded = NULL; diff --git a/courgette/encoded_program_fuzz_unittest.cc b/courgette/encoded_program_fuzz_unittest.cc index a869bc2..1625fd0 100644 --- a/courgette/encoded_program_fuzz_unittest.cc +++ b/courgette/encoded_program_fuzz_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -70,7 +70,8 @@ void DecodeFuzzTest::FuzzExe(const char* file_name) const { courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = - courgette::ParseWin32X86PE(original_buffer, original_length, &program); + courgette::ParseDetectedExecutable(original_buffer, original_length, + &program); EXPECT_EQ(courgette::C_OK, parse_status); courgette::EncodedProgram* encoded = NULL; diff --git a/courgette/ensemble.cc b/courgette/ensemble.cc index 69e07a7..a2bea8f 100644 --- a/courgette/ensemble.cc +++ b/courgette/ensemble.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,8 +14,15 @@ namespace courgette { -Element::Element(Kind kind, Ensemble* ensemble, const Region& region) - : kind_(kind), ensemble_(ensemble), region_(region) { +Element::Element(ExecutableType kind, + Ensemble* ensemble, + const Region& region, + PEInfo* info) + : kind_(kind), ensemble_(ensemble), region_(region), info_(info) { +} + +Element::~Element() { + delete info_; } std::string Element::Name() const { @@ -25,71 +32,51 @@ std::string Element::Name() const { + base::Uint64ToString(region().length()) + ")"; } -// A subclass of Element that has a PEInfo. -class ElementWinPE : public Element { - public: - ElementWinPE(Kind kind, Ensemble* ensemble, const Region& region, - PEInfo* info) - : Element(kind, ensemble, region), - pe_info_(info) { - } - - virtual PEInfo* GetPEInfo() const { return pe_info_; } - - protected: - ~ElementWinPE() { delete pe_info_; } - - private: - PEInfo* pe_info_; // Owned by |this|. -}; - // Scans the Ensemble's region, sniffing out Elements. We assume that the // elements do not overlap. Status Ensemble::FindEmbeddedElements() { + size_t length = region_.length(); const uint8* start = region_.start(); size_t position = 0; while (position < length) { - // Quick test; Windows executables begin with 'MZ'. - if (start[position] == 'M' && - position + 1 < length && start[position + 1] == 'Z') { - courgette::PEInfo *info = new courgette::PEInfo(); - info->Init(start + position, length - position); - if (info->ParseHeader()) { - Region region(start + position, info->length()); - - if (info->has_text_section()) { - if (info->is_32bit()) { - Element* element = new ElementWinPE(Element::WIN32_X86_WITH_CODE, - this, region, info); - owned_elements_.push_back(element); - elements_.push_back(element); - position += region.length(); - continue; - } - // TODO(sra): Extend to 64-bit executables. + ExecutableType type = DetectExecutableType(start + position, + length - position); + + // + // TODO(dgarrett) This switch can go away totally after two things. + // + // Make ImageInfo generic for all executable types. + // Find a generic way to handle length detection for executables. + // + // When this switch is gone, that's one less piece of code that is + // executable type aware. + // + switch (type) { + case UNKNOWN: { + // No Element found at current position. + ++position; + break; + } + case WIN32_X86: { + // The Info is only created to detect the length of the executable + courgette::PEInfo* info(new courgette::PEInfo()); + info->Init(start + position, length - position); + if (!info->ParseHeader()) { + delete info; + position++; + break; } + Region region(start + position, info->length()); - // If we had a clever transformation for resource-only executables we - // should identify the suitable elements here: - if (!info->has_text_section() && false) { - Element* element = new ElementWinPE(Element::WIN32_NOCODE, - this, region, info); - owned_elements_.push_back(element); - elements_.push_back(element); - position += region.length(); - continue; - } + Element* element = new Element(type, this, region, info); + owned_elements_.push_back(element); + elements_.push_back(element); + position += region.length(); + break; } - delete info; } - - // This is where to add new formats, e.g. Linux executables, Dalvik - // executables etc. - - // No Element found at current position. - ++position; } return C_OK; } diff --git a/courgette/ensemble.h b/courgette/ensemble.h index f907f9d..e766782 100644 --- a/courgette/ensemble.h +++ b/courgette/ensemble.h @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -36,11 +36,14 @@ class PEInfo; // class Element { public: - enum Kind { WIN32_X86_WITH_CODE, WIN32_NOCODE }; + Element(ExecutableType kind, + Ensemble* ensemble, + const Region& region, + PEInfo*info); - virtual ~Element() {} + virtual ~Element(); - Kind kind() const { return kind_; } + ExecutableType kind() const { return kind_; } const Region& region() const { return region_; } // The name is used only for debugging and logging. @@ -50,16 +53,14 @@ class Element { // containing Ensemble. size_t offset_in_ensemble() const; - // Some subclasses of Element might have a PEInfo. - virtual PEInfo* GetPEInfo() const { return NULL; } - - protected: - Element(Kind kind, Ensemble* ensemble, const Region& region); + // The ImageInfo for this executable + virtual PEInfo* GetImageInfo() const { return info_; } private: - Kind kind_; + ExecutableType kind_; Ensemble* ensemble_; Region region_; + PEInfo *info_; DISALLOW_COPY_AND_ASSIGN(Element); }; @@ -139,7 +140,8 @@ struct CourgettePatchFile { static const uint32 kVersion = 20110216; - // Transformation method IDs. + // Transformation method IDs. These are embedded in generated files, so + // never remove or change an existing id. enum TransformationMethodId { T_COURGETTE_WIN32_X86 = 1, // Windows 32 bit 'Portable Executable' x86. }; diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc index 6efbc40..499ccac 100644 --- a/courgette/ensemble_apply.cc +++ b/courgette/ensemble_apply.cc @@ -136,13 +136,19 @@ Status EnsemblePatchApplication::ReadInitialParameters( if (!transformation_parameters->ReadVarint32(&kind)) return C_BAD_ENSEMBLE_HEADER; - if (kind == CourgettePatchFile::T_COURGETTE_WIN32_X86) { - TransformationPatcher* patcher = - new CourgetteWin32X86Patcher(base_region_); + TransformationPatcher* patcher = NULL; + + switch (kind) + { + case CourgettePatchFile::T_COURGETTE_WIN32_X86: + patcher = new CourgetteWin32X86Patcher(base_region_); + break; + } + + if (patcher) patchers_.push_back(patcher); - } else { + else return C_BAD_ENSEMBLE_HEADER; - } } for (size_t i = 0; i < patchers_.size(); ++i) { diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc index ec33689..62105b9 100644 --- a/courgette/ensemble_create.cc +++ b/courgette/ensemble_create.cc @@ -65,17 +65,21 @@ Status TransformationPatchGenerator::Reform( // Element kind. TransformationPatchGenerator* MakeGenerator(Element* old_element, Element* new_element) { - if (new_element->kind() == Element::WIN32_X86_WITH_CODE) { - CourgetteWin32X86PatchGenerator* generator = - new CourgetteWin32X86PatchGenerator( - old_element, - new_element, - new CourgetteWin32X86Patcher(old_element->region())); - return generator; - } else { - LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); - return NULL; + switch (new_element->kind()) { + case UNKNOWN: + break; + case WIN32_X86: { + TransformationPatchGenerator* generator = + new CourgetteWin32X86PatchGenerator( + old_element, + new_element, + new CourgetteWin32X86Patcher(old_element->region())); + return generator; + } } + + LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind(); + return NULL; } // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element diff --git a/courgette/win32_x86_generator.h b/courgette/win32_x86_generator.h index 496a2ce..e77819f 100644 --- a/courgette/win32_x86_generator.h +++ b/courgette/win32_x86_generator.h @@ -61,9 +61,9 @@ class CourgetteWin32X86PatchGenerator : public TransformationPatchGenerator { // TODO(sra): refactor to use same code from patcher_. AssemblyProgram* old_program = NULL; Status old_parse_status = - ParseWin32X86PE(old_element_->region().start(), - old_element_->region().length(), - &old_program); + ParseDetectedExecutable(old_element_->region().start(), + old_element_->region().length(), + &old_program); if (old_parse_status != C_OK) { LOG(ERROR) << "Cannot parse as Win32X86PE " << old_element_->Name(); return old_parse_status; @@ -71,9 +71,9 @@ class CourgetteWin32X86PatchGenerator : public TransformationPatchGenerator { AssemblyProgram* new_program = NULL; Status new_parse_status = - ParseWin32X86PE(new_element_->region().start(), - new_element_->region().length(), - &new_program); + ParseDetectedExecutable(new_element_->region().start(), + new_element_->region().length(), + &new_program); if (new_parse_status != C_OK) { DeleteAssemblyProgram(old_program); LOG(ERROR) << "Cannot parse as Win32X86PE " << new_element_->Name(); diff --git a/courgette/win32_x86_patcher.h b/courgette/win32_x86_patcher.h index 6b85021..f1aad9d 100644 --- a/courgette/win32_x86_patcher.h +++ b/courgette/win32_x86_patcher.h @@ -46,9 +46,9 @@ class CourgetteWin32X86Patcher : public TransformationPatcher { return C_GENERAL_ERROR; // Don't expect any corrected parameters. AssemblyProgram* program = NULL; - status = ParseWin32X86PE(ensemble_region_.start() + base_offset_, - base_length_, - &program); + status = ParseDetectedExecutable(ensemble_region_.start() + base_offset_, + base_length_, + &program); if (status != C_OK) return status; |