diff options
author | laforge@chromium.org <laforge@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-11 18:35:37 +0000 |
---|---|---|
committer | laforge@chromium.org <laforge@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-11 18:35:37 +0000 |
commit | d244fdff5fbbda333ea904b7833b81dddc14ba3b (patch) | |
tree | 761ff3d72f842d729fb135aef08b7d686488c107 /courgette/disassembler.cc | |
parent | 3bbf1ca56cbc84e9dbe08c20ad62521655e12cd1 (diff) | |
download | chromium_src-d244fdff5fbbda333ea904b7833b81dddc14ba3b.zip chromium_src-d244fdff5fbbda333ea904b7833b81dddc14ba3b.tar.gz chromium_src-d244fdff5fbbda333ea904b7833b81dddc14ba3b.tar.bz2 |
Revert 103879 - Start refactoring to reduce executable type knowledge.
This creates executable detection functions, a globally shared enum for
describing an executable type, and reduces the number of classes and
locations with executable specific knowledge.
These changes, along with moving architecture specific classes into their
own files should make it easier to produce special purpose clients that
only contain the code required to apply their own form of patch.
DisassemblerWin32EXE, ImagePE, CourgetteWin32X86PatchGenerator, and
CourgetteWin32X86Patcher, and ensemble handling are all heavily affected here.
This should have no effect on the behavior of the system yet, and is instead
all prep-work.
BUG=None
TEST=Unittests
Review URL: http://codereview.chromium.org/7920004
TBR=dgarrett@chromium.org
Review URL: http://codereview.chromium.org/8234012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@104926 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette/disassembler.cc')
-rw-r--r-- | courgette/disassembler.cc | 408 |
1 files changed, 392 insertions, 16 deletions
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc index f4ae86d..e3dd71a 100644 --- a/courgette/disassembler.cc +++ b/courgette/disassembler.cc @@ -13,7 +13,6 @@ #include "courgette/assembly_program.h" #include "courgette/courgette.h" -#include "courgette/disassembler_win32_x86.h" #include "courgette/encoded_program.h" #include "courgette/image_info.h" @@ -23,25 +22,402 @@ namespace courgette { -//////////////////////////////////////////////////////////////////////////////// +class DisassemblerWin32X86 : public Disassembler { + public: + explicit DisassemblerWin32X86(PEInfo* pe_info) + : pe_info_(pe_info), + incomplete_disassembly_(false) { + } -ExecutableType DetectExecutableType(const void* buffer, size_t length) { + virtual bool Disassemble(AssemblyProgram* target); - bool parsed = false; + virtual void Destroy() { delete this; } - PEInfo* pe_info = new PEInfo(); - pe_info->Init(buffer, length); - parsed = pe_info->ParseHeader(); - delete pe_info; + protected: + PEInfo& pe_info() { return *pe_info_; } + + CheckBool ParseFile(AssemblyProgram* target) WARN_UNUSED_RESULT; + bool ParseAbs32Relocs(); + void ParseRel32RelocsFromSections(); + void ParseRel32RelocsFromSection(const Section* section); + + CheckBool ParseNonSectionFileRegion(uint32 start_file_offset, + uint32 end_file_offset, AssemblyProgram* program) WARN_UNUSED_RESULT; + CheckBool ParseFileRegion(const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) WARN_UNUSED_RESULT; + +#if COURGETTE_HISTOGRAM_TARGETS + void HistogramTargets(const char* kind, const std::map<RVA, int>& map); +#endif + + PEInfo* pe_info_; + bool incomplete_disassembly_; // 'true' if can leave out 'uninteresting' bits + + std::vector<RVA> abs32_locations_; + std::vector<RVA> rel32_locations_; + +#if COURGETTE_HISTOGRAM_TARGETS + std::map<RVA, int> abs32_target_rvas_; + std::map<RVA, int> rel32_target_rvas_; +#endif +}; + +bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) { + if (!pe_info().ok()) + return false; + + target->set_image_base(pe_info().image_base()); + + if (!ParseAbs32Relocs()) + return false; + + ParseRel32RelocsFromSections(); + + if (!ParseFile(target)) + return false; + + target->DefaultAssignIndexes(); + + return true; +} + +static uint32 Read32LittleEndian(const void* address) { + return *reinterpret_cast<const uint32*>(address); +} - if (parsed) - return WIN32_X86; +bool DisassemblerWin32X86::ParseAbs32Relocs() { + abs32_locations_.clear(); + if (!pe_info().ParseRelocs(&abs32_locations_)) + return false; - return UNKNOWN; + std::sort(abs32_locations_.begin(), abs32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + for (size_t i = 0; i < abs32_locations_.size(); ++i) { + RVA rva = abs32_locations_[i]; + // The 4 bytes at the relocation are a reference to some address. + uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva)); + ++abs32_target_rvas_[target_address - pe_info().image_base()]; + } +#endif + return true; } -Status ParseDetectedExecutable(const void* buffer, size_t length, - AssemblyProgram** output) { +void DisassemblerWin32X86::ParseRel32RelocsFromSections() { + uint32 file_offset = 0; + while (file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) + break; + if (file_offset < section->file_offset_of_raw_data) + file_offset = section->file_offset_of_raw_data; + ParseRel32RelocsFromSection(section); + file_offset += section->size_of_raw_data; + } + std::sort(rel32_locations_.begin(), rel32_locations_.end()); + +#if COURGETTE_HISTOGRAM_TARGETS + VLOG(1) << "abs32_locations_ " << abs32_locations_.size() + << "\nrel32_locations_ " << rel32_locations_.size() + << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() + << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); + + int common = 0; + std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); + std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); + while (abs32_iter != abs32_target_rvas_.end() && + rel32_iter != rel32_target_rvas_.end()) { + if (abs32_iter->first < rel32_iter->first) + ++abs32_iter; + else if (rel32_iter->first < abs32_iter->first) + ++rel32_iter; + else { + ++common; + ++abs32_iter; + ++rel32_iter; + } + } + VLOG(1) << "common " << common; +#endif +} + +void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) { + // TODO(sra): use characteristic. + bool isCode = strcmp(section->name, ".text") == 0; + if (!isCode) + return; + + uint32 start_file_offset = section->file_offset_of_raw_data; + uint32 end_file_offset = start_file_offset + section->size_of_raw_data; + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + // Find the rel32 relocations. + const uint8* p = start_pointer; + while (p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + if (current_rva == relocs_start_rva) { + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + // ++abs32_pos; + + // Heuristic discovery of rel32 locations in instruction stream: are the + // next few bytes the start of an instruction containing a rel32 + // addressing mode? + const uint8* rel32 = NULL; + + if (p + 5 < end_pointer) { + if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32 + rel32 = p + 1; + } + } + if (p + 6 < end_pointer) { + if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form + if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely + rel32 = p + 2; + } + } + if (rel32) { + RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva); + + // Is there an abs32 reloc overlapping the candidate? + while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3) + ++abs32_pos; + // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte + // region that could overlap rel32_rva. + if (abs32_pos != abs32_locations_.end()) { + if (*abs32_pos < rel32_rva + 4) { + // Beginning of abs32 reloc is before end of rel32 reloc so they + // overlap. Skip four bytes past the abs32 reloc. + p += (*abs32_pos + 4) - current_rva; + continue; + } + } + + RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); + // To be valid, rel32 target must be within image, and within this + // section. + if (pe_info().IsValidRVA(target_rva) && + start_rva <= target_rva && target_rva < end_rva) { + rel32_locations_.push_back(rel32_rva); +#if COURGETTE_HISTOGRAM_TARGETS + ++rel32_target_rvas_[target_rva]; +#endif + p += 4; + continue; + } + } + p += 1; + } +} + +CheckBool DisassemblerWin32X86::ParseFile(AssemblyProgram* program) { + bool ok = true; + // Walk all the bytes in the file, whether or not in a section. + uint32 file_offset = 0; + while (ok && file_offset < pe_info().length()) { + const Section* section = pe_info().FindNextSection(file_offset); + if (section == NULL) { + // No more sections. There should not be extra stuff following last + // section. + // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); + break; + } + if (file_offset < section->file_offset_of_raw_data) { + uint32 section_start_offset = section->file_offset_of_raw_data; + ok = ParseNonSectionFileRegion(file_offset, section_start_offset, + program); + file_offset = section_start_offset; + } + if (ok) { + uint32 end = file_offset + section->size_of_raw_data; + ok = ParseFileRegion(section, file_offset, end, program); + file_offset = end; + } + } + +#if COURGETTE_HISTOGRAM_TARGETS + HistogramTargets("abs32 relocs", abs32_target_rvas_); + HistogramTargets("rel32 relocs", rel32_target_rvas_); +#endif + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion( + uint32 start_file_offset, + uint32 end_file_offset, + AssemblyProgram* program) { + if (incomplete_disassembly_) + return true; + + const uint8* start = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end = pe_info().FileOffsetToPointer(end_file_offset); + + const uint8* p = start; + + bool ok = true; + while (p < end && ok) { + ok = program->EmitByteInstruction(*p); + ++p; + } + + return ok; +} + +CheckBool DisassemblerWin32X86::ParseFileRegion( + const Section* section, + uint32 start_file_offset, uint32 end_file_offset, + AssemblyProgram* program) { + RVA relocs_start_rva = pe_info().base_relocation_table().address_; + + const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset); + const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset); + + RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset); + RVA end_rva = start_rva + section->virtual_size; + + // Quick way to convert from Pointer to RVA within a single Section is to + // subtract 'pointer_to_rva'. + const uint8* const adjust_pointer_to_rva = start_pointer - start_rva; + + std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); + std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); + + bool ok = program->EmitOriginInstruction(start_rva); + + const uint8* p = start_pointer; + + while (ok && p < end_pointer) { + RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); + + // The base relocation table is usually in the .relocs section, but it could + // actually be anywhere. Make sure we skip it because we will regenerate it + // during assembly. + if (current_rva == relocs_start_rva) { + ok = program->EmitMakeRelocsInstruction(); + if (!ok) + break; + uint32 relocs_size = pe_info().base_relocation_table().size_; + if (relocs_size) { + p += relocs_size; + continue; + } + } + + while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) + ++abs32_pos; + + if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { + uint32 target_address = Read32LittleEndian(p); + RVA target_rva = target_address - pe_info().image_base(); + // TODO(sra): target could be Label+offset. It is not clear how to guess + // which it might be. We assume offset==0. + ok = program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)); + if (!ok) + break; + p += 4; + continue; + } + + while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva) + ++rel32_pos; + + if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) { + RVA target_rva = current_rva + 4 + Read32LittleEndian(p); + ok = program->EmitRel32(program->FindOrMakeRel32Label(target_rva)); + p += 4; + continue; + } + + if (incomplete_disassembly_) { + if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) && + (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) && + (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) { + // No more relocs in this section, don't bother encoding bytes. + break; + } + } + + ok = program->EmitByteInstruction(*p); + p += 1; + } + + return ok; +} + +#if COURGETTE_HISTOGRAM_TARGETS +// Histogram is printed to std::cout. It is purely for debugging the algorithm +// and is only enabled manually in 'exploration' builds. I don't want to add +// command-line configuration for this feature because this code has to be +// small, which means compiled-out. +void DisassemblerWin32X86::HistogramTargets(const char* kind, + const std::map<RVA, int>& map) { + int total = 0; + std::map<int, std::vector<RVA> > h; + for (std::map<RVA, int>::const_iterator p = map.begin(); + p != map.end(); + ++p) { + h[p->second].push_back(p->first); + total += p->second; + } + + std::cout << total << " " << kind << " to " + << map.size() << " unique targets" << std::endl; + + std::cout << "indegree: #targets-with-indegree (example)" << std::endl; + const int kFirstN = 15; + bool someSkipped = false; + int index = 0; + for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin(); + p != h.rend(); + ++p) { + ++index; + if (index <= kFirstN || p->first <= 3) { + if (someSkipped) { + std::cout << "..." << std::endl; + } + size_t count = p->second.size(); + std::cout << std::dec << p->first << ": " << count; + if (count <= 2) { + for (size_t i = 0; i < count; ++i) + std::cout << " " << pe_info().DescribeRVA(p->second[i]); + } + std::cout << std::endl; + someSkipped = false; + } else { + someSkipped = true; + } + } +} +#endif // COURGETTE_HISTOGRAM_TARGETS + +Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) { + return new DisassemblerWin32X86(pe_info); +} + +//////////////////////////////////////////////////////////////////////////////// + +Status ParseWin32X86PE(const void* buffer, size_t length, + AssemblyProgram** output) { *output = NULL; PEInfo* pe_info = new PEInfo(); @@ -52,17 +428,17 @@ Status ParseDetectedExecutable(const void* buffer, size_t length, return C_INPUT_NOT_RECOGNIZED; } - Disassembler* disassembler = new DisassemblerWin32X86(pe_info); + Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info); AssemblyProgram* program = new AssemblyProgram(); if (!disassembler->Disassemble(program)) { delete program; - delete disassembler; + disassembler->Destroy(); delete pe_info; return C_DISASSEMBLY_FAILED; } - delete disassembler; + disassembler->Destroy(); delete pe_info; *output = program; return C_OK; |