diff options
author | huangs <huangs@chromium.org> | 2016-03-23 13:40:35 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-03-23 20:42:14 +0000 |
commit | f940a8c9bf80de50dc23562aa46dfe09439b5aa2 (patch) | |
tree | 2c1eb13677525c093383207b7978d386286a45b5 /courgette | |
parent | 880fae5d33cc8394387fedc03f358e8a0717b0d1 (diff) | |
download | chromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.zip chromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.tar.gz chromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.tar.bz2 |
[Courgette] Add and use AddressTranslator::PointerToTargetRVA(); Update comments.
Addresses in Courgette (abs32 and rel32) are represented in these forms:
(1) Location RVA.
(2) Location FileOffset.
(3) Pointer in image.
(4) Target VA.
(5) Target RVA.
We already have (1) -> (2), (2) -> (1), (2) -> (3), (1) -> (3) for
existing usage. Now we add (3) -> (5) and refactor accordingly (with
helpers to do (4) -> (5) for PE files). PointerToTargetRVA() will
be used again when we apply LabelManager to save 25% peak RAM.
Review URL: https://codereview.chromium.org/1807293003
Cr-Commit-Position: refs/heads/master@{#382920}
Diffstat (limited to 'courgette')
-rw-r--r-- | courgette/disassembler.h | 1 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.cc | 9 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.h | 2 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.cc | 45 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.h | 5 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 48 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.h | 5 | ||||
-rw-r--r-- | courgette/image_utils.h | 44 |
8 files changed, 103 insertions, 56 deletions
diff --git a/courgette/disassembler.h b/courgette/disassembler.h index bc715b0..7c57099 100644 --- a/courgette/disassembler.h +++ b/courgette/disassembler.h @@ -25,6 +25,7 @@ class Disassembler : public AddressTranslator { virtual FileOffset RVAToFileOffset(RVA rva) const override = 0; const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override; const uint8_t* RVAToPointer(RVA rva) const override; + RVA PointerToTargetRVA(const uint8_t* p) const = 0; virtual ExecutableType kind() const = 0; diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc index 9ceb8ab..70294d6 100644 --- a/courgette/disassembler_elf_32.cc +++ b/courgette/disassembler_elf_32.cc @@ -61,6 +61,11 @@ FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const { return kNoFileOffset; } +RVA DisassemblerElf32::PointerToTargetRVA(const uint8_t* p) const { + // TODO(huangs): Add check (e.g., IsValidTargetRVA(), but more efficient). + return Read32LittleEndian(p); +} + bool DisassemblerElf32::ParseHeader() { if (length() < sizeof(Elf32_Ehdr)) return Bad("Too small"); @@ -350,8 +355,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection( if (*current_abs_offset != end_abs_offset && file_offset == **current_abs_offset) { - const uint8_t* p = FileOffsetToPointer(file_offset); - RVA target_rva = Read32LittleEndian(p); + RVA target_rva = PointerToTargetRVA(FileOffsetToPointer(file_offset)); + DCHECK_NE(kNoRVA, target_rva); if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva))) return false; diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h index e9d00ca..56895bd 100644 --- a/courgette/disassembler_elf_32.h +++ b/courgette/disassembler_elf_32.h @@ -79,6 +79,7 @@ class DisassemblerElf32 : public Disassembler { // Disassembler interfaces. 
RVA FileOffsetToRVA(FileOffset file_offset) const override; FileOffset RVAToFileOffset(RVA rva) const override; + RVA PointerToTargetRVA(const uint8_t* p) const override; virtual ExecutableType kind() const override = 0; bool ParseHeader() override; bool Disassemble(AssemblyProgram* target) override; @@ -90,7 +91,6 @@ class DisassemblerElf32 : public Disassembler { ScopedVector<TypedRVA> &Rel32Locations() { return rel32_locations_; } protected: - bool UpdateLength(); // Misc Section Helpers diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc index 6604268..ffa6c36 100644 --- a/courgette/disassembler_win32_x64.cc +++ b/courgette/disassembler_win32_x64.cc @@ -41,6 +41,21 @@ DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) number_of_data_directories_(0) { } +RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { + for (int i = 0; i < number_of_sections_; ++i) { + const Section* section = &sections_[i]; + if (file_offset >= section->file_offset_of_raw_data) { + FileOffset offset_in_section = + file_offset - section->file_offset_of_raw_data; + if (offset_in_section < section->size_of_raw_data) + return static_cast<RVA>(section->virtual_address + offset_in_section); + } + } + + NOTREACHED(); + return kNoRVA; +} + FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { const Section* section = RVAToSection(rva); if (section != nullptr) { @@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { return kNoFileOffset; } -RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { - for (int i = 0; i < number_of_sections_; ++i) { - const Section* section = &sections_[i]; - if (file_offset >= section->file_offset_of_raw_data) { - FileOffset offset_in_section = - file_offset - section->file_offset_of_raw_data; - if (offset_in_section < section->size_of_raw_data) - return static_cast<RVA>(section->virtual_address + offset_in_section); - } - } - - 
NOTREACHED(); - return kNoRVA; +RVA DisassemblerWin32X64::PointerToTargetRVA(const uint8_t* p) const { + return Address64ToRVA(Read64LittleEndian(p)); } // ParseHeader attempts to match up the buffer with the Windows data @@ -338,6 +342,12 @@ const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { return nullptr; } +RVA DisassemblerWin32X64::Address64ToRVA(uint64_t address) const { + if (address < image_base() || address >= image_base() + size_of_image_) + return kNoRVA; + return base::checked_cast<RVA>(address - image_base()); +} + std::string DisassemblerWin32X64::SectionName(const Section* section) { if (section == nullptr) return "<none>"; @@ -389,9 +399,8 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() { #if COURGETTE_HISTOGRAM_TARGETS for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; - // The 4 bytes at the relocation are a reference to some address. - uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); - ++abs32_target_rvas_[target_address - image_base()]; + // The 8 bytes at the relocation are a reference to some address. + ++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))]; } #endif return true; @@ -601,8 +610,8 @@ CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, ++abs32_pos; if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { - uint64_t target_address = Read64LittleEndian(p); - RVA target_rva = base::checked_cast<RVA>(target_address - image_base()); + RVA target_rva = PointerToTargetRVA(p); + DCHECK_NE(kNoRVA, target_rva); // TODO(sra): target could be Label+offset. It is not clear how to guess // which it might be. We assume offset==0. 
if (!program->EmitAbs64(program->FindOrMakeAbs32Label(target_rva))) diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h index 20cfc7e..60a2259 100644 --- a/courgette/disassembler_win32_x64.h +++ b/courgette/disassembler_win32_x64.h @@ -29,6 +29,7 @@ class DisassemblerWin32X64 : public Disassembler { // Disassembler interfaces. RVA FileOffsetToRVA(FileOffset file_offset) const override; FileOffset RVAToFileOffset(RVA rva) const override; + RVA PointerToTargetRVA(const uint8_t* p) const override; ExecutableType kind() const override { return EXE_WIN_32_X64; } bool ParseHeader() override; bool Disassemble(AssemblyProgram* target) override; @@ -46,6 +47,10 @@ class DisassemblerWin32X64 : public Disassembler { // Returns Section containing the relative virtual address, or null if none. const Section* RVAToSection(RVA rva) const; + // (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA + // specified by |address|, or kNoRVA if |address| lies outside of the image. 
+ RVA Address64ToRVA(uint64_t address) const; + static std::string SectionName(const Section* section); protected: diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc index af41375..974e864 100644 --- a/courgette/disassembler_win32_x86.cc +++ b/courgette/disassembler_win32_x86.cc @@ -41,6 +41,21 @@ DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length) number_of_data_directories_(0) { } +RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const { + for (int i = 0; i < number_of_sections_; ++i) { + const Section* section = &sections_[i]; + if (file_offset >= section->file_offset_of_raw_data) { + FileOffset offset_in_section = + file_offset - section->file_offset_of_raw_data; + if (offset_in_section < section->size_of_raw_data) + return static_cast<RVA>(section->virtual_address + offset_in_section); + } + } + + NOTREACHED(); + return kNoRVA; +} + FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { const Section* section = RVAToSection(rva); if (section != nullptr) { @@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const { return kNoFileOffset; } -RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const { - for (int i = 0; i < number_of_sections_; ++i) { - const Section* section = &sections_[i]; - if (file_offset >= section->file_offset_of_raw_data) { - FileOffset offset_in_section = - file_offset - section->file_offset_of_raw_data; - if (offset_in_section < section->size_of_raw_data) - return static_cast<RVA>(section->virtual_address + offset_in_section); - } - } - - NOTREACHED(); - return kNoRVA; +RVA DisassemblerWin32X86::PointerToTargetRVA(const uint8_t* p) const { + return Address32ToRVA(Read32LittleEndian(p)); } // ParseHeader attempts to match up the buffer with the Windows data @@ -308,9 +312,8 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) { // Skip the relocs that live outside of the image. 
It might be the case // if a reloc is relative to a register, e.g.: // mov ecx,dword ptr [eax+044D5888h] - uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); - if (target_address < image_base_ || - target_address > (image_base_ + size_of_image_)) { + RVA target_rva = PointerToTargetRVA(RVAToPointer(rva)); + if (target_rva == kNoRVA) { continue; } if (type == 3) { // IMAGE_REL_BASED_HIGHLOW @@ -344,6 +347,12 @@ const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const { return nullptr; } +RVA DisassemblerWin32X86::Address32ToRVA(uint32_t address) const { + if (address < image_base() || address >= image_base() + size_of_image_) + return kNoRVA; + return static_cast<RVA>(address - image_base()); +} + std::string DisassemblerWin32X86::SectionName(const Section* section) { if (section == nullptr) return "<none>"; @@ -396,8 +405,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() { for (size_t i = 0; i < abs32_locations_.size(); ++i) { RVA rva = abs32_locations_[i]; // The 4 bytes at the relocation are a reference to some address. - uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); - ++abs32_target_rvas_[target_address - image_base()]; + ++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))]; } #endif return true; @@ -531,8 +539,8 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section, ++abs32_pos; if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) { - uint32_t target_address = Read32LittleEndian(p); - RVA target_rva = target_address - image_base(); + RVA target_rva = PointerToTargetRVA(p); + DCHECK_NE(kNoRVA, target_rva); // TODO(sra): target could be Label+offset. It is not clear how to guess // which it might be. We assume offset==0. 
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva))) diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h index c22872b..597f841f 100644 --- a/courgette/disassembler_win32_x86.h +++ b/courgette/disassembler_win32_x86.h @@ -29,6 +29,7 @@ class DisassemblerWin32X86 : public Disassembler { // Disassembler interfaces. RVA FileOffsetToRVA(FileOffset file_offset) const override; FileOffset RVAToFileOffset(RVA rva) const override; + RVA PointerToTargetRVA(const uint8_t* p) const override; ExecutableType kind() const override { return EXE_WIN_32_X86; } bool ParseHeader() override; bool Disassemble(AssemblyProgram* target) override; @@ -46,6 +47,10 @@ class DisassemblerWin32X86 : public Disassembler { // Returns Section containing the relative virtual address, or null if none. const Section* RVAToSection(RVA rva) const; + // (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA + // specified by |address|, or kNoRVA if |address| lies outside of the image. + RVA Address32ToRVA(uint32_t address) const; + static std::string SectionName(const Section* section); protected: diff --git a/courgette/image_utils.h b/courgette/image_utils.h index cfbfcfe..aa539b6 100644 --- a/courgette/image_utils.h +++ b/courgette/image_utils.h @@ -19,12 +19,13 @@ namespace courgette { // - VA (Virtual Address): Virtual memory address of a loaded image. This is // subject to relocation by the OS. // - RVA (Relative Virtual Address): VA relative to some base address. This is -// the preferred way to specify pointers in an image. Two ways to encode RVA -// are: -// - abs32: RVA value is encoded directly. -// - rel32: RVA is encoded as offset from an instruction address. This is -// commonly used for relative branch/call opcodes. -// Courgette operates on File Offsets and RVAs only. +// the preferred way to specify pointers in an image. 
+// +// In Courgette we consider two types of addresses: +// - abs32: In an image these are directly stored as VA whose locations are +// stored in the relocation table. +// - rel32: In an image these appear in branch/call opcodes, and are represented +// as offsets from an instruction address. using RVA = uint32_t; const RVA kUnassignedRVA = 0xFFFFFFFFU; @@ -33,24 +34,37 @@ const RVA kNoRVA = 0xFFFFFFFFU; using FileOffset = size_t; const FileOffset kNoFileOffset = UINTPTR_MAX; -// An interface for {File Offset, RVA, pointer to image data} translation. +// An interface translate and read addresses. The main conversion path is: +// (1) Location RVA. +// (2) Location FileOffset. +// (3) Pointer in image. +// (4) Target VA (32-bit or 64-bit). +// (5) Target RVA (32-bit). +// For abs32, we get (1) from relocation table, and convert to (5). +// For rel32, we get (2) from scanning opcode, and convert to (1). class AddressTranslator { public: - // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent. + // (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if + // nonexistent. virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0; - // Returns the file offset corresponding to |rva|, or kNoFileOffset if - // nonexistent. + // (1) -> (2): Returns the file offset corresponding to |rva|, or + // kNoFileOffset if nonexistent. virtual FileOffset RVAToFileOffset(RVA rva) const = 0; - // Returns the pointer to the image data for |file_offset|. Assumes that - // 0 <= |file_offset| <= image size. If |file_offset| == image, the resulting - // pointer is an end bound for iteration that should never be dereferenced. + // (2) -> (3): Returns image data pointer correspnoding to |file_offset|. + // Assumes 0 <= |file_offset| <= image size. + // If |file_offset| == image size, then the resulting pointer is an end bound + // for iteration, and should not be dereferenced. 
virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0; - // Returns the pointer to the image data for |rva|, or null if |rva| is - // invalid. + // (1) -> (3): Returns the pointer to the image data for |rva|, or null if + // |rva| is invalid. virtual const uint8_t* RVAToPointer(RVA rva) const = 0; + + // (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer + // to image data. + virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0; }; // A Label is a symbolic reference to an address. Unlike a conventional |