summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorhuangs <huangs@chromium.org>2016-03-23 13:40:35 -0700
committerCommit bot <commit-bot@chromium.org>2016-03-23 20:42:14 +0000
commitf940a8c9bf80de50dc23562aa46dfe09439b5aa2 (patch)
tree2c1eb13677525c093383207b7978d386286a45b5 /courgette
parent880fae5d33cc8394387fedc03f358e8a0717b0d1 (diff)
downloadchromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.zip
chromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.tar.gz
chromium_src-f940a8c9bf80de50dc23562aa46dfe09439b5aa2.tar.bz2
[Courgette] Add and use AddressTranslator::PointerToTargetRVA(); Update comments.
Addresses in Courgette (abs32 and rel32) are represented in these forms: (1) Location RVA. (2) Location FileOffset. (3) Pointer in image. (4) Target VA. (5) Target RVA. We already have (1) -> (2), (2) -> (1), (2) -> (3), (1) -> (3) for existing usage. Now we add (3) -> (5) and refactor accordingly (with helpers to do (4) -> (5) for PE files). PointerToTargetRVA() will be used again when we apply LabelManager to save 25% peak RAM. Review URL: https://codereview.chromium.org/1807293003 Cr-Commit-Position: refs/heads/master@{#382920}
Diffstat (limited to 'courgette')
-rw-r--r--courgette/disassembler.h1
-rw-r--r--courgette/disassembler_elf_32.cc9
-rw-r--r--courgette/disassembler_elf_32.h2
-rw-r--r--courgette/disassembler_win32_x64.cc45
-rw-r--r--courgette/disassembler_win32_x64.h5
-rw-r--r--courgette/disassembler_win32_x86.cc48
-rw-r--r--courgette/disassembler_win32_x86.h5
-rw-r--r--courgette/image_utils.h44
8 files changed, 103 insertions, 56 deletions
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index bc715b0..7c57099 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -25,6 +25,7 @@ class Disassembler : public AddressTranslator {
virtual FileOffset RVAToFileOffset(RVA rva) const override = 0;
const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override;
const uint8_t* RVAToPointer(RVA rva) const override;
+ RVA PointerToTargetRVA(const uint8_t* p) const = 0;
virtual ExecutableType kind() const = 0;
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc
index 9ceb8ab..70294d6 100644
--- a/courgette/disassembler_elf_32.cc
+++ b/courgette/disassembler_elf_32.cc
@@ -61,6 +61,11 @@ FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}
+RVA DisassemblerElf32::PointerToTargetRVA(const uint8_t* p) const {
+ // TODO(huangs): Add check (e.g., IsValidTargetRVA(), but more efficient).
+ return Read32LittleEndian(p);
+}
+
bool DisassemblerElf32::ParseHeader() {
if (length() < sizeof(Elf32_Ehdr))
return Bad("Too small");
@@ -350,8 +355,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
if (*current_abs_offset != end_abs_offset &&
file_offset == **current_abs_offset) {
- const uint8_t* p = FileOffsetToPointer(file_offset);
- RVA target_rva = Read32LittleEndian(p);
+ RVA target_rva = PointerToTargetRVA(FileOffsetToPointer(file_offset));
+ DCHECK_NE(kNoRVA, target_rva);
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
return false;
diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h
index e9d00ca..56895bd 100644
--- a/courgette/disassembler_elf_32.h
+++ b/courgette/disassembler_elf_32.h
@@ -79,6 +79,7 @@ class DisassemblerElf32 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
+ RVA PointerToTargetRVA(const uint8_t* p) const override;
virtual ExecutableType kind() const override = 0;
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
@@ -90,7 +91,6 @@ class DisassemblerElf32 : public Disassembler {
ScopedVector<TypedRVA> &Rel32Locations() { return rel32_locations_; }
protected:
-
bool UpdateLength();
// Misc Section Helpers
diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc
index 6604268..ffa6c36 100644
--- a/courgette/disassembler_win32_x64.cc
+++ b/courgette/disassembler_win32_x64.cc
@@ -41,6 +41,21 @@ DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length)
number_of_data_directories_(0) {
}
+RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
+ for (int i = 0; i < number_of_sections_; ++i) {
+ const Section* section = &sections_[i];
+ if (file_offset >= section->file_offset_of_raw_data) {
+ FileOffset offset_in_section =
+ file_offset - section->file_offset_of_raw_data;
+ if (offset_in_section < section->size_of_raw_data)
+ return static_cast<RVA>(section->virtual_address + offset_in_section);
+ }
+ }
+
+ NOTREACHED();
+ return kNoRVA;
+}
+
FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
const Section* section = RVAToSection(rva);
if (section != nullptr) {
@@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}
-RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
- for (int i = 0; i < number_of_sections_; ++i) {
- const Section* section = &sections_[i];
- if (file_offset >= section->file_offset_of_raw_data) {
- FileOffset offset_in_section =
- file_offset - section->file_offset_of_raw_data;
- if (offset_in_section < section->size_of_raw_data)
- return static_cast<RVA>(section->virtual_address + offset_in_section);
- }
- }
-
- NOTREACHED();
- return kNoRVA;
+RVA DisassemblerWin32X64::PointerToTargetRVA(const uint8_t* p) const {
+ return Address64ToRVA(Read64LittleEndian(p));
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -338,6 +342,12 @@ const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const {
return nullptr;
}
+RVA DisassemblerWin32X64::Address64ToRVA(uint64_t address) const {
+ if (address < image_base() || address >= image_base() + size_of_image_)
+ return kNoRVA;
+ return base::checked_cast<RVA>(address - image_base());
+}
+
std::string DisassemblerWin32X64::SectionName(const Section* section) {
if (section == nullptr)
return "<none>";
@@ -389,9 +399,8 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
#if COURGETTE_HISTOGRAM_TARGETS
for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
- // The 4 bytes at the relocation are a reference to some address.
- uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
- ++abs32_target_rvas_[target_address - image_base()];
+ // The 8 bytes at the relocation are a reference to some address.
+ ++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))];
}
#endif
return true;
@@ -601,8 +610,8 @@ CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section,
++abs32_pos;
if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
- uint64_t target_address = Read64LittleEndian(p);
- RVA target_rva = base::checked_cast<RVA>(target_address - image_base());
+ RVA target_rva = PointerToTargetRVA(p);
+ DCHECK_NE(kNoRVA, target_rva);
// TODO(sra): target could be Label+offset. It is not clear how to guess
// which it might be. We assume offset==0.
if (!program->EmitAbs64(program->FindOrMakeAbs32Label(target_rva)))
diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h
index 20cfc7e..60a2259 100644
--- a/courgette/disassembler_win32_x64.h
+++ b/courgette/disassembler_win32_x64.h
@@ -29,6 +29,7 @@ class DisassemblerWin32X64 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
+ RVA PointerToTargetRVA(const uint8_t* p) const override;
ExecutableType kind() const override { return EXE_WIN_32_X64; }
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
@@ -46,6 +47,10 @@ class DisassemblerWin32X64 : public Disassembler {
// Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;
+ // (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA
+ // specified by |address|, or kNoRVA if |address| lies outside of the image.
+ RVA Address64ToRVA(uint64_t address) const;
+
static std::string SectionName(const Section* section);
protected:
diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc
index af41375..974e864 100644
--- a/courgette/disassembler_win32_x86.cc
+++ b/courgette/disassembler_win32_x86.cc
@@ -41,6 +41,21 @@ DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
number_of_data_directories_(0) {
}
+RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
+ for (int i = 0; i < number_of_sections_; ++i) {
+ const Section* section = &sections_[i];
+ if (file_offset >= section->file_offset_of_raw_data) {
+ FileOffset offset_in_section =
+ file_offset - section->file_offset_of_raw_data;
+ if (offset_in_section < section->size_of_raw_data)
+ return static_cast<RVA>(section->virtual_address + offset_in_section);
+ }
+ }
+
+ NOTREACHED();
+ return kNoRVA;
+}
+
FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
const Section* section = RVAToSection(rva);
if (section != nullptr) {
@@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}
-RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
- for (int i = 0; i < number_of_sections_; ++i) {
- const Section* section = &sections_[i];
- if (file_offset >= section->file_offset_of_raw_data) {
- FileOffset offset_in_section =
- file_offset - section->file_offset_of_raw_data;
- if (offset_in_section < section->size_of_raw_data)
- return static_cast<RVA>(section->virtual_address + offset_in_section);
- }
- }
-
- NOTREACHED();
- return kNoRVA;
+RVA DisassemblerWin32X86::PointerToTargetRVA(const uint8_t* p) const {
+ return Address32ToRVA(Read32LittleEndian(p));
}
// ParseHeader attempts to match up the buffer with the Windows data
@@ -308,9 +312,8 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
// Skip the relocs that live outside of the image. It might be the case
// if a reloc is relative to a register, e.g.:
// mov ecx,dword ptr [eax+044D5888h]
- uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
- if (target_address < image_base_ ||
- target_address > (image_base_ + size_of_image_)) {
+ RVA target_rva = PointerToTargetRVA(RVAToPointer(rva));
+ if (target_rva == kNoRVA) {
continue;
}
if (type == 3) { // IMAGE_REL_BASED_HIGHLOW
@@ -344,6 +347,12 @@ const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
return nullptr;
}
+RVA DisassemblerWin32X86::Address32ToRVA(uint32_t address) const {
+ if (address < image_base() || address >= image_base() + size_of_image_)
+ return kNoRVA;
+ return static_cast<RVA>(address - image_base());
+}
+
std::string DisassemblerWin32X86::SectionName(const Section* section) {
if (section == nullptr)
return "<none>";
@@ -396,8 +405,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
- uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
- ++abs32_target_rvas_[target_address - image_base()];
+ ++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))];
}
#endif
return true;
@@ -531,8 +539,8 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section,
++abs32_pos;
if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
- uint32_t target_address = Read32LittleEndian(p);
- RVA target_rva = target_address - image_base();
+ RVA target_rva = PointerToTargetRVA(p);
+ DCHECK_NE(kNoRVA, target_rva);
// TODO(sra): target could be Label+offset. It is not clear how to guess
// which it might be. We assume offset==0.
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h
index c22872b..597f841f 100644
--- a/courgette/disassembler_win32_x86.h
+++ b/courgette/disassembler_win32_x86.h
@@ -29,6 +29,7 @@ class DisassemblerWin32X86 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
+ RVA PointerToTargetRVA(const uint8_t* p) const override;
ExecutableType kind() const override { return EXE_WIN_32_X86; }
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
@@ -46,6 +47,10 @@ class DisassemblerWin32X86 : public Disassembler {
// Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;
+ // (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA
+ // specified by |address|, or kNoRVA if |address| lies outside of the image.
+ RVA Address32ToRVA(uint32_t address) const;
+
static std::string SectionName(const Section* section);
protected:
diff --git a/courgette/image_utils.h b/courgette/image_utils.h
index cfbfcfe..aa539b6 100644
--- a/courgette/image_utils.h
+++ b/courgette/image_utils.h
@@ -19,12 +19,13 @@ namespace courgette {
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
// subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
-// the preferred way to specify pointers in an image. Two ways to encode RVA
-// are:
-// - abs32: RVA value is encoded directly.
-// - rel32: RVA is encoded as offset from an instruction address. This is
-// commonly used for relative branch/call opcodes.
-// Courgette operates on File Offsets and RVAs only.
+// the preferred way to specify pointers in an image.
+//
+// In Courgette we consider two types of addresses:
+// - abs32: In an image these are directly stored as VA whose locations are
+// stored in the relocation table.
+// - rel32: In an image these appear in branch/call opcodes, and are represented
+// as offsets from an instruction address.
using RVA = uint32_t;
const RVA kUnassignedRVA = 0xFFFFFFFFU;
@@ -33,24 +34,37 @@ const RVA kNoRVA = 0xFFFFFFFFU;
using FileOffset = size_t;
const FileOffset kNoFileOffset = UINTPTR_MAX;
-// An interface for {File Offset, RVA, pointer to image data} translation.
+// An interface to translate and read addresses. The main conversion path is:
+// (1) Location RVA.
+// (2) Location FileOffset.
+// (3) Pointer in image.
+// (4) Target VA (32-bit or 64-bit).
+// (5) Target RVA (32-bit).
+// For abs32, we get (1) from relocation table, and convert to (5).
+// For rel32, we get (2) from scanning opcode, and convert to (1).
class AddressTranslator {
public:
- // Returns the RVA corresponding to |file_offset|, or kNoRVA if nonexistent.
+ // (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if
+ // nonexistent.
virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;
- // Returns the file offset corresponding to |rva|, or kNoFileOffset if
- // nonexistent.
+ // (1) -> (2): Returns the file offset corresponding to |rva|, or
+ // kNoFileOffset if nonexistent.
virtual FileOffset RVAToFileOffset(RVA rva) const = 0;
- // Returns the pointer to the image data for |file_offset|. Assumes that
- // 0 <= |file_offset| <= image size. If |file_offset| == image, the resulting
- // pointer is an end bound for iteration that should never be dereferenced.
+ // (2) -> (3): Returns image data pointer corresponding to |file_offset|.
+ // Assumes 0 <= |file_offset| <= image size.
+ // If |file_offset| == image size, then the resulting pointer is an end bound
+ // for iteration, and should not be dereferenced.
virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;
- // Returns the pointer to the image data for |rva|, or null if |rva| is
- // invalid.
+ // (1) -> (3): Returns the pointer to the image data for |rva|, or null if
+ // |rva| is invalid.
virtual const uint8_t* RVAToPointer(RVA rva) const = 0;
+
+ // (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer
+ // to image data.
+ virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0;
};
// A Label is a symbolic reference to an address. Unlike a conventional