summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorhuangs <huangs@chromium.org>2015-09-18 11:52:56 -0700
committerCommit bot <commit-bot@chromium.org>2015-09-18 18:53:28 +0000
commit6d2a30316defffdd77c473547063316ebb9c279b (patch)
tree6de1afe27ab096196c1c4e92587c19519691b443 /courgette
parent6d5ddf5cd94ed429df19496948062fe137e68116 (diff)
downloadchromium_src-6d2a30316defffdd77c473547063316ebb9c279b.zip
chromium_src-6d2a30316defffdd77c473547063316ebb9c279b.tar.gz
chromium_src-6d2a30316defffdd77c473547063316ebb9c279b.tar.bz2
[Courgette] Refactor: Adding Rel32FinderWin32X86_Basic and Unittests
We're planning to improve the heuristic used to find Rel32 addresses for Win32 x86. As a first step, refactor by extracting the feature into its own class. This also led to the extraction of RVA and various helpers from disassembler.h into image_utils.h. Also adding unittests for Rel32FinderWin32X86_Basic. The tests are data-driven, using a simple parser for the test data. BUG= Review URL: https://codereview.chromium.org/1344173003 Cr-Commit-Position: refs/heads/master@{#349727}
Diffstat (limited to 'courgette')
-rw-r--r--courgette/BUILD.gn5
-rw-r--r--courgette/courgette.gyp5
-rw-r--r--courgette/disassembler.h35
-rw-r--r--courgette/disassembler_win32_x86.cc76
-rw-r--r--courgette/disassembler_win32_x86.h2
-rw-r--r--courgette/image_utils.h45
-rw-r--r--courgette/image_utils_unittest.cc30
-rw-r--r--courgette/rel32_finder_win32_x86.cc113
-rw-r--r--courgette/rel32_finder_win32_x86.h77
-rw-r--r--courgette/rel32_finder_win32_x86_unittest.cc149
-rw-r--r--courgette/testdata/rel32_win32_x86_01.txt55
-rw-r--r--courgette/testdata/rel32_win32_x86_02.txt62
-rw-r--r--courgette/testdata/rel32_win32_x86_03.txt40
-rw-r--r--courgette/testdata/rel32_win32_x86_04.txt47
-rw-r--r--courgette/typedrva_unittest.cc1
15 files changed, 640 insertions, 102 deletions
diff --git a/courgette/BUILD.gn b/courgette/BUILD.gn
index 1ebd2ee..bc9a9f6 100644
--- a/courgette/BUILD.gn
+++ b/courgette/BUILD.gn
@@ -34,11 +34,14 @@ static_library("courgette_lib") {
"ensemble.h",
"ensemble_apply.cc",
"ensemble_create.cc",
+ "image_utils.h",
"memory_allocator.cc",
"memory_allocator.h",
"patch_generator_x86_32.h",
"patcher_x86_32.h",
"region.h",
+ "rel32_finder_win32_x86.cc",
+ "rel32_finder_win32_x86.h",
"simple_delta.cc",
"simple_delta.h",
"streams.cc",
@@ -104,7 +107,9 @@ test("courgette_unittests") {
"encode_decode_unittest.cc",
"encoded_program_unittest.cc",
"ensemble_unittest.cc",
+ "image_utils_unittest.cc",
"memory_allocator_unittest.cc",
+ "rel32_finder_win32_x86_unittest.cc",
"streams_unittest.cc",
"third_party/paged_array_unittest.cc",
"third_party/qsufsort_unittest.cc",
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp
index 8ff08bc..e5ec783 100644
--- a/courgette/courgette.gyp
+++ b/courgette/courgette.gyp
@@ -34,9 +34,12 @@
'ensemble.h',
'ensemble_apply.cc',
'ensemble_create.cc',
+ 'image_utils.h',
'memory_allocator.cc',
'memory_allocator.h',
'region.h',
+ 'rel32_finder_win32_x86.cc',
+ 'rel32_finder_win32_x86.h',
'simple_delta.cc',
'simple_delta.h',
'streams.cc',
@@ -106,7 +109,9 @@
'encoded_program_unittest.cc',
'encode_decode_unittest.cc',
'ensemble_unittest.cc',
+ 'image_utils_unittest.cc',
'memory_allocator_unittest.cc',
+ 'rel32_finder_win32_x86_unittest.cc',
'streams_unittest.cc',
'typedrva_unittest.cc',
'versioning_unittest.cc',
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
index 0154ec4..abbec3a 100644
--- a/courgette/disassembler.h
+++ b/courgette/disassembler.h
@@ -8,19 +8,12 @@
#include "base/basictypes.h"
#include "courgette/courgette.h"
-
-// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
-// different target addresses are referenced. Purely for debugging.
-#define COURGETTE_HISTOGRAM_TARGETS 0
+#include "courgette/image_utils.h"
namespace courgette {
class AssemblyProgram;
-// A Relative Virtual Address is the address in the image file after it is
-// loaded into memory relative to the image load address.
-typedef uint32 RVA;
-
class Disassembler {
public:
virtual ~Disassembler();
@@ -59,31 +52,6 @@ class Disassembler {
return offset <= length() && elements <= (length() - offset) / element_size;
}
- // These helper functions avoid the need for casts in the main code.
- uint16 ReadU16(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint16*>(address + offset);
- }
-
- uint32 ReadU32(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint32*>(address + offset);
- }
-
- uint64 ReadU64(const uint8* address, size_t offset) {
- return *reinterpret_cast<const uint64*>(address + offset);
- }
-
- static uint32 Read32LittleEndian(const void* address) {
- return *reinterpret_cast<const uint32*>(address);
- }
-
- static uint64 Read64LittleEndian(const void* address) {
- return *reinterpret_cast<const uint64*>(address);
- }
-
- static uint16 Read16LittleEndian(const void* address) {
- return *reinterpret_cast<const uint16*>(address);
- }
-
// Reduce the length of the image in memory. Does not actually free
// (or realloc) any memory. Usually only called via ParseHeader()
void ReduceLength(size_t reduced_length);
@@ -104,4 +72,5 @@ class Disassembler {
};
} // namespace courgette
+
#endif // COURGETTE_DISASSEMBLER_H_
diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc
index dd3dd5e..85d99f4 100644
--- a/courgette/disassembler_win32_x86.cc
+++ b/courgette/disassembler_win32_x86.cc
@@ -14,6 +14,7 @@
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
+#include "courgette/rel32_finder_win32_x86.h"
namespace courgette {
@@ -434,7 +435,6 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
uint32 start_file_offset = section->file_offset_of_raw_data;
uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
- RVA relocs_start_rva = base_relocation_table().address_;
const uint8* start_pointer = OffsetToPointer(start_file_offset);
const uint8* end_pointer = OffsetToPointer(end_file_offset);
@@ -442,75 +442,17 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
RVA start_rva = FileOffsetToRVA(start_file_offset);
RVA end_rva = start_rva + section->virtual_size;
- // Quick way to convert from Pointer to RVA within a single Section is to
- // subtract 'pointer_to_rva'.
- const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
-
- std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
-
- // Find the rel32 relocations.
- const uint8* p = start_pointer;
- while (p < end_pointer) {
- RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
- if (current_rva == relocs_start_rva) {
- uint32 relocs_size = base_relocation_table().size_;
- if (relocs_size) {
- p += relocs_size;
- continue;
- }
- }
-
- //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
- // ++abs32_pos;
-
- // Heuristic discovery of rel32 locations in instruction stream: are the
- // next few bytes the start of an instruction containing a rel32
- // addressing mode?
- const uint8* rel32 = NULL;
-
- if (p + 5 <= end_pointer) {
- if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
- rel32 = p + 1;
- }
- }
- if (p + 6 <= end_pointer) {
- if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
- if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
- rel32 = p + 2;
- }
- }
- if (rel32) {
- RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
-
- // Is there an abs32 reloc overlapping the candidate?
- while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3)
- ++abs32_pos;
- // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
- // region that could overlap rel32_rva.
- if (abs32_pos != abs32_locations_.end()) {
- if (*abs32_pos < rel32_rva + 4) {
- // Beginning of abs32 reloc is before end of rel32 reloc so they
- // overlap. Skip four bytes past the abs32 reloc.
- p += (*abs32_pos + 4) - current_rva;
- continue;
- }
- }
+ Rel32FinderWin32X86_Basic finder(
+ base_relocation_table().address_,
+ base_relocation_table().address_ + base_relocation_table().size_,
+ size_of_image_);
+ finder.Find(start_pointer, end_pointer, start_rva, end_rva, abs32_locations_);
+ finder.SwapRel32Locations(&rel32_locations_);
- RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
- // To be valid, rel32 target must be within image, and within this
- // section.
- if (IsValidRVA(target_rva) &&
- start_rva <= target_rva && target_rva < end_rva) {
- rel32_locations_.push_back(rel32_rva);
#if COURGETTE_HISTOGRAM_TARGETS
- ++rel32_target_rvas_[target_rva];
+ DCHECK(rel32_target_rvas_.empty());
+ finder.SwapRel32TargetRVAs(&rel32_target_rvas_);
#endif
- p = rel32 + 4;
- continue;
- }
- }
- p += 1;
- }
}
CheckBool DisassemblerWin32X86::ParseNonSectionFileRegion(
diff --git a/courgette/disassembler_win32_x86.h b/courgette/disassembler_win32_x86.h
index dec339f..52ce520 100644
--- a/courgette/disassembler_win32_x86.h
+++ b/courgette/disassembler_win32_x86.h
@@ -82,8 +82,6 @@ class DisassemblerWin32X86 : public Disassembler {
return base_relocation_table_;
}
- bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
-
// Returns description of the RVA, e.g. ".text+0x1243". For debugging only.
std::string DescribeRVA(RVA rva) const;
diff --git a/courgette/image_utils.h b/courgette/image_utils.h
new file mode 100644
index 0000000..c016357
--- /dev/null
+++ b/courgette/image_utils.h
@@ -0,0 +1,45 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_IMAGE_UTILS_H_
+#define COURGETTE_IMAGE_UTILS_H_
+
+#include <string.h>
+
+#include "base/basictypes.h"
+
+// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
+// different target addresses are referenced. Purely for debugging.
+#define COURGETTE_HISTOGRAM_TARGETS 0
+
+namespace courgette {
+
+typedef uint32 RVA;
+
+// These helper functions avoid the need for casts in the main code.
+//
+// Every reader copies the bytes out with memcpy() rather than dereferencing a
+// reinterpret_cast'ed pointer, so |address| (plus |offset|) may have any
+// alignment. No byte swapping is performed: the value is interpreted in the
+// byte order of the host, including for the *LittleEndian variants.
+
+// Returns the uint16 stored |offset| bytes past |address|.
+inline uint16 ReadU16(const uint8* address, size_t offset) {
+  uint16 value;
+  memcpy(&value, address + offset, sizeof(value));
+  return value;
+}
+
+// Returns the uint32 stored |offset| bytes past |address|.
+inline uint32 ReadU32(const uint8* address, size_t offset) {
+  uint32 value;
+  memcpy(&value, address + offset, sizeof(value));
+  return value;
+}
+
+// Returns the uint64 stored |offset| bytes past |address|.
+inline uint64 ReadU64(const uint8* address, size_t offset) {
+  uint64 value;
+  memcpy(&value, address + offset, sizeof(value));
+  return value;
+}
+
+// Returns the uint16 stored at |address|.
+inline uint16 Read16LittleEndian(const void* address) {
+  uint16 value;
+  memcpy(&value, address, sizeof(value));
+  return value;
+}
+
+// Returns the uint32 stored at |address|.
+inline uint32 Read32LittleEndian(const void* address) {
+  uint32 value;
+  memcpy(&value, address, sizeof(value));
+  return value;
+}
+
+// Returns the uint64 stored at |address|.
+inline uint64 Read64LittleEndian(const void* address) {
+  uint64 value;
+  memcpy(&value, address, sizeof(value));
+  return value;
+}
+
+} // namespace courgette
+
+#endif // COURGETTE_IMAGE_UTILS_H_
diff --git a/courgette/image_utils_unittest.cc b/courgette/image_utils_unittest.cc
new file mode 100644
index 0000000..a90c19b
--- /dev/null
+++ b/courgette/image_utils_unittest.cc
@@ -0,0 +1,30 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/image_utils.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace courgette {
+
+namespace {
+
+// Checks all six read helpers against one fixed byte pattern. The expected
+// values are the little-endian interpretation of the bytes.
+TEST(ImageUtilsTest, Read) {
+ // Nine bytes, so that the 8-byte read at offset 1 below stays inside the
+ // array.
+ uint8 test_data[] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 00};
+ EXPECT_EQ(0x2301U, Read16LittleEndian(test_data));
+ EXPECT_EQ(0x67452301U, Read32LittleEndian(test_data));
+ EXPECT_EQ(0xEFCDAB8967452301ULL, Read64LittleEndian(test_data));
+
+ // These will break big-endian architectures, which we don't yet support.
+ // The offset-1 cases cover reads that do not start at the array's first
+ // byte.
+ EXPECT_EQ(0x2301U, ReadU16(test_data, 0));
+ EXPECT_EQ(0x4523U, ReadU16(test_data, 1));
+ EXPECT_EQ(0x67452301U, ReadU32(test_data, 0));
+ EXPECT_EQ(0x89674523U, ReadU32(test_data, 1));
+ EXPECT_EQ(0xEFCDAB8967452301ULL, ReadU64(test_data, 0));
+ EXPECT_EQ(0x00EFCDAB89674523ULL, ReadU64(test_data, 1));
+}
+
+} // namespace
+
+} // namespace courgette
diff --git a/courgette/rel32_finder_win32_x86.cc b/courgette/rel32_finder_win32_x86.cc
new file mode 100644
index 0000000..09419e9
--- /dev/null
+++ b/courgette/rel32_finder_win32_x86.cc
@@ -0,0 +1,113 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/rel32_finder_win32_x86.h"
+
+namespace courgette {
+
+// Stores the three RVA bounds; no scanning happens until Find() is called.
+Rel32FinderWin32X86::Rel32FinderWin32X86(
+ RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
+ : relocs_start_rva_(relocs_start_rva),
+ relocs_end_rva_(relocs_end_rva),
+ image_end_rva_(image_end_rva) {
+}
+
+Rel32FinderWin32X86::~Rel32FinderWin32X86() {
+}
+
+// Exchanges the contents of |rel32_locations_| and |*dest|, so the caller
+// receives the results without copying and this object is left holding
+// whatever |*dest| contained before the call.
+void Rel32FinderWin32X86::SwapRel32Locations(std::vector<RVA>* dest) {
+ dest->swap(rel32_locations_);
+}
+
+#if COURGETTE_HISTOGRAM_TARGETS
+// Exchanges the contents of |rel32_target_rvas_| and |*dest|. Only compiled
+// when the target histogram is enabled.
+void Rel32FinderWin32X86::SwapRel32TargetRVAs(std::map<RVA, int>* dest) {
+ dest->swap(rel32_target_rvas_);
+}
+#endif
+
+// Forwards all bounds to the base class; the basic finder adds no state.
+Rel32FinderWin32X86_Basic::Rel32FinderWin32X86_Basic(
+ RVA relocs_start_rva, RVA relocs_end_rva, RVA image_end_rva)
+ : Rel32FinderWin32X86(relocs_start_rva, relocs_end_rva, image_end_rva) {
+}
+
+Rel32FinderWin32X86_Basic::~Rel32FinderWin32X86_Basic() {
+}
+
+// Scans the bytes in [|start_pointer|, |end_pointer|) one position at a time
+// for E8/E9 and 0F 8x opcodes, and appends the RVA of each accepted 4-byte
+// displacement to |rel32_locations_|. |start_rva| is the RVA corresponding to
+// |start_pointer|. A candidate is accepted only if it does not overlap an
+// entry of |abs32_locations| and its target lies inside the image and inside
+// [|start_rva|, |end_rva|). |abs32_locations| is walked forward only, so it
+// must be sorted in increasing order.
+void Rel32FinderWin32X86_Basic::Find(
+ const uint8* start_pointer,
+ const uint8* end_pointer,
+ RVA start_rva,
+ RVA end_rva,
+ const std::vector<RVA>& abs32_locations) {
+ // Quick way to convert from Pointer to RVA within a single Section is to
+ // subtract 'pointer_to_rva'.
+ const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
+
+ std::vector<RVA>::const_iterator abs32_pos = abs32_locations.begin();
+
+ // Find the rel32 relocations.
+ const uint8* p = start_pointer;
+ while (p < end_pointer) {
+ RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+ // Jump over the relocation table, but only if the scan lands exactly on
+ // its first byte and the range is non-empty. If an earlier accepted
+ // instruction or abs32 skip carries |p| past |relocs_start_rva_|, the table
+ // is not skipped.
+ if (current_rva == relocs_start_rva_) {
+ if (relocs_start_rva_ < relocs_end_rva_) {
+ p += relocs_end_rva_ - relocs_start_rva_;
+ continue;
+ }
+ }
+
+ //while (abs32_pos != abs32_locations.end() && *abs32_pos < current_rva)
+ // ++abs32_pos;
+
+ // Heuristic discovery of rel32 locations in instruction stream: are the
+ // next few bytes the start of an instruction containing a rel32
+ // addressing mode?
+ const uint8* rel32 = NULL;
+
+ // One opcode byte plus a 4-byte displacement must fit before |end_pointer|.
+ if (p + 5 <= end_pointer) {
+ if (*p == 0xE8 || *p == 0xE9) { // jmp rel32 and call rel32
+ rel32 = p + 1;
+ }
+ }
+ // Two opcode bytes plus a 4-byte displacement must fit before
+ // |end_pointer|.
+ if (p + 6 <= end_pointer) {
+ if (*p == 0x0F && (*(p+1) & 0xF0) == 0x80) { // Jcc long form
+ if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely
+ rel32 = p + 2;
+ }
+ }
+ if (rel32) {
+ RVA rel32_rva = static_cast<RVA>(rel32 - adjust_pointer_to_rva);
+
+ // Is there an abs32 reloc overlapping the candidate?
+ // NOTE(review): RVA is unsigned, so |rel32_rva - 3| wraps around when
+ // |rel32_rva| < 3; this looks unreachable for sections that start at a
+ // non-trivial RVA -- confirm.
+ while (abs32_pos != abs32_locations.end() && *abs32_pos < rel32_rva - 3)
+ ++abs32_pos;
+ // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
+ // region that could overlap rel32_rva.
+ if (abs32_pos != abs32_locations.end()) {
+ if (*abs32_pos < rel32_rva + 4) {
+ // Beginning of abs32 reloc is before end of rel32 reloc so they
+ // overlap. Skip four bytes past the abs32 reloc.
+ p += (*abs32_pos + 4) - current_rva;
+ continue;
+ }
+ }
+
+ // The displacement is relative to the byte following the 4-byte operand.
+ // The addition is done in unsigned 32-bit arithmetic, so a displacement
+ // with the high bit set wraps around to a lower target RVA.
+ RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
+ // To be valid, rel32 target must be within image, and within this
+ // section.
+ if (IsValidRVA(target_rva) &&
+ start_rva <= target_rva && target_rva < end_rva) {
+ rel32_locations_.push_back(rel32_rva);
+#if COURGETTE_HISTOGRAM_TARGETS
+ ++rel32_target_rvas_[target_rva];
+#endif
+ // Resume right after the accepted displacement, so bytes inside it are
+ // never considered as opcodes.
+ p = rel32 + 4;
+ continue;
+ }
+ }
+ // No accepted candidate at this position: retry at the next byte.
+ p += 1;
+ }
+}
+
+} // namespace courgette
diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h
new file mode 100644
index 0000000..84dfe0b
--- /dev/null
+++ b/courgette/rel32_finder_win32_x86.h
@@ -0,0 +1,77 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_REL32_FINDER_WIN32_X86_H_
+#define COURGETTE_REL32_FINDER_WIN32_X86_H_
+
+// <map> is included unconditionally. COURGETTE_HISTOGRAM_TARGETS is defined by
+// courgette/image_utils.h, which is only included further down, so an #if
+// placed here would see the macro as undefined (i.e. 0) whenever this header
+// is the first courgette header in a translation unit, and std::map would be
+// missing once the histogram is switched on.
+#include <map>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "courgette/image_utils.h"
+
+namespace courgette {
+
+// A helper class to scan through a section of code to extract RVAs.
+// This is the abstract interface: subclasses implement Find(), which fills
+// |rel32_locations_|; callers then retrieve the results through the Swap*()
+// methods.
+class Rel32FinderWin32X86 {
+ public:
+ // |relocs_start_rva| and |relocs_end_rva| bound the base relocation table as
+ // the half-open range [start, end). |image_end_rva| is the first RVA that is
+ // no longer inside the image (see IsValidRVA()).
+ Rel32FinderWin32X86(RVA relocs_start_rva, RVA relocs_end_rva,
+ RVA image_end_rva);
+ virtual ~Rel32FinderWin32X86();
+
+ // Returns true iff |rva| lies below |image_end_rva_|.
+ bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; }
+
+ // Swaps data in |rel32_locations_| to |dest|.
+ void SwapRel32Locations(std::vector<RVA>* dest);
+
+#if COURGETTE_HISTOGRAM_TARGETS
+ // Swaps data in |rel32_target_rvas_| to |dest|.
+ void SwapRel32TargetRVAs(std::map<RVA, int>* dest);
+#endif
+
+ // Scans through [|start_pointer|, |end_pointer|) for rel32 addresses. Seeks
+ // RVAs that satisfy the following:
+ // - Do not collide with |abs32_locations| (assumed sorted).
+ // - Do not collide with the base relocation table's RVA range, i.e.
+ // [|relocs_start_rva_|, |relocs_end_rva_|).
+ // - Whose targets are in [|start_rva|, |end_rva|).
+ // The sorted results are written to |rel32_locations_|.
+ virtual void Find(const uint8* start_pointer,
+ const uint8* end_pointer,
+ RVA start_rva,
+ RVA end_rva,
+ const std::vector<RVA>& abs32_locations) = 0;
+
+ protected:
+ // Bounds supplied at construction; never modified afterwards.
+ const RVA relocs_start_rva_;
+ const RVA relocs_end_rva_;
+ const RVA image_end_rva_;
+
+ // RVAs of the rel32 operands found so far.
+ std::vector<RVA> rel32_locations_;
+
+#if COURGETTE_HISTOGRAM_TARGETS
+ // Number of accepted rel32 references per target RVA. Debugging aid only.
+ std::map<RVA, int> rel32_target_rvas_;
+#endif
+};
+
+// The basic implementation performs naive scan for rel32 CALL, JMP and Jcc
+// opcodes (excluding JPO/JPE) disregarding instruction alignment.
+class Rel32FinderWin32X86_Basic : public Rel32FinderWin32X86 {
+ public:
+ // Parameters have the same meaning as for the Rel32FinderWin32X86
+ // constructor, to which they are forwarded.
+ Rel32FinderWin32X86_Basic(RVA relocs_start_rva, RVA relocs_end_rva,
+ RVA image_end_rva);
+ virtual ~Rel32FinderWin32X86_Basic();
+
+ // Rel32FinderWin32X86 implementation.
+ void Find(const uint8* start_pointer,
+ const uint8* end_pointer,
+ RVA start_rva,
+ RVA end_rva,
+ const std::vector<RVA>& abs32_locations) override;
+};
+
+} // namespace courgette
+
+#endif // COURGETTE_REL32_FINDER_WIN32_X86_H_
diff --git a/courgette/rel32_finder_win32_x86_unittest.cc b/courgette/rel32_finder_win32_x86_unittest.cc
new file mode 100644
index 0000000..08eb5c2
--- /dev/null
+++ b/courgette/rel32_finder_win32_x86_unittest.cc
@@ -0,0 +1,149 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/rel32_finder_win32_x86.h"
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+#include "base/macros.h"
+#include "courgette/base_test_unittest.h"
+#include "courgette/image_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace courgette {
+
+namespace {
+
+// Helper class to load and execute a Rel32FinderWin32X86 test case.
+class Rel32FinderWin32X86TestCase {
+ public:
+  // Parses |test_data| (see LoadTestFromString()) to populate the test case.
+  // Declared explicit so that a std::string is never implicitly converted
+  // into a test case.
+  explicit Rel32FinderWin32X86TestCase(const std::string& test_data)
+      : text_start_rva_(0),
+        text_end_rva_(0),
+        relocs_start_rva_(0),
+        relocs_end_rva_(0),
+        image_end_rva_(0) {
+    LoadTestFromString(test_data);
+  }
+
+ // Runs Rel32FinderWin32X86_Basic over the loaded program bytes and expects
+ // the locations it finds to equal |expected_rel32_locations_|. |name| is
+ // only used to label a failure message.
+ void RunTestBasic(std::string name) {
+ Rel32FinderWin32X86_Basic finder(relocs_start_rva_, relocs_end_rva_,
+ image_end_rva_);
+ // Guard the &text_data_[0] accesses below.
+ ASSERT_FALSE(text_data_.empty());
+ finder.Find(&text_data_[0], &text_data_[0] + text_data_.size(),
+ text_start_rva_, text_end_rva_, abs32_locations_);
+ std::vector<RVA> rel32_locations;
+ finder.SwapRel32Locations(&rel32_locations);
+ EXPECT_EQ(expected_rel32_locations_, rel32_locations)
+ << "From test case " << name << " (addresses are in hex)";
+ }
+
+ private:
+ RVA text_start_rva_;
+ RVA text_end_rva_;
+ RVA relocs_start_rva_;
+ RVA relocs_end_rva_;
+ RVA image_end_rva_;
+ std::vector<uint8> text_data_;
+ std::vector<RVA> abs32_locations_;
+ std::vector<RVA> expected_rel32_locations_;
+
+  // Scans |iss| for the next non-empty line, after removing "#"-style comments
+  // and stripping trailing spaces. On success, returns true and writes the
+  // result to |line_out|. Otherwise returns false and leaves |line_out|
+  // untouched.
+  bool ReadNonEmptyLine(std::istringstream& iss, std::string* line_out) {
+    std::string line;
+    while (std::getline(iss, line)) {
+      // Cut the line at the first '#', or keep all of it if there is none.
+      size_t end_pos = std::min(line.find('#'), line.length());
+      // Strip trailing spaces. |end_pos| is one past the last kept character,
+      // so the character to inspect is the one at |end_pos - 1|.
+      while (end_pos > 0 && line[end_pos - 1] == ' ')
+        --end_pos;
+      line.resize(end_pos);
+      if (!line.empty())
+        break;
+    }
+    if (line.empty())
+      return false;
+    line_out->swap(line);
+    return true;
+  }
+
+ // Scans |iss| for the next non-empty line, and reads (hex) uint32 into |v|.
+ // Returns true iff successful. Only the leading hex number is parsed; any
+ // text following it on the line is ignored by sscanf().
+ bool ReadHexUInt32(std::istringstream& iss, uint32* v) {
+ std::string line;
+ if (!ReadNonEmptyLine(iss, &line))
+ return false;
+ return sscanf(line.c_str(), "%X", v) == 1;
+ }
+
+  // Initializes the test case by parsing the multi-line string |test_data|
+  // to extract Rel32FinderWin32X86 parameters, and read expected values.
+  // The expected layout is: five hex RVAs (text start/end, relocs start/end,
+  // image end), then the "Program:", "Abs32:" and "Expected:" sections in
+  // that order. Parsing stops at the first failing ASSERT_*.
+  void LoadTestFromString(const std::string& test_data) {
+    // The first lines (ignoring empty ones) specify RVA bounds.
+    std::istringstream iss(test_data);
+    ASSERT_TRUE(ReadHexUInt32(iss, &text_start_rva_));
+    ASSERT_TRUE(ReadHexUInt32(iss, &text_end_rva_));
+    ASSERT_TRUE(ReadHexUInt32(iss, &relocs_start_rva_));
+    ASSERT_TRUE(ReadHexUInt32(iss, &relocs_end_rva_));
+    ASSERT_TRUE(ReadHexUInt32(iss, &image_end_rva_));
+
+    std::string line;
+    // The Program section specifies instruction bytes. We require lines to be
+    // formatted in "DUMPBIN /DISASM" style, i.e.,
+    //   "00401003: E8 00 00 00 00 call 00401008"
+    // with the hex bytes starting at character offset |kBytesBegin| of the
+    // line. We extract up to 6 bytes per line, which occupy at most
+    // |kBytesLength| characters (6 two-digit values plus 5 separators). The
+    // remaining text is ignored.
+    const size_t kBytesBegin = 12;
+    const size_t kBytesLength = 17;
+    ReadNonEmptyLine(iss, &line);
+    ASSERT_EQ("Program:", line);
+    while (ReadNonEmptyLine(iss, &line) && line != "Abs32:") {
+      // std::string::substr() throws std::out_of_range when the start
+      // position lies past the end of the string, so report a short line as
+      // a test failure instead.
+      ASSERT_GE(line.length(), kBytesBegin)
+          << "Malformed Program line: " << line;
+      std::string toks = line.substr(kBytesBegin, kBytesLength);
+      uint32 vals[6];
+      int num_read = sscanf(toks.c_str(), "%X %X %X %X %X %X", &vals[0],
+                            &vals[1], &vals[2], &vals[3], &vals[4], &vals[5]);
+      // |num_read| is the number of bytes parsed (or negative if none), so
+      // only the filled prefix of |vals| is consumed.
+      for (int i = 0; i < num_read; ++i)
+        text_data_.push_back(static_cast<uint8>(vals[i] & 0xFF));
+    }
+    ASSERT_FALSE(text_data_.empty());
+
+    // The Abs32 section specifies hex RVAs, one per line.
+    ASSERT_EQ("Abs32:", line);
+    while (ReadNonEmptyLine(iss, &line) && line != "Expected:") {
+      RVA abs32_location;
+      ASSERT_EQ(1, sscanf(line.c_str(), "%X", &abs32_location));
+      abs32_locations_.push_back(abs32_location);
+    }
+
+    // The Expected section specifies hex Rel32 RVAs, one per line.
+    ASSERT_EQ("Expected:", line);
+    while (ReadNonEmptyLine(iss, &line)) {
+      RVA rel32_location;
+      ASSERT_EQ(1, sscanf(line.c_str(), "%X", &rel32_location));
+      expected_rel32_locations_.push_back(rel32_location);
+    }
+  }
+};
+
+// Test fixture that loads a test-case file and runs the basic finder on it.
+class Rel32FinderWin32X86Test : public BaseTest {
+ public:
+ // Parses the contents of |test_case_file| into a test case and runs it; the
+ // file name doubles as the label used in failure messages.
+ // NOTE(review): FileContents() is not declared in this file; presumably it
+ // is inherited from BaseTest and reads from the courgette testdata
+ // directory -- confirm.
+ void RunTest(const char* test_case_file) {
+ Rel32FinderWin32X86TestCase test_case(FileContents(test_case_file));
+ test_case.RunTestBasic(test_case_file);
+ }
+};
+
+// Runs the basic finder against each data file: 01 covers all rel32 jump
+// opcodes, 02 abs32 and .reloc obstructions, 03 target validity, and 04 false
+// detection cases.
+TEST_F(Rel32FinderWin32X86Test, TestBasic) {
+ RunTest("rel32_win32_x86_01.txt");
+ RunTest("rel32_win32_x86_02.txt");
+ RunTest("rel32_win32_x86_03.txt");
+ RunTest("rel32_win32_x86_04.txt");
+}
+
+} // namespace
+
+} // namespace courgette
diff --git a/courgette/testdata/rel32_win32_x86_01.txt b/courgette/testdata/rel32_win32_x86_01.txt
new file mode 100644
index 0000000..4234c86
--- /dev/null
+++ b/courgette/testdata/rel32_win32_x86_01.txt
@@ -0,0 +1,55 @@
+# Test all rel32 jump instructions.
+
+# .text start RVA and end RVA
+1000
+3000
+# .reloc start RVA and end RVA
+3800
+4000
+# End RVA
+5000
+
+# Assume ImageBase = 00400000. This does not affect the test.
+Program:
+ 00401000: 55 push ebp
+ 00401001: 8B EC mov ebp,esp
+ 00401003: E8 00 00 00 00 call 00401008
+ 00401008: E9 00 00 00 00 jmp 0040100D
+ 0040100D: 0F 80 00 00 00 00 jo 00401013
+ 00401013: 0F 81 00 00 00 00 jno 00401019
+ 00401019: 0F 82 00 00 00 00 jb 0040101F
+ 0040101F: 0F 83 00 00 00 00 jae 00401025
+ 00401025: 0F 84 00 00 00 00 je 0040102B
+ 0040102B: 0F 85 00 00 00 00 jne 00401031
+ 00401031: 0F 86 00 00 00 00 jbe 00401037
+ 00401037: 0F 87 00 00 00 00 ja 0040103D
+ 0040103D: 0F 88 00 00 00 00 js 00401043
+ 00401043: 0F 89 00 00 00 00 jns 00401049
+ 00401049: 0F 8A 00 00 00 00 jp 0040104F # Ignored!
+ 0040104F: 0F 8B 00 00 00 00 jnp 00401055 # Ignored!
+ 00401055: 0F 8C 00 00 00 00 jl 0040105B
+ 0040105B: 0F 8D 00 00 00 00 jge 00401061
+ 00401061: 0F 8E 00 00 00 00 jle 00401067
+ 00401067: 0F 8F 00 00 00 00 jg 0040106D
+ 0040106D: 5D pop ebp
+ 0040106E: C3 ret
+
+Abs32:
+
+Expected:
+1004
+1009
+100F
+1015
+101B
+1021
+1027
+102D
+1033
+1039
+103F
+1045
+1057
+105D
+1063
+1069
diff --git a/courgette/testdata/rel32_win32_x86_02.txt b/courgette/testdata/rel32_win32_x86_02.txt
new file mode 100644
index 0000000..a9e5b12
--- /dev/null
+++ b/courgette/testdata/rel32_win32_x86_02.txt
@@ -0,0 +1,62 @@
+# Test rel32 jump instructions with abs32 and .reloc obstructions.
+
+# .text start RVA and end RVA
+1000
+3000
+# .reloc start RVA and end RVA
+# Typically this won't overlap with .text.
+# Note: If we use 1020, then the algorithm would never encounter it, so the
+# .reloc does not get skipped! This edge case is not worth fixing right now.
+101F
+1030
+# End RVA
+5000
+
+# Assume ImageBase = 00400000. This does not affect the test.
+Program:
+ 00401000: 55 push ebp
+ 00401001: 8B EC mov ebp,esp
+ 00401003: E8 00 00 00 00 call 00401008 # Overlaps with abs32
+ 00401008: E9 00 00 00 00 jmp 0040100D # Overlaps with abs32
+ 0040100D: 0F 80 00 00 00 00 jo 00401013
+ 00401013: 0F 81 00 00 00 00 jno 00401019
+ 00401019: 0F 82 00 00 00 00 jb 0040101F
+ 0040101F: 0F 83 00 00 00 00 jae 00401025 # Overlaps with .reloc
+ 00401025: 0F 84 00 00 00 00 je 0040102B # Overlaps with .reloc
+ 0040102B: 0F 85 00 00 00 00 jne 00401031 # Overlaps with .reloc
+ 00401031: 0F 86 00 00 00 00 jbe 00401037
+ 00401037: 0F 87 00 00 00 00 ja 0040103D
+ 0040103D: 0F 88 00 00 00 00 js 00401043
+ 00401043: 0F 89 00 00 00 00 jns 00401049
+ 00401049: 0F 8A 00 00 00 00 jp 0040104F # Ignored!
+ 0040104F: 0F 8B 00 00 00 00 jnp 00401055 # Ignored!
+ 00401055: 0F 8C 00 00 00 00 jl 0040105B # Overlaps with abs32
+ 0040105B: 0F 8D 00 00 00 00 jge 00401061 # Overlaps with abs32
+ 00401061: 0F 8E 00 00 00 00 jle 00401067
+ 00401067: 0F 8F 00 00 00 00 jg 0040106D
+ 0040106D: 5D pop ebp
+ 0040106E: C3 ret
+
+Abs32:
+1004
+1008
+105A # Straddles two instructions.
+105F # Same, but covers the op code (and not the target) of the second.
+
+Expected:
+# 1004
+# 1009
+100F
+1015
+101B
+# 1021
+# 1027
+# 102D
+1033
+1039
+103F
+1045
+# 1057
+# 105D
+1063 # Op code 0F 8E overlaps with abs32 105F, but still okay
+1069
diff --git a/courgette/testdata/rel32_win32_x86_03.txt b/courgette/testdata/rel32_win32_x86_03.txt
new file mode 100644
index 0000000..6403b10
--- /dev/null
+++ b/courgette/testdata/rel32_win32_x86_03.txt
@@ -0,0 +1,40 @@
+# Test target validity: only accept target RVA in [1000, 3000).
+
+# .text start RVA and end RVA
+1000
+3000
+# .reloc start RVA and end RVA
+3800
+4000
+# End RVA
+5000
+
+# Assume ImageBase = 00400000. This does not affect the test.
+Program:
+ 00401000: 55 push ebp
+ 00401001: 8B EC mov ebp,esp
+ 00401003: E8 F8 EF FF FF call 00400000 # RVA start, outside .text
+ 00401008: E8 F3 FF FF FF call 00401000
+ 0040100D: E8 ED FF FF FF call 00400FFF # 1 byte before .text
+ 00401012: 90 nop # Padding so E8 & E9 ...
+ 00401013: 90 nop
+ 00401014: E9 E7 FF FF FF jmp 00401000 # ... don't appear here.
+ 00401019: E9 E1 FF FF FF jmp 00400FFF # 1 byte before .text
+ 0040101E: E8 DC 1F 00 00 call 00402FFF
+ 00401023: E8 D8 1F 00 00 call 00403000 # 1 byte after .text
+ 00401028: 0F 87 D1 1F 00 00 ja 00402FFF
+ 0040102E: 0F 88 CC 1F 00 00 js 00403000 # 1 byte after .text
+ 00401034: E8 C6 3F 00 00 call 00404FFF # In image, outside .text
+ 00401039: E8 C2 3F 00 00 call 00405000 # Outside image
+ 0040103E: E8 BE 3F 00 00 call 00405001 # Outside image
+ 00401043: E8 88 88 88 88 call 88C898D0 # Far away
+ 00401048: 5D pop ebp
+ 00401049: C3 ret
+
+Abs32:
+
+Expected:
+1009
+1015
+101F
+102A
diff --git a/courgette/testdata/rel32_win32_x86_04.txt b/courgette/testdata/rel32_win32_x86_04.txt
new file mode 100644
index 0000000..30b4f26
--- /dev/null
+++ b/courgette/testdata/rel32_win32_x86_04.txt
@@ -0,0 +1,47 @@
+# Test false detection cases.
+
+# .text start RVA and end RVA
+1000
+3000
+# .reloc start RVA and end RVA
+3800
+4000
+# End RVA
+5000
+
+# Assume ImageBase = 00400000. This does not affect the test.
+Program:
+ 00401000: 55 push ebp
+ 00401001: 8B EC mov ebp,esp
+ 00401003: B8 E8 00 00 00 mov eax,0E8h # E8 00 00 00 00
+ 00401008: 00 C0 add al,al
+ 0040100A: 90 nop
+ 0040100B: 90 nop
+ 0040100C: B9 00 00 00 E9 mov ecx,0E9000000h # E9 E8 00 00 00
+ 00401011: E8 00 00 00 00 call 00401016
+ 00401016: 90 nop
+ 00401017: 90 nop
+ 00401018: B1 0F mov cl,0Fh # 0F 80 C0 00 00 00
+ 0040101A: 80 C0 00 add al,0
+ 0040101D: 00 00 add byte ptr [eax],al
+ 0040101F: 90 nop
+ 00401020: 90 nop
+ 00401021: B8 E8 00 00 00 mov eax,0E8h # E8 00 00 00 E8
+ 00401026: E8 00 00 00 00 call 0040102B
+ 0040102B: 90 nop
+ 0040102C: 90 nop
+ 0040102D: E8 00 E9 00 00 call 0040F932 # E9 00 00 00 00
+ 00401032: 00 00 add byte ptr [eax],al
+ 00401034: 5D pop ebp
+ 00401035: C3 ret
+
+Abs32:
+
+Expected:
+1005 # False positive
+1011 # False positive
+# 1012 => False negative: shadowed by 1011
+101B # False positive
+# 1023 => Potential false positive, but suppressed since target is outside .text
+1027 # Emerges since it's not shadowed by 1023
+1030 # False positive: target of 102E was outside .text, so fall back to this
diff --git a/courgette/typedrva_unittest.cc b/courgette/typedrva_unittest.cc
index 1fa185f..dc30fa9 100644
--- a/courgette/typedrva_unittest.cc
+++ b/courgette/typedrva_unittest.cc
@@ -5,6 +5,7 @@
#include "courgette/base_test_unittest.h"
#include "courgette/disassembler_elf_32_arm.h"
#include "courgette/disassembler_elf_32_x86.h"
+#include "courgette/image_utils.h"
class TypedRVATest : public BaseTest {
public: