summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorhuangs <huangs@chromium.org>2015-12-07 17:27:46 -0800
committerCommit bot <commit-bot@chromium.org>2015-12-08 01:28:46 +0000
commit7a2fea2555a8014c4bdcdbc1119b97f98e057248 (patch)
treec16e6282db8e51c0d6f37adde6b7effc14a20b09 /courgette
parent56c43a002072a04abe25a77f400a1e3425ab8991 (diff)
downloadchromium_src-7a2fea2555a8014c4bdcdbc1119b97f98e057248.zip
chromium_src-7a2fea2555a8014c4bdcdbc1119b97f98e057248.tar.gz
chromium_src-7a2fea2555a8014c4bdcdbc1119b97f98e057248.tar.bz2
[Courgette] Initial Implementation of LabelManager
This is part of the effort to reduce Courgette's peak memory. Main changes: - Moving Label to image_utils.h, and change Label::count_ from int to int32. - Adding utility class ConsecutiveRangeVisitor, with tests. - Adding LabelManager, with tests. The new code is not yet used in production. Review URL: https://codereview.chromium.org/1491703003 Cr-Commit-Position: refs/heads/master@{#363688}
Diffstat (limited to 'courgette')
-rw-r--r--courgette/BUILD.gn5
-rw-r--r--courgette/assembly_program.h18
-rw-r--r--courgette/consecutive_range_visitor.h57
-rw-r--r--courgette/consecutive_range_visitor_unittest.cc70
-rw-r--r--courgette/courgette.gyp5
-rw-r--r--courgette/image_utils.h20
-rw-r--r--courgette/label_manager.cc74
-rw-r--r--courgette/label_manager.h64
-rw-r--r--courgette/label_manager_unittest.cc143
9 files changed, 439 insertions, 17 deletions
diff --git a/courgette/BUILD.gn b/courgette/BUILD.gn
index bc9a9f6..f27c81e 100644
--- a/courgette/BUILD.gn
+++ b/courgette/BUILD.gn
@@ -11,6 +11,7 @@ static_library("courgette_lib") {
"adjustment_method_2.cc",
"assembly_program.cc",
"assembly_program.h",
+ "consecutive_range_visitor.h",
"courgette.h",
"crc.cc",
"crc.h",
@@ -35,6 +36,8 @@ static_library("courgette_lib") {
"ensemble_apply.cc",
"ensemble_create.cc",
"image_utils.h",
+ "label_manager.cc",
+ "label_manager.h",
"memory_allocator.cc",
"memory_allocator.h",
"patch_generator_x86_32.h",
@@ -100,6 +103,7 @@ test("courgette_unittests") {
"base_test_unittest.cc",
"base_test_unittest.h",
"bsdiff_memory_unittest.cc",
+ "consecutive_range_visitor_unittest.cc",
"difference_estimator_unittest.cc",
"disassembler_elf_32_x86_unittest.cc",
"disassembler_win32_x64_unittest.cc",
@@ -108,6 +112,7 @@ test("courgette_unittests") {
"encoded_program_unittest.cc",
"ensemble_unittest.cc",
"image_utils_unittest.cc",
+ "label_manager_unittest.cc",
"memory_allocator_unittest.cc",
"rel32_finder_win32_x86_unittest.cc",
"streams_unittest.cc",
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
index ba7ad59f18..88cf2e8 100644
--- a/courgette/assembly_program.h
+++ b/courgette/assembly_program.h
@@ -13,29 +13,13 @@
#include "base/memory/scoped_ptr.h"
#include "courgette/disassembler.h"
+#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
namespace courgette {
class EncodedProgram;
-// A Label is a symbolic reference to an address. Unlike a conventional
-// assembly language, we always know the address. The address will later be
-// stored in a table and the Label will be replaced with the index into the
-// table.
-//
-// TODO(sra): Make fields private and add setters and getters.
-class Label {
- public:
- static const int kNoIndex = -1;
- Label() : rva_(0), index_(kNoIndex), count_(0) {}
- explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
-
- RVA rva_; // Address referred to by the label.
- int index_; // Index of address in address table, kNoIndex until assigned.
- int count_;
-};
-
typedef std::map<RVA, Label*> RVAToLabel;
// Opcodes of simple assembly language
diff --git a/courgette/consecutive_range_visitor.h b/courgette/consecutive_range_visitor.h
new file mode 100644
index 0000000..562409d
--- /dev/null
+++ b/courgette/consecutive_range_visitor.h
@@ -0,0 +1,57 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
+#define COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
+
+#include <iterator>
+
+#include "base/macros.h"
+
+namespace courgette {
+
+// Usage note: First check whether std::unique() would suffice.
+//
+// ConsecutiveRangeVisitor is a visitor to read equal consecutive items
+// ("ranges") between two iterators. The base value of InputIterator must
+// implement the == operator.
+//
+// Example: "AAAAABZZZZOO" consists of ranges ["AAAAA", "B", "ZZZZ", "OO"]. The
+// visitor provides accessors to iterate through the ranges, and to access each
+// range's value and repeat, i.e., [('A', 5), ('B', 1), ('Z', 4), ('O', 2)].
+template <class InputIterator>
+class ConsecutiveRangeVisitor {
+ public:
+ ConsecutiveRangeVisitor(InputIterator begin, InputIterator end)
+ : head_(begin), end_(end) {
+ advance();
+ }
+
+ // Returns whether there are more ranges to traverse.
+ bool has_more() const { return tail_ != end_; }
+
+ // Returns an iterator to an element in the current range.
+ InputIterator cur() const { return tail_; }
+
+ // Returns the number of repeated elements in the current range.
+ size_t repeat() const { return std::distance(tail_, head_); }
+
+ // Advances to the next range.
+ void advance() {
+ tail_ = head_;
+ if (head_ != end_)
+ while (++head_ != end_ && *head_ == *tail_) {}
+ }
+
+ private:
+ InputIterator tail_; // The trailing pionter of a range (inclusive).
+ InputIterator head_; // The leading pointer of a range (exclusive).
+ InputIterator end_; // Store the end pointer so we know when to stop.
+
+ DISALLOW_COPY_AND_ASSIGN(ConsecutiveRangeVisitor);
+};
+
+} // namespace courgette
+
+#endif // COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
diff --git a/courgette/consecutive_range_visitor_unittest.cc b/courgette/consecutive_range_visitor_unittest.cc
new file mode 100644
index 0000000..648a6d7
--- /dev/null
+++ b/courgette/consecutive_range_visitor_unittest.cc
@@ -0,0 +1,70 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/consecutive_range_visitor.h"
+
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace courgette {
+
+TEST(ConsecutiveRangeVisitorTest, Basic) {
+ std::string s = "AAAAABZZZZOO";
+ ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ('A', *vis.cur());
+ EXPECT_EQ(5U, vis.repeat());
+ vis.advance();
+
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ('B', *vis.cur());
+ EXPECT_EQ(1U, vis.repeat());
+ vis.advance();
+
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ('Z', *vis.cur());
+ EXPECT_EQ(4U, vis.repeat());
+ vis.advance();
+
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ('O', *vis.cur());
+ EXPECT_EQ(2U, vis.repeat());
+ vis.advance();
+
+ EXPECT_FALSE(vis.has_more());
+}
+
+TEST(ConsecutiveRangeVisitorTest, UnitRanges) {
+ // Unsorted, no consecutive characters.
+ const char s[] = "elephant elephant";
+ ConsecutiveRangeVisitor<const char*> vis(std::begin(s), std::end(s) - 1);
+ for (const char* scan = &s[0]; *scan; ++scan) {
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ(*scan, *vis.cur());
+ EXPECT_EQ(1U, vis.repeat());
+ vis.advance();
+ }
+ EXPECT_FALSE(vis.has_more());
+}
+
+TEST(ConsecutiveRangeVisitorTest, SingleRange) {
+ for (size_t len = 1U; len < 10U; ++len) {
+ std::vector<int> v(len, 137);
+ ConsecutiveRangeVisitor<std::vector<int>::iterator> vis(v.begin(), v.end());
+ EXPECT_TRUE(vis.has_more());
+ EXPECT_EQ(137, *vis.cur());
+ EXPECT_EQ(len, vis.repeat());
+ vis.advance();
+ EXPECT_FALSE(vis.has_more());
+ }
+}
+
+TEST(ConsecutiveRangeVisitorTest, Empty) {
+ std::string s;
+ ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
+ EXPECT_FALSE(vis.has_more());
+}
+
+} // namespace courgette
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp
index e5ec783..788d4ea 100644
--- a/courgette/courgette.gyp
+++ b/courgette/courgette.gyp
@@ -11,6 +11,7 @@
'adjustment_method.h',
'assembly_program.cc',
'assembly_program.h',
+ 'consecutive_range_visitor.h',
'courgette.h',
'crc.cc',
'crc.h',
@@ -35,6 +36,8 @@
'ensemble_apply.cc',
'ensemble_create.cc',
'image_utils.h',
+ 'label_manager.cc',
+ 'label_manager.h',
'memory_allocator.cc',
'memory_allocator.h',
'region.h',
@@ -102,6 +105,7 @@
'bsdiff_memory_unittest.cc',
'base_test_unittest.cc',
'base_test_unittest.h',
+ 'consecutive_range_visitor_unittest.cc',
'difference_estimator_unittest.cc',
'disassembler_elf_32_x86_unittest.cc',
'disassembler_win32_x86_unittest.cc',
@@ -110,6 +114,7 @@
'encode_decode_unittest.cc',
'ensemble_unittest.cc',
'image_utils_unittest.cc',
+ 'label_manager_unittest.cc',
'memory_allocator_unittest.cc',
'rel32_finder_win32_x86_unittest.cc',
'streams_unittest.cc',
diff --git a/courgette/image_utils.h b/courgette/image_utils.h
index c016357..47ba4f4 100644
--- a/courgette/image_utils.h
+++ b/courgette/image_utils.h
@@ -15,6 +15,26 @@ namespace courgette {
typedef uint32 RVA;
+// A Label is a symbolic reference to an address. Unlike a conventional
+// assembly language, we always know the address. The address will later be
+// stored in a table and the Label will be replaced with the index into the
+// table.
+// TODO(huangs): Make this a struct, and remove "_" from member names.
+class Label {
+ public:
+ enum : int { kNoIndex = -1 };
+ explicit Label(RVA rva) : rva_(rva) {}
+
+ bool operator==(const Label& other) const {
+ return rva_ == other.rva_ && index_ == other.index_ &&
+ count_ == other.count_;
+ }
+
+ RVA rva_ = 0; // Address referred to by the label.
+ int index_ = kNoIndex; // Index of address in address table.
+ int32 count_ = 0;
+};
+
// These helper functions avoid the need for casts in the main code.
inline uint16 ReadU16(const uint8* address, size_t offset) {
return *reinterpret_cast<const uint16*>(address + offset);
diff --git a/courgette/label_manager.cc b/courgette/label_manager.cc
new file mode 100644
index 0000000..14b6b4f
--- /dev/null
+++ b/courgette/label_manager.cc
@@ -0,0 +1,74 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/label_manager.h"
+
+#include <algorithm>
+
+#include "base/logging.h"
+#include "base/numerics/safe_math.h"
+#include "courgette/consecutive_range_visitor.h"
+
+namespace courgette {
+
+LabelManager::RvaVisitor::~RvaVisitor() {}
+
+LabelManager::LabelManager() {}
+
+LabelManager::~LabelManager() {}
+
+// We wish to minimize peak memory usage here. Analysis: Let
+// m = number of (RVA) elements in |rva_visitor|,
+// n = number of distinct (RVA) elements in |rva_visitor|.
+// The final storage is n * sizeof(Label) bytes. During computation we uniquify
+// m RVAs, and count repeats. Taking sizeof(RVA) = 4, an implementation using
+// std::map or std::unordered_map would consume additionally 32 * n bytes.
+// Meanwhile, our std::vector implementation consumes additionally 4 * m bytes
+// For our typical usage (i.e. Chrome) we see m = ~4n, so we use 16 * n bytes of
+// extra contiguous memory during computation. Assuming memory fragmentation
+// would not be an issue, this is much better than using std::map.
+void LabelManager::Read(RvaVisitor* rva_visitor) {
+ // Write all values in |rva_visitor| to |rvas|.
+ size_t num_rva = rva_visitor->Remaining();
+ std::vector<RVA> rvas(num_rva);
+ for (size_t i = 0; i < num_rva; ++i, rva_visitor->Next())
+ rvas[i] = rva_visitor->Get();
+
+ // Sort |rvas|, then count the number of distinct values.
+ using CRV = ConsecutiveRangeVisitor<std::vector<RVA>::iterator>;
+ std::sort(rvas.begin(), rvas.end());
+ size_t num_distinct_rva = 0;
+ for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance())
+ ++num_distinct_rva;
+
+ // Reserve space for |labels_|, populate with sorted RVA and repeats.
+ DCHECK(labels_.empty());
+ labels_.reserve(num_distinct_rva);
+ for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) {
+ labels_.push_back(Label(*it.cur()));
+ base::CheckedNumeric<uint32> count = it.repeat();
+ labels_.back().count_ = count.ValueOrDie();
+ }
+}
+
+void LabelManager::RemoveUnderusedLabels(int32 count_threshold) {
+ if (count_threshold <= 0)
+ return;
+ labels_.erase(std::remove_if(labels_.begin(), labels_.end(),
+ [count_threshold](const Label& label) {
+ return label.count_ < count_threshold;
+ }),
+ labels_.end());
+ // Not shrinking |labels_|, since this may cause reallocation.
+}
+
+// Uses binary search to find |rva|.
+Label* LabelManager::Find(RVA rva) {
+ auto it = std::lower_bound(
+ labels_.begin(), labels_.end(), Label(rva),
+ [](const Label& l1, const Label& l2) { return l1.rva_ < l2.rva_; });
+ return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it);
+}
+
+} // namespace courgette
diff --git a/courgette/label_manager.h b/courgette/label_manager.h
new file mode 100644
index 0000000..595e2e7
--- /dev/null
+++ b/courgette/label_manager.h
@@ -0,0 +1,64 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_LABEL_MANAGER_H_
+#define COURGETTE_LABEL_MANAGER_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "courgette/image_utils.h"
+
+namespace courgette {
+
+// A container to store and manage Label instances. A key consideration is peak
+// memory usage reduction. To this end we preallocate Label instances in bulk,
+// and carefully control transient memory usage when initializing Labels.
+class LabelManager {
+ public:
+ // An adaptor to sequentially traverse multiple RVAs. This is useful for RVA
+ // translation without extra storage. For example, we might have a stored list
+ // of RVA locations, but we want to traverse the matching RVA targets.
+ class RvaVisitor {
+ public:
+ virtual ~RvaVisitor();
+
+ // Returns the number of remaining RVAs to visit.
+ virtual size_t Remaining() const = 0;
+
+ // Returns the current RVA.
+ virtual RVA Get() const = 0;
+
+ // Advances to the next RVA.
+ virtual void Next() = 0;
+ };
+
+ LabelManager();
+ virtual ~LabelManager();
+
+ // Initializes |labels_| using RVAs from |rva_visitor|. Each distinct RVA from
+ // |rva_visitor| yields a Label with |rva_| assigned as the RVA, and |count_|
+ // assigned as the repeat.
+ void Read(RvaVisitor* rva_visitor);
+
+ // Removes |labels_| elements whose |count_| is less than |count_threshold|.
+ void RemoveUnderusedLabels(int32 count_threshold);
+
+ // Efficiently searches for a Label that targets |rva|. Returns the pointer to
+ // the stored Label instance if found, or null otherwise.
+ Label* Find(RVA rva);
+
+ // TODO(huangs): Move AssignRemainingIndexes() here.
+
+ protected:
+ // The main list of Label instances, sorted by the |rva_| member.
+ std::vector<Label> labels_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LabelManager);
+};
+
+} // namespace courgette
+
+#endif // COURGETTE_LABEL_MANAGER_H_
diff --git a/courgette/label_manager_unittest.cc b/courgette/label_manager_unittest.cc
new file mode 100644
index 0000000..258d411
--- /dev/null
+++ b/courgette/label_manager_unittest.cc
@@ -0,0 +1,143 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/label_manager.h"
+
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace courgette {
+
+namespace {
+
+// Test version of RvaVisitor: Just wrap std::vector<RVA>.
+class TestRvaVisitor : public LabelManager::RvaVisitor {
+ public:
+ explicit TestRvaVisitor(std::vector<RVA>::const_iterator rva_begin,
+ std::vector<RVA>::const_iterator rva_end)
+ : rva_it_(rva_begin), rva_end_(rva_end) {}
+
+ ~TestRvaVisitor() override {}
+
+ size_t Remaining() const override { return std::distance(rva_it_, rva_end_); }
+
+ RVA Get() const override { return *rva_it_; }
+
+ void Next() override { ++rva_it_; }
+
+ private:
+ std::vector<RVA>::const_iterator rva_it_;
+ std::vector<RVA>::const_iterator rva_end_;
+};
+
+// Test version of LabelManager: Expose data to test implementation.
+class TestLabelManager : public LabelManager {
+ public:
+ const size_t LabelCount() const { return labels_.size(); };
+};
+
+void CheckLabelManagerContent(TestLabelManager* label_manager,
+ const std::map<RVA, int32>& expected) {
+ EXPECT_EQ(expected.size(), label_manager->LabelCount());
+ for (const auto& rva_and_count : expected) {
+ Label* label = label_manager->Find(rva_and_count.first);
+ EXPECT_TRUE(label != nullptr);
+ EXPECT_EQ(rva_and_count.first, label->rva_);
+ EXPECT_EQ(rva_and_count.second, label->count_);
+ }
+}
+
+} // namespace
+
+TEST(LabelManagerTest, Basic) {
+ static const RVA kTestTargetsRaw[] = {
+ 0x04000010,
+ 0x04000030,
+ 0x04000020,
+ 0x04000010, // Redundant
+ 0xFEEDF00D,
+ 0x04000030, // Redundant
+ 0xFEEDF00D, // Redundant
+ 0x00000110,
+ 0x04000010, // Redundant
+ 0xABCD1234
+ };
+ std::vector<RVA> test_targets(std::begin(kTestTargetsRaw),
+ std::end(kTestTargetsRaw));
+ TestRvaVisitor visitor(test_targets.begin(), test_targets.end());
+
+ // Preallocate targets, then populate.
+ TestLabelManager label_manager;
+ label_manager.Read(&visitor);
+
+ static const std::pair<RVA, int32> kExpected1Raw[] = {
+ {0x00000110, 1},
+ {0x04000010, 3},
+ {0x04000020, 1},
+ {0x04000030, 2},
+ {0xABCD1234, 1},
+ {0xFEEDF00D, 2}
+ };
+ std::map<RVA, int32> expected1(std::begin(kExpected1Raw),
+ std::end(kExpected1Raw));
+
+ CheckLabelManagerContent(&label_manager, expected1);
+
+ // Expect to *not* find labels for various RVAs that were never added.
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0x00000000)));
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0x0400000F)));
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0x04000011)));
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0x5F3759DF)));
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0xFEEDFFF0)));
+ EXPECT_EQ(nullptr, label_manager.Find(RVA(0xFFFFFFFF)));
+
+ // Remove Labels with |count_| < 2.
+ label_manager.RemoveUnderusedLabels(2);
+ static const std::pair<RVA, int32> kExpected2Raw[] = {
+ {0x04000010, 3},
+ {0x04000030, 2},
+ {0xFEEDF00D, 2}
+ };
+ std::map<RVA, int32> expected2(std::begin(kExpected2Raw),
+ std::end(kExpected2Raw));
+ CheckLabelManagerContent(&label_manager, expected2);
+}
+
+TEST(LabelManagerTest, Single) {
+ const RVA kRva = 12U;
+ for (int dup = 1; dup < 8; ++dup) {
+ // Test data: |dup| copies of kRva.
+ std::vector<RVA> test_targets(dup, kRva);
+ TestRvaVisitor visitor(test_targets.begin(), test_targets.end());
+ TestLabelManager label_manager;
+ label_manager.Read(&visitor);
+ EXPECT_EQ(1U, label_manager.LabelCount()); // Deduped to 1 Label.
+
+ Label* label = label_manager.Find(kRva);
+ EXPECT_NE(nullptr, label);
+ EXPECT_EQ(kRva, label->rva_);
+ EXPECT_EQ(dup, label->count_);
+
+ for (RVA rva = 0U; rva < 16U; ++rva) {
+ if (rva != kRva)
+ EXPECT_EQ(nullptr, label_manager.Find(rva));
+ }
+ }
+}
+
+TEST(LabelManagerTest, Empty) {
+ std::vector<RVA> empty_test_targets;
+ TestRvaVisitor visitor(empty_test_targets.begin(), empty_test_targets.end());
+ TestLabelManager label_manager;
+ label_manager.Read(&visitor);
+ EXPECT_EQ(0U, label_manager.LabelCount());
+ for (RVA rva = 0U; rva < 16U; ++rva)
+ EXPECT_EQ(nullptr, label_manager.Find(rva));
+}
+
+} // namespace courgette