From 7a2fea2555a8014c4bdcdbc1119b97f98e057248 Mon Sep 17 00:00:00 2001 From: huangs Date: Mon, 7 Dec 2015 17:27:46 -0800 Subject: [Courgette] Initial Implementation of LabelManager This is part of the effort to reduce Courgette's peak memory. Main changes: - Moving Label to image_utils.h, and change Label::count_ from int to int32. - Adding utility class ConsecutiveRangeVisitor, with tests. - Adding LabelManager, with tests. The new code is not yet used in production. Review URL: https://codereview.chromium.org/1491703003 Cr-Commit-Position: refs/heads/master@{#363688} --- courgette/BUILD.gn | 5 + courgette/assembly_program.h | 18 +-- courgette/consecutive_range_visitor.h | 57 ++++++++++ courgette/consecutive_range_visitor_unittest.cc | 70 ++++++++++++ courgette/courgette.gyp | 5 + courgette/image_utils.h | 20 ++++ courgette/label_manager.cc | 74 ++++++++++++ courgette/label_manager.h | 64 +++++++++++ courgette/label_manager_unittest.cc | 143 ++++++++++++++++++++++++ 9 files changed, 439 insertions(+), 17 deletions(-) create mode 100644 courgette/consecutive_range_visitor.h create mode 100644 courgette/consecutive_range_visitor_unittest.cc create mode 100644 courgette/label_manager.cc create mode 100644 courgette/label_manager.h create mode 100644 courgette/label_manager_unittest.cc diff --git a/courgette/BUILD.gn b/courgette/BUILD.gn index bc9a9f6..f27c81e 100644 --- a/courgette/BUILD.gn +++ b/courgette/BUILD.gn @@ -11,6 +11,7 @@ static_library("courgette_lib") { "adjustment_method_2.cc", "assembly_program.cc", "assembly_program.h", + "consecutive_range_visitor.h", "courgette.h", "crc.cc", "crc.h", @@ -35,6 +36,8 @@ static_library("courgette_lib") { "ensemble_apply.cc", "ensemble_create.cc", "image_utils.h", + "label_manager.cc", + "label_manager.h", "memory_allocator.cc", "memory_allocator.h", "patch_generator_x86_32.h", @@ -100,6 +103,7 @@ test("courgette_unittests") { "base_test_unittest.cc", "base_test_unittest.h", "bsdiff_memory_unittest.cc", + "consecutive_range_visitor_unittest.cc", "difference_estimator_unittest.cc", "disassembler_elf_32_x86_unittest.cc", "disassembler_win32_x64_unittest.cc", @@ -108,6 +112,7 @@ test("courgette_unittests") { "encoded_program_unittest.cc", "ensemble_unittest.cc", "image_utils_unittest.cc", + "label_manager_unittest.cc", "memory_allocator_unittest.cc", "rel32_finder_win32_x86_unittest.cc", "streams_unittest.cc", diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h index ba7ad59f18..88cf2e8 100644 --- a/courgette/assembly_program.h +++ b/courgette/assembly_program.h @@ -13,29 +13,13 @@ #include "base/memory/scoped_ptr.h" #include "courgette/disassembler.h" +#include "courgette/image_utils.h" #include "courgette/memory_allocator.h" namespace courgette { class EncodedProgram; -// A Label is a symbolic reference to an address. Unlike a conventional -// assembly language, we always know the address. The address will later be -// stored in a table and the Label will be replaced with the index into the -// table. -// -// TODO(sra): Make fields private and add setters and getters. -class Label { - public: - static const int kNoIndex = -1; - Label() : rva_(0), index_(kNoIndex), count_(0) {} - explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {} - - RVA rva_; // Address referred to by the label. - int index_; // Index of address in address table, kNoIndex until assigned. - int count_; -}; - typedef std::map RVAToLabel; // Opcodes of simple assembly language diff --git a/courgette/consecutive_range_visitor.h b/courgette/consecutive_range_visitor.h new file mode 100644 index 0000000..562409d --- /dev/null +++ b/courgette/consecutive_range_visitor.h @@ -0,0 +1,57 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_ +#define COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_ + +#include + +#include "base/macros.h" + +namespace courgette { + +// Usage note: First check whether std::unique() would suffice. +// +// ConsecutiveRangeVisitor is a visitor to read equal consecutive items +// ("ranges") between two iterators. The base value of InputIterator must +// implement the == operator. +// +// Example: "AAAAABZZZZOO" consists of ranges ["AAAAA", "B", "ZZZZ", "OO"]. The +// visitor provides accessors to iterate through the ranges, and to access each +// range's value and repeat, i.e., [('A', 5), ('B', 1), ('Z', 4), ('O', 2)]. +template +class ConsecutiveRangeVisitor { + public: + ConsecutiveRangeVisitor(InputIterator begin, InputIterator end) + : head_(begin), end_(end) { + advance(); + } + + // Returns whether there are more ranges to traverse. + bool has_more() const { return tail_ != end_; } + + // Returns an iterator to an element in the current range. + InputIterator cur() const { return tail_; } + + // Returns the number of repeated elements in the current range. + size_t repeat() const { return std::distance(tail_, head_); } + + // Advances to the next range. + void advance() { + tail_ = head_; + if (head_ != end_) + while (++head_ != end_ && *head_ == *tail_) {} + } + + private: + InputIterator tail_; // The trailing pionter of a range (inclusive). + InputIterator head_; // The leading pointer of a range (exclusive). + InputIterator end_; // Store the end pointer so we know when to stop. + + DISALLOW_COPY_AND_ASSIGN(ConsecutiveRangeVisitor); +}; + +} // namespace courgette + +#endif // COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_ diff --git a/courgette/consecutive_range_visitor_unittest.cc b/courgette/consecutive_range_visitor_unittest.cc new file mode 100644 index 0000000..648a6d7 --- /dev/null +++ b/courgette/consecutive_range_visitor_unittest.cc @@ -0,0 +1,70 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "courgette/consecutive_range_visitor.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" + +namespace courgette { + +TEST(ConsecutiveRangeVisitorTest, Basic) { + std::string s = "AAAAABZZZZOO"; + ConsecutiveRangeVisitor vis(s.begin(), s.end()); + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ('A', *vis.cur()); + EXPECT_EQ(5U, vis.repeat()); + vis.advance(); + + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ('B', *vis.cur()); + EXPECT_EQ(1U, vis.repeat()); + vis.advance(); + + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ('Z', *vis.cur()); + EXPECT_EQ(4U, vis.repeat()); + vis.advance(); + + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ('O', *vis.cur()); + EXPECT_EQ(2U, vis.repeat()); + vis.advance(); + + EXPECT_FALSE(vis.has_more()); +} + +TEST(ConsecutiveRangeVisitorTest, UnitRanges) { + // Unsorted, no consecutive characters. + const char s[] = "elephant elephant"; + ConsecutiveRangeVisitor vis(std::begin(s), std::end(s) - 1); + for (const char* scan = &s[0]; *scan; ++scan) { + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ(*scan, *vis.cur()); + EXPECT_EQ(1U, vis.repeat()); + vis.advance(); + } + EXPECT_FALSE(vis.has_more()); +} + +TEST(ConsecutiveRangeVisitorTest, SingleRange) { + for (size_t len = 1U; len < 10U; ++len) { + std::vector v(len, 137); + ConsecutiveRangeVisitor::iterator> vis(v.begin(), v.end()); + EXPECT_TRUE(vis.has_more()); + EXPECT_EQ(137, *vis.cur()); + EXPECT_EQ(len, vis.repeat()); + vis.advance(); + EXPECT_FALSE(vis.has_more()); + } +} + +TEST(ConsecutiveRangeVisitorTest, Empty) { + std::string s; + ConsecutiveRangeVisitor vis(s.begin(), s.end()); + EXPECT_FALSE(vis.has_more()); +} + +} // namespace courgette diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp index e5ec783..788d4ea 100644 --- a/courgette/courgette.gyp +++ b/courgette/courgette.gyp @@ -11,6 +11,7 @@ 'adjustment_method.h', 'assembly_program.cc', 'assembly_program.h', + 'consecutive_range_visitor.h', 'courgette.h', 'crc.cc', 'crc.h', @@ -35,6 +36,8 @@ 'ensemble_apply.cc', 'ensemble_create.cc', 'image_utils.h', + 'label_manager.cc', + 'label_manager.h', 'memory_allocator.cc', 'memory_allocator.h', 'region.h', @@ -102,6 +105,7 @@ 'bsdiff_memory_unittest.cc', 'base_test_unittest.cc', 'base_test_unittest.h', + 'consecutive_range_visitor_unittest.cc', 'difference_estimator_unittest.cc', 'disassembler_elf_32_x86_unittest.cc', 'disassembler_win32_x86_unittest.cc', @@ -110,6 +114,7 @@ 'encode_decode_unittest.cc', 'ensemble_unittest.cc', 'image_utils_unittest.cc', + 'label_manager_unittest.cc', 'memory_allocator_unittest.cc', 'rel32_finder_win32_x86_unittest.cc', 'streams_unittest.cc', diff --git a/courgette/image_utils.h b/courgette/image_utils.h index c016357..47ba4f4 100644 --- a/courgette/image_utils.h +++ b/courgette/image_utils.h @@ -15,6 +15,26 @@ namespace courgette { typedef uint32 RVA; +// A Label is a symbolic reference to an address. Unlike a conventional +// assembly language, we always know the address. The address will later be +// stored in a table and the Label will be replaced with the index into the +// table. +// TODO(huangs): Make this a struct, and remove "_" from member names. +class Label { + public: + enum : int { kNoIndex = -1 }; + explicit Label(RVA rva) : rva_(rva) {} + + bool operator==(const Label& other) const { + return rva_ == other.rva_ && index_ == other.index_ && + count_ == other.count_; + } + + RVA rva_ = 0; // Address referred to by the label. + int index_ = kNoIndex; // Index of address in address table. + int32 count_ = 0; +}; + // These helper functions avoid the need for casts in the main code. inline uint16 ReadU16(const uint8* address, size_t offset) { return *reinterpret_cast(address + offset); diff --git a/courgette/label_manager.cc b/courgette/label_manager.cc new file mode 100644 index 0000000..14b6b4f --- /dev/null +++ b/courgette/label_manager.cc @@ -0,0 +1,74 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "courgette/label_manager.h" + +#include + +#include "base/logging.h" +#include "base/numerics/safe_math.h" +#include "courgette/consecutive_range_visitor.h" + +namespace courgette { + +LabelManager::RvaVisitor::~RvaVisitor() {} + +LabelManager::LabelManager() {} + +LabelManager::~LabelManager() {} + +// We wish to minimize peak memory usage here. Analysis: Let +// m = number of (RVA) elements in |rva_visitor|, +// n = number of distinct (RVA) elements in |rva_visitor|. +// The final storage is n * sizeof(Label) bytes. During computation we uniquify +// m RVAs, and count repeats. Taking sizeof(RVA) = 4, an implementation using +// std::map or std::unordered_map would consume additionally 32 * n bytes. +// Meanwhile, our std::vector implementation consumes additionally 4 * m bytes +// For our typical usage (i.e. Chrome) we see m = ~4n, so we use 16 * n bytes of +// extra contiguous memory during computation. Assuming memory fragmentation +// would not be an issue, this is much better than using std::map. +void LabelManager::Read(RvaVisitor* rva_visitor) { + // Write all values in |rva_visitor| to |rvas|. + size_t num_rva = rva_visitor->Remaining(); + std::vector rvas(num_rva); + for (size_t i = 0; i < num_rva; ++i, rva_visitor->Next()) + rvas[i] = rva_visitor->Get(); + + // Sort |rvas|, then count the number of distinct values. + using CRV = ConsecutiveRangeVisitor::iterator>; + std::sort(rvas.begin(), rvas.end()); + size_t num_distinct_rva = 0; + for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) + ++num_distinct_rva; + + // Reserve space for |labels_|, populate with sorted RVA and repeats. + DCHECK(labels_.empty()); + labels_.reserve(num_distinct_rva); + for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) { + labels_.push_back(Label(*it.cur())); + base::CheckedNumeric count = it.repeat(); + labels_.back().count_ = count.ValueOrDie(); + } +} + +void LabelManager::RemoveUnderusedLabels(int32 count_threshold) { + if (count_threshold <= 0) + return; + labels_.erase(std::remove_if(labels_.begin(), labels_.end(), + [count_threshold](const Label& label) { + return label.count_ < count_threshold; + }), + labels_.end()); + // Not shrinking |labels_|, since this may cause reallocation. +} + +// Uses binary search to find |rva|. +Label* LabelManager::Find(RVA rva) { + auto it = std::lower_bound( + labels_.begin(), labels_.end(), Label(rva), + [](const Label& l1, const Label& l2) { return l1.rva_ < l2.rva_; }); + return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it); +} + +} // namespace courgette diff --git a/courgette/label_manager.h b/courgette/label_manager.h new file mode 100644 index 0000000..595e2e7 --- /dev/null +++ b/courgette/label_manager.h @@ -0,0 +1,64 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COURGETTE_LABEL_MANAGER_H_ +#define COURGETTE_LABEL_MANAGER_H_ + +#include + +#include "base/macros.h" +#include "courgette/image_utils.h" + +namespace courgette { + +// A container to store and manage Label instances. A key consideration is peak +// memory usage reduction. To this end we preallocate Label instances in bulk, +// and carefully control transient memory usage when initializing Labels. +class LabelManager { + public: + // An adaptor to sequentially traverse multiple RVAs. This is useful for RVA + // translation without extra storage. For example, we might have a stored list + // of RVA locations, but we want to traverse the matching RVA targets. + class RvaVisitor { + public: + virtual ~RvaVisitor(); + + // Returns the number of remaining RVAs to visit. + virtual size_t Remaining() const = 0; + + // Returns the current RVA. + virtual RVA Get() const = 0; + + // Advances to the next RVA. + virtual void Next() = 0; + }; + + LabelManager(); + virtual ~LabelManager(); + + // Initializes |labels_| using RVAs from |rva_visitor|. Each distinct RVA from + // |rva_visitor| yields a Label with |rva_| assigned as the RVA, and |count_| + // assigned as the repeat. + void Read(RvaVisitor* rva_visitor); + + // Removes |labels_| elements whose |count_| is less than |count_threshold|. + void RemoveUnderusedLabels(int32 count_threshold); + + // Efficiently searches for a Label that targets |rva|. Returns the pointer to + // the stored Label instance if found, or null otherwise. + Label* Find(RVA rva); + + // TODO(huangs): Move AssignRemainingIndexes() here. + + protected: + // The main list of Label instances, sorted by the |rva_| member. + std::vector