summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorhuangs <huangs@chromium.org>2016-01-05 17:34:44 -0800
committerCommit bot <commit-bot@chromium.org>2016-01-06 01:35:43 +0000
commit2913c858071894ddc56d684567d10b2e4e708acc (patch)
tree854b3741b345d706b19eb7b131be8bef730abfec /courgette
parent82457c335583dd6d01449bd5ad221be02fc1ca63 (diff)
downloadchromium_src-2913c858071894ddc56d684567d10b2e4e708acc.zip
chromium_src-2913c858071894ddc56d684567d10b2e4e708acc.tar.gz
chromium_src-2913c858071894ddc56d684567d10b2e4e708acc.tar.bz2
[Courgette] Port basic index assignment routines to LabelManager.
Copying UnassignIndexes(), DefaultAssignIndexes(), and AssignRemainingIndexes() from AssemblyProgram. Also adding unit tests. Review URL: https://codereview.chromium.org/1527813002 Cr-Commit-Position: refs/heads/master@{#367741}
Diffstat (limited to 'courgette')
-rw-r--r--courgette/label_manager.cc106
-rw-r--r--courgette/label_manager.h65
-rw-r--r--courgette/label_manager_unittest.cc242
3 files changed, 403 insertions, 10 deletions
diff --git a/courgette/label_manager.cc b/courgette/label_manager.cc
index f19975d..06f3558 100644
--- a/courgette/label_manager.cc
+++ b/courgette/label_manager.cc
@@ -10,6 +10,7 @@
#include <algorithm>
#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
#include "base/numerics/safe_math.h"
#include "courgette/consecutive_range_visitor.h"
@@ -17,6 +18,89 @@ namespace courgette {
LabelManager::RvaVisitor::~RvaVisitor() {}
+LabelManager::SimpleIndexAssigner::SimpleIndexAssigner(LabelVector* labels)
+ : labels_(labels) {
+ // Find the maximum assigned index. Not bounded by |labels_| size.
+ int max_index = -1;
+ for (const Label& label : *labels_) {
+ if (label.index_ != Label::kNoIndex)
+ max_index = std::max(max_index, label.index_);
+ }
+
+ // Initialize |num_index_| and |available_|.
+ CHECK_GE(max_index + 1, 0);
+ num_index_ = std::max(base::checked_cast<int>(labels_->size()),
+ max_index + 1);
+ available_.resize(num_index_, true);
+ size_t used = 0;
+ for (const Label& label : *labels_) {
+ if (label.index_ != Label::kNoIndex) {
+ available_.at(label.index_) = false;
+ ++used;
+ }
+ }
+ VLOG(1) << used << " of " << labels_->size() << " labels pre-assigned.";
+}
+
+LabelManager::SimpleIndexAssigner::~SimpleIndexAssigner() {}
+
+void LabelManager::SimpleIndexAssigner::DoForwardFill() {
+ size_t count = 0;
+ // Inside the loop, if |prev_index| == |kNoIndex| then we try to assign 0.
+ // This allows 0 (if unused) to be assigned in middle of |labels_|.
+ int prev_index = Label::kNoIndex;
+ for (auto p = labels_->begin(); p != labels_->end(); ++p) {
+ if (p->index_ == Label::kNoIndex) {
+ int index = (prev_index == Label::kNoIndex) ? 0 : prev_index + 1;
+ if (index < num_index_ && available_.at(index)) {
+ p->index_ = index;
+ available_.at(index) = false;
+ ++count;
+ }
+ }
+ prev_index = p->index_;
+ }
+ VLOG(1) << " fill forward " << count;
+}
+
+void LabelManager::SimpleIndexAssigner::DoBackwardFill() {
+ size_t count = 0;
+ // This is asymmetric from DoForwardFill(), to preserve old behavior.
+ // Inside the loop, if |prev_index| == |kNoIndex| then we skip assignment.
+ // But we initilaize |prev_index| = |num_index_|, so if the last element in
+ // |labels_| has no index, then can use |num_index_| - 1 (if unused). We don't
+ // try this assignment elsewhere.
+ int prev_index = num_index_;
+ for (auto p = labels_->rbegin(); p != labels_->rend(); ++p) {
+ if (p->index_ == Label::kNoIndex && prev_index != Label::kNoIndex) {
+ int index = prev_index - 1;
+ if (index >= 0 && available_.at(index)) {
+ p->index_ = index;
+ available_.at(index) = false;
+ ++count;
+ }
+ }
+ prev_index = p->index_;
+ }
+ VLOG(1) << " fill backward " << count;
+}
+
+void LabelManager::SimpleIndexAssigner::DoInFill() {
+ size_t count = 0;
+ int index = 0;
+ for (Label& label : *labels_) {
+ if (label.index_ == Label::kNoIndex) {
+ while (!available_.at(index))
+ ++index;
+ label.index_ = index;
+ available_.at(index) = false;
+ ++index;
+ ++count;
+ }
+ }
+ VLOG(1) << " infill " << count;
+}
+
LabelManager::LabelManager() {}
LabelManager::~LabelManager() {}
@@ -74,4 +158,26 @@ Label* LabelManager::Find(RVA rva) {
return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it);
}
+void LabelManager::UnassignIndexes() {
+ for (Label& label : labels_)
+ label.index_ = Label::kNoIndex;
+}
+
+void LabelManager::DefaultAssignIndexes() {
+ int cur_index = 0;
+ for (Label& label : labels_) {
+ CHECK_EQ(Label::kNoIndex, label.index_);
+ label.index_ = cur_index++;
+ }
+}
+
+void LabelManager::AssignRemainingIndexes() {
+ // This adds some memory overhead, about 1 bit per Label (more if indexes >=
+ // |labels_.size()| get used).
+ SimpleIndexAssigner assigner(&labels_);
+ assigner.DoForwardFill();
+ assigner.DoBackwardFill();
+ assigner.DoInFill();
+}
+
} // namespace courgette
diff --git a/courgette/label_manager.h b/courgette/label_manager.h
index d155561..1b88024 100644
--- a/courgette/label_manager.h
+++ b/courgette/label_manager.h
@@ -15,6 +15,8 @@
namespace courgette {
+using LabelVector = std::vector<Label>;
+
// A container to store and manage Label instances. A key consideration is peak
// memory usage reduction. To this end we preallocate Label instances in bulk,
// and carefully control transient memory usage when initializing Labels.
@@ -37,6 +39,58 @@ class LabelManager {
virtual void Next() = 0;
};
+ // A helper class to heuristically complete index assignment for a list of
+ // Labels that have partially assigned indexes.
+ // Goal: An address table compresses best when each index is associated with
+ // an address that is slightly larger than the previous index.
+ // For example, suppose we have RVAs
+ // [0x0000, 0x0070, 0x00E0, 0x0150].
+ // Consider candidate (RVA, index) assignments A and B:
+ // A: [(0x0000, 0), (0x0070, 1), (0x00E0, 2), (0x0150, 3)],
+ // B: [(0x0000, 2), (0x0070, 1), (0x00E0, 0), (0x0150, 3)].
+ // To form the address table, we first sort by indexes:
+ // A: [(0x0000, 0), (0x0070, 1), (0x00E0, 2), (0x0150, 3)],
+ // B: [(0x00E0, 0), (0x0070, 1), (0x0000, 2), (0x0150, 3)].
+ // Then we extract the RVAs for storage:
+ // A: [0x0000, 0x0070, 0x00E0, 0x0150],
+ // B: [0x00E0, 0x0070, 0x0000, 0x0150].
+ // Clearly A compresses better than B under (signed) delta encoding.
+ // In Courgette-gen, an assignment algorithm (subclass of AdjustmentMethod)
+ // creates partial and arbitrary index assignments (to attempt to match one
+ // file against another). So the sorted case (like A) won't happen in general.
+ // Our helper class fills in the missing assignments by creating runs of
+ // consecutive indexes, so once RVAs are sorted by these indexes we'd reduce
+ // distances between successive RVAs.
+ class SimpleIndexAssigner {
+ public:
+ SimpleIndexAssigner(LabelVector* labels);
+ ~SimpleIndexAssigner();
+
+ // Scans forward to assign successive indexes to Labels, using existing
+ // indexes as start-anchors.
+ void DoForwardFill();
+
+ // Scans backward to assign successive indexes to Labels, using existing
+ // indexes as end-anchors.
+ void DoBackwardFill();
+
+ // Assigns all unsigned indexes using what's available, disregarding current
+ // Label assignment.
+ void DoInFill();
+
+ private:
+ // List of Labels to process. Owned by caller.
+ LabelVector* labels_;
+
+ // A bound on indexes.
+ int num_index_ = 0;
+
+ // Tracker for index usage to ensure uniqueness of indexes.
+ std::vector<bool> available_;
+
+ DISALLOW_COPY_AND_ASSIGN(SimpleIndexAssigner);
+ };
+
LabelManager();
virtual ~LabelManager();
@@ -52,11 +106,18 @@ class LabelManager {
// the stored Label instance if found, or null otherwise.
Label* Find(RVA rva);
- // TODO(huangs): Move AssignRemainingIndexes() here.
+ // Resets all indexes to an unassigend state.
+ void UnassignIndexes();
+
+ // Assigns indexes to successive integers from 0, ordered by RVA.
+ void DefaultAssignIndexes();
+
+ // Assigns indexes to any Label elements that don't have one yet.
+ void AssignRemainingIndexes();
protected:
// The main list of Label instances, sorted by the |rva_| member.
- std::vector<Label> labels_;
+ LabelVector labels_;
private:
DISALLOW_COPY_AND_ASSIGN(LabelManager);
diff --git a/courgette/label_manager_unittest.cc b/courgette/label_manager_unittest.cc
index acc6240..cc112a8 100644
--- a/courgette/label_manager_unittest.cc
+++ b/courgette/label_manager_unittest.cc
@@ -9,9 +9,13 @@
#include <iterator>
#include <map>
+#include <set>
+#include <string>
#include <utility>
#include <vector>
+#include "base/logging.h"
+#include "base/macros.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace courgette {
@@ -41,12 +45,17 @@ class TestRvaVisitor : public LabelManager::RvaVisitor {
// Test version of LabelManager: Expose data to test implementation.
class TestLabelManager : public LabelManager {
public:
- size_t LabelCount() const { return labels_.size(); };
+ TestLabelManager() {}
+
+ // Using move semantics to optimize injection of test LabelVector.
+ explicit TestLabelManager(LabelVector&& labels) { labels_ = labels; }
+
+ const LabelVector& Labels() const { return labels_; }
};
void CheckLabelManagerContent(TestLabelManager* label_manager,
const std::map<RVA, int32_t>& expected) {
- EXPECT_EQ(expected.size(), label_manager->LabelCount());
+ EXPECT_EQ(expected.size(), label_manager->Labels().size());
for (const auto& rva_and_count : expected) {
Label* label = label_manager->Find(rva_and_count.first);
EXPECT_TRUE(label != nullptr);
@@ -55,6 +64,65 @@ void CheckLabelManagerContent(TestLabelManager* label_manager,
}
}
+// Instantiates a LabelVector with |n| elements. The |rva_| fields are assigned
+// 0, ..., |n| - 1. The other fields are uninitialized.
+LabelVector CreateLabelVectorBasic(size_t n) {
+ LabelVector labels;
+ labels.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ labels.push_back(Label(i));
+ return labels;
+}
+
+// Instantiates a list of Labels, one per character |ch| in |encoded_index|.
+// - |rva_| is assigned 0, 1, 2, ...
+// - |count_| is assigned 1.
+// - |index_| depends on |ch|: '.' => kNoIndex, 'A' => 0, ..., 'Z' => 25.
+// Each letter (except '.') can appear at most once in |encoded_index|.
+// For example, |encoded_index| = "A.E" initializes 3 Labels:
+// [{rva_: 0, count_: 1, index_: 0},
+// {rva_: 1, count_: 1, index_: kNoIndex},
+// {rva_: 2, count_: 1, index_: 4}].
+LabelVector CreateLabelVectorWithIndexes(const std::string& encoded_index) {
+ LabelVector labels;
+ size_t n = encoded_index.size();
+ labels.reserve(n);
+ std::set<char> used_ch;
+ for (size_t i = 0; i < n; ++i) {
+ Label label(i);
+ label.count_ = 1;
+ char ch = encoded_index[i];
+ if (ch != '.') {
+ // Sanity check for test case.
+ if (ch < 'A' || ch > 'Z' || used_ch.find(ch) != used_ch.end())
+ NOTREACHED() << "Malformed test case: " << encoded_index;
+ used_ch.insert(ch);
+ label.index_ = ch - 'A';
+ } else {
+ label.index_ = Label::kNoIndex;
+ }
+ labels.push_back(label);
+ }
+ return labels;
+}
+
+// Returns a string encoding for |index_| assignments for |label_| elements,
+// with kNoIndex => '.', 0 => 'A', ..., '25' => 'Z'. Fails if any |index_|
+// does not fit the above.
+std::string EncodeLabelIndexes(const LabelVector& labels) {
+ std::string encoded;
+ encoded.reserve(labels.size());
+ for (const Label& label : labels) {
+ if (label.index_ == Label::kNoIndex)
+ encoded += '.';
+ else if (label.index_ >= 0 && label.index_ <= 'Z' - 'A')
+ encoded += static_cast<char>(label.index_ + 'A');
+ else
+ NOTREACHED();
+ }
+ return encoded;
+}
+
} // namespace
TEST(LabelManagerTest, Basic) {
@@ -62,12 +130,12 @@ TEST(LabelManagerTest, Basic) {
0x04000010,
0x04000030,
0x04000020,
- 0x04000010, // Redundant
+ 0x04000010, // Redundant.
0xFEEDF00D,
- 0x04000030, // Redundant
- 0xFEEDF00D, // Redundant
+ 0x04000030, // Redundant.
+ 0xFEEDF00D, // Redundant.
0x00000110,
- 0x04000010, // Redundant
+ 0x04000010, // Redundant.
0xABCD1234
};
std::vector<RVA> test_targets(std::begin(kTestTargetsRaw),
@@ -111,7 +179,7 @@ TEST(LabelManagerTest, Single) {
TestRvaVisitor visitor(test_targets.begin(), test_targets.end());
TestLabelManager label_manager;
label_manager.Read(&visitor);
- EXPECT_EQ(1U, label_manager.LabelCount()); // Deduped to 1 Label.
+ EXPECT_EQ(1U, label_manager.Labels().size()); // Deduped to 1 Label.
Label* label = label_manager.Find(kRva);
EXPECT_NE(nullptr, label);
@@ -130,9 +198,167 @@ TEST(LabelManagerTest, Empty) {
TestRvaVisitor visitor(empty_test_targets.begin(), empty_test_targets.end());
TestLabelManager label_manager;
label_manager.Read(&visitor);
- EXPECT_EQ(0U, label_manager.LabelCount());
+ EXPECT_EQ(0U, label_manager.Labels().size());
for (RVA rva = 0U; rva < 16U; ++rva)
EXPECT_EQ(nullptr, label_manager.Find(rva));
}
+TEST(LabelManagerTest, EmptyAssign) {
+ TestLabelManager label_manager_empty;
+ label_manager_empty.DefaultAssignIndexes();
+ label_manager_empty.UnassignIndexes();
+ label_manager_empty.AssignRemainingIndexes();
+}
+
+TEST(LabelManagerTest, TrivialAssign) {
+ for (size_t size = 0; size < 20; ++size) {
+ TestLabelManager label_manager(CreateLabelVectorBasic(size));
+ // Sanity check.
+ for (size_t i = 0; i < size; ++i)
+ EXPECT_EQ(Label::kNoIndex, label_manager.Labels()[i].index_);
+
+ // Default assign.
+ label_manager.DefaultAssignIndexes();
+ for (size_t i = 0; i < size; ++i)
+ EXPECT_EQ(static_cast<int>(i), label_manager.Labels()[i].index_);
+
+ // Heuristic assign, but since everything's assigned, so no change.
+ label_manager.AssignRemainingIndexes();
+ for (size_t i = 0; i < size; ++i)
+ EXPECT_EQ(static_cast<int>(i), label_manager.Labels()[i].index_);
+
+ // Unassign.
+ label_manager.UnassignIndexes();
+ for (size_t i = 0; i < size; ++i)
+ EXPECT_EQ(Label::kNoIndex, label_manager.Labels()[i].index_);
+ }
+}
+
+// Tests SimpleIndexAssigner fill strategies independently.
+TEST(LabelManagerTest, SimpleIndexAssigner) {
+ using SimpleIndexAssigner = LabelManager::SimpleIndexAssigner;
+ // See CreateLabelVectorWithIndexes() explanation on how we encode LabelVector
+ // |index_| values as a string.
+ const struct TestCase {
+ const char* input;
+ const char* expect_forward;
+ const char* expect_backward;
+ const char* expect_in;
+ } kTestCases[] = {
+ {"", "", "", ""},
+ {".", "A", "A", "A"},
+ {"....", "ABCD", "ABCD", "ABCD"},
+ {"A...", "ABCD", "ABCD", "ABCD"},
+ {".A..", ".ABC", ".ACD", "BACD"},
+ {"...A", "...A", "...A", "BCDA"},
+ {"C...", "CD.A", "C..D", "CABD"},
+ {".C..", "ACD.", "BC.D", "ACBD"},
+ {"...C", "AB.C", ".ABC", "ABDC"},
+ {"D...", "D.AB", "D...", "DABC"},
+ {".D..", "AD..", "CD..", "ADBC"},
+ {"...D", "ABCD", "ABCD", "ABCD"},
+ {"Z...", "Z.AB", "Z...", "ZABC"},
+ {".Z..", "AZ..", "YZ..", "AZBC"},
+ {"...Z", "ABCZ", "WXYZ", "ABCZ"},
+ {"..AZ..", "..AZ..", "..AZ..", "BCAZDE"},
+ {"..ZA..", "..ZABC", "XYZA..", "BCZADE"},
+ {"A....Z", "ABCDEZ", "AVWXYZ", "ABCDEZ"},
+ {"Z....A", "Z....A", "Z....A", "ZBCDEA"},
+ {"..CD..", "ABCDEF", "ABCDEF", "ABCDEF"},
+ {"..DC..", "ABDC..", "..DCEF", "ABDCEF"},
+ {"..MN..", "ABMN..", "KLMN..", "ABMNCD"},
+ {"..NM..", "ABNM..", "..NM..", "ABNMCD"},
+ {".B.D.F.", "ABCDEFG", "ABCDEFG", "ABCDEFG"},
+ {".D.G.J.", "ADEGHJ.", "CDFGIJ.", "ADBGCJE"},
+ {".D.Z.J.", "ADEZ.JK", "CDYZIJ.", "ADBZCJE"},
+ {".B..E..", "ABCDEFG", "ABCDEFG", "ABCDEFG"},
+ {".B..D..", "ABC.DEF", "AB.CDFG", "ABCEDFG"},
+ };
+ const int kNumFuns = 3;
+ // TestCase member variable pointers to enable iteration.
+ const char* TestCase::*expect_ptr[kNumFuns] = {
+ &TestCase::expect_forward,
+ &TestCase::expect_backward,
+ &TestCase::expect_in
+ };
+ // SimpleIndexAssigner member function pointers to enable iteration.
+ void (SimpleIndexAssigner::*fun_ptrs[kNumFuns])() = {
+ &SimpleIndexAssigner::DoForwardFill,
+ &SimpleIndexAssigner::DoBackwardFill,
+ &SimpleIndexAssigner::DoInFill
+ };
+ for (const auto& test_case : kTestCases) {
+ // Loop over {forward fill, backward fill, infill}.
+ for (int i = 0; i < kNumFuns; ++i) {
+ std::string expect = test_case.*(expect_ptr[i]);
+ LabelVector labels = CreateLabelVectorWithIndexes(test_case.input);
+ SimpleIndexAssigner assigner(&labels);
+ (assigner.*(fun_ptrs[i]))();
+ std::string result = EncodeLabelIndexes(labels);
+ EXPECT_EQ(expect, result);
+ }
+ }
+}
+
+// Tests integrated AssignRemainingIndexes().
+TEST(LabelManagerTest, AssignRemainingIndexes) {
+ const struct {
+ const char* input;
+ const char* expect;
+ } kTestCases[] = {
+ // Trivial.
+ {"", ""},
+ {"M", "M"},
+ {"ABCDEFG", "ABCDEFG"},
+ {"THEQUICKBROWNFXJMPSVLAZYDG", "THEQUICKBROWNFXJMPSVLAZYDG"},
+ // Forward fill only.
+ {".", "A"},
+ {".......", "ABCDEFG"},
+ {"....E..", "ABCDEFG"}, // "E" is at right place.
+ {".D.B.H.F.", "ADEBCHIFG"},
+ {"ZN....", "ZNOPQR"}, // "Z" exists, so 'OPQR" are defined.
+ {"H.D...A..", "HIDEFGABC"},
+ {"...K.DE..H..Z", "ABCKLDEFGHIJZ"}, // "Z" exists, so "L" defined.
+ // Infill only.
+ {"E.", "EA"},
+ {"...A", "BCDA"},
+ {"Z...A", "ZBCDA"},
+ {"AN...", "ANBCD"},
+ {"...AZ", "BCDAZ"},
+ {"....AC", "BDEFAC"},
+ {"ED...C...B....A", "EDFGHCIJKBLMNOA"},
+ // Forward fill and infill.
+ {"E..", "EBA"}, // Forward: "A"; in: "B".
+ {"Z....", "ZDABC"}, // Forward: "ABC"; in: "D".
+ {".E.....", "AEFGBCD"}, // Forward: "A", "FG"; in: "BCD".
+ {"....C..", "ABFGCDE"}, // Forward: "AB", "DE"; in: "FG".
+ {"...Z...", "ABCZDEF"}, // Forward: "ABC"; in: "DEF".
+ {"...A...", "EFGABCD"}, // Forward: "BCD"; in: "EFG".
+ // Backward fill only.
+ {".CA", "BCA"},
+ {"...ZA", "WXYZA"},
+ {"BA...Z", "BAWXYZ"},
+ {"ANM..Z....L...T", "ANMXYZHIJKLQRST"},
+ {"....G..Z...LAH", "CDEFGXYZIJKLAH"},
+ // Forward fill and backward fill.
+ {"..ZA..", "XYZABC"}, // Forward: "BC"; backward: "XY".
+ {".....ZD", "ABCXYZD"}, // Forward: "ABC"; backward: "XY".
+ {"DA.....", "DABCEFG"}, // Forward: "BC"; backward: "EFG".
+ // Backward fill and infill.
+ {"G....DA", "GEFBCDA"}, // Backward: "BC"; in: "EF".
+ {"..ZBA..", "XYZBACD"}, // Backward: "XY"; in: "CD".
+ // All.
+ {".....ZED.", "ABCXYZEDF"}, // Forward: "ABC"; backward: "XY"; in: "F".
+ {".....GD.", "ABCHFGDE"}, // Forward: "ABC", "E"; backward: "F"; in: "H".
+ {"..FE..GD..", "ABFECHGDIJ"}, // Forward: "AB"; backward: "IJ"; in: "CH".
+ };
+ for (const auto& test_case : kTestCases) {
+ TestLabelManager label_manager(
+ CreateLabelVectorWithIndexes(test_case.input));
+ label_manager.AssignRemainingIndexes();
+ std::string result = EncodeLabelIndexes(label_manager.Labels());
+ EXPECT_EQ(test_case.expect, result);
+ }
+}
+
} // namespace courgette