diff options
author | huangs <huangs@chromium.org> | 2016-01-19 14:09:03 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-01-19 22:11:12 +0000 |
commit | bb4b8a90a34d7a36b9a83cb663e9f29f12d16a3e (patch) | |
tree | 67781fd3df79bd04ed728c2555bcce6a574d609f /courgette | |
parent | 7f6dad4f0bdb435323452ba420d34b2e7ee536cc (diff) | |
download | chromium_src-bb4b8a90a34d7a36b9a83cb663e9f29f12d16a3e.zip chromium_src-bb4b8a90a34d7a36b9a83cb663e9f29f12d16a3e.tar.gz chromium_src-bb4b8a90a34d7a36b9a83cb663e9f29f12d16a3e.tar.bz2 |
[Courgette] Simplify EncodedProgram Label addition code; removed "1.01 x" memory fix.
This CL simplifies how Labels get flattened to a list of RVAs. In the past
EncodedProgram used DefineAbs32Label() / DefineRel32Label(), which let callers
add one Label at a time. Complexity arose from:
- Function pointer usage to avoid duplicate code for abs32 and rel32.
- Need for EncodedProgram to dynamically adjust size of RVA list. This led to
inefficient array resizing, which was fixed by the "1.01 x" memory growth.
Change: We now pass the collection of abs32 and rel32 Labels to EncodedProgram.
This simplifies the interface, and allows EncodedProgram to find the max indexes
and preallocated buffers. The trade-off is increased test code complexity, since
we'd need to create Label collection.
Other changes:
- Update namespace{} for EncodedProgram and its tests.
- Add more Label constructors (for testing).
- Add LabelManager::GetIndexBound(), for LabelVector and RVAToLabel.
- Add kUnassignedRVA in image_utils.h, with checks for its absence in images.
Review URL: https://codereview.chromium.org/1571913003
Cr-Commit-Position: refs/heads/master@{#370200}
Diffstat (limited to 'courgette')
-rw-r--r-- | courgette/assembly_program.cc | 26 | ||||
-rw-r--r-- | courgette/assembly_program.h | 3 | ||||
-rw-r--r-- | courgette/disassembler_elf_32.cc | 6 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.cc | 3 | ||||
-rw-r--r-- | courgette/disassembler_win32_x64.h | 1 | ||||
-rw-r--r-- | courgette/disassembler_win32_x86.cc | 3 | ||||
-rw-r--r-- | courgette/encoded_program.cc | 91 | ||||
-rw-r--r-- | courgette/encoded_program.h | 11 | ||||
-rw-r--r-- | courgette/encoded_program_unittest.cc | 119 | ||||
-rw-r--r-- | courgette/image_utils.h | 6 | ||||
-rw-r--r-- | courgette/label_manager.cc | 29 | ||||
-rw-r--r-- | courgette/label_manager.h | 8 | ||||
-rw-r--r-- | courgette/label_manager_unittest.cc | 52 | ||||
-rw-r--r-- | courgette/rel32_finder_win32_x86.h | 1 |
14 files changed, 217 insertions, 142 deletions
diff --git a/courgette/assembly_program.cc b/courgette/assembly_program.cc index 28a348e..b2a01a0 100644 --- a/courgette/assembly_program.cc +++ b/courgette/assembly_program.cc @@ -365,37 +365,13 @@ void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) { << " infill " << fill_infill_count; } -typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value); - -#if defined(OS_WIN) -__declspec(noinline) -#endif -static CheckBool DefineLabels(const RVAToLabel& labels, - EncodedProgram* encoded_format, - DefineLabelMethod define_label) { - bool ok = true; - for (RVAToLabel::const_iterator p = labels.begin(); - ok && p != labels.end(); - ++p) { - Label* label = p->second; - ok = (encoded_format->*define_label)(label->index_, label->rva_); - } - return ok; -} - EncodedProgram* AssemblyProgram::Encode() const { scoped_ptr<EncodedProgram> encoded(new EncodedProgram()); encoded->set_image_base(image_base_); - if (!DefineLabels(abs32_labels_, encoded.get(), - &EncodedProgram::DefineAbs32Label) || - !DefineLabels(rel32_labels_, encoded.get(), - &EncodedProgram::DefineRel32Label)) { + if (!encoded->DefineLabels(abs32_labels_, rel32_labels_)) return NULL; - } - - encoded->EndLabels(); for (size_t i = 0; i < instructions_.size(); ++i) { Instruction* instruction = instructions_[i]; diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h index 36dc369..5fee6c2 100644 --- a/courgette/assembly_program.h +++ b/courgette/assembly_program.h @@ -16,14 +16,13 @@ #include "base/memory/scoped_ptr.h" #include "courgette/disassembler.h" #include "courgette/image_utils.h" +#include "courgette/label_manager.h" #include "courgette/memory_allocator.h" namespace courgette { class EncodedProgram; -typedef std::map<RVA, Label*> RVAToLabel; - // Opcodes of simple assembly language enum OP { ORIGIN, // ORIGIN <rva> - set current address for assembly. diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc index c6139a8..c0f42e6 100644 --- a/courgette/disassembler_elf_32.cc +++ b/courgette/disassembler_elf_32.cc @@ -142,6 +142,8 @@ bool DisassemblerElf32::UpdateLength() { } CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const { + if (rva == kUnassignedRVA) + return false; // It's valid if it's contained in any program segment for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { @@ -446,6 +448,8 @@ CheckBool DisassemblerElf32::ParseAbs32Relocs() { } std::sort(abs32_locations_.begin(), abs32_locations_.end()); + DCHECK(abs32_locations_.empty() || + abs32_locations_.back() != kUnassignedRVA); return true; } @@ -497,6 +501,8 @@ CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() { std::sort(rel32_locations_.begin(), rel32_locations_.end(), TypedRVA::IsLessThan); + DCHECK(rel32_locations_.empty() || + rel32_locations_.back()->rva() != kUnassignedRVA); return true; } diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc index 357930d..74b0fe4 100644 --- a/courgette/disassembler_win32_x64.cc +++ b/courgette/disassembler_win32_x64.cc @@ -281,6 +281,7 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) { } std::sort(relocs->begin(), relocs->end()); + DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); return true; } @@ -396,6 +397,8 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() { file_offset += section->size_of_raw_data; } std::sort(rel32_locations_.begin(), rel32_locations_.end()); + DCHECK(rel32_locations_.empty() || + rel32_locations_.back() != kUnassignedRVA); #if COURGETTE_HISTOGRAM_TARGETS VLOG(1) << "abs32_locations_ " << abs32_locations_.size() diff --git a/courgette/disassembler_win32_x64.h b/courgette/disassembler_win32_x64.h index 3b0b978..23aee66 100644 --- a/courgette/disassembler_win32_x64.h +++ b/courgette/disassembler_win32_x64.h @@ -88,6 +88,7 @@ class DisassemblerWin32X64 : public Disassembler { return base_relocation_table_; } + // Subsumes rva != kUnassignedRVA. bool IsValidRVA(RVA rva) const { return rva < size_of_image_; } // Returns description of the RVA, e.g. ".text+0x1243". For debugging only. diff --git a/courgette/disassembler_win32_x86.cc b/courgette/disassembler_win32_x86.cc index bc41ff0..aed26c7 100644 --- a/courgette/disassembler_win32_x86.cc +++ b/courgette/disassembler_win32_x86.cc @@ -287,6 +287,7 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) { } std::sort(relocs->begin(), relocs->end()); + DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); return true; } @@ -402,6 +403,8 @@ void DisassemblerWin32X86::ParseRel32RelocsFromSections() { file_offset += section->size_of_raw_data; } std::sort(rel32_locations_.begin(), rel32_locations_.end()); + DCHECK(rel32_locations_.empty() || + rel32_locations_.back() != kUnassignedRVA); #if COURGETTE_HISTOGRAM_TARGETS VLOG(1) << "abs32_locations_ " << abs32_locations_.size() diff --git a/courgette/encoded_program.cc b/courgette/encoded_program.cc index 209fe2f..59800c5 100644 --- a/courgette/encoded_program.cc +++ b/courgette/encoded_program.cc @@ -26,11 +26,7 @@ namespace courgette { -// Constructor is here rather than in the header. Although the constructor -// appears to do nothing it is fact quite large because of the implicit calls to -// field constructors. Ditto for the destructor. -EncodedProgram::EncodedProgram() : image_base_(0) {} -EncodedProgram::~EncodedProgram() {} +namespace { // Serializes a vector of integral values using Varint32 coding. template<typename V> @@ -132,60 +128,49 @@ bool ReadVectorU8(V* items, SourceStream* buffer) { return ok; } -//////////////////////////////////////////////////////////////////////////////// - -CheckBool EncodedProgram::DefineRel32Label(int index, RVA value) { - return DefineLabelCommon(&rel32_rva_, index, value); -} +} // namespace -CheckBool EncodedProgram::DefineAbs32Label(int index, RVA value) { - return DefineLabelCommon(&abs32_rva_, index, value); -} - -static const RVA kUnassignedRVA = static_cast<RVA>(-1); - -CheckBool EncodedProgram::DefineLabelCommon(RvaVector* rvas, - int index, - RVA rva) { - bool ok = true; +//////////////////////////////////////////////////////////////////////////////// - // Resize |rvas| to accommodate |index|. If we naively call resize(), in the - // worst case we'd encounter |index| in increasing order, and then we'd - // require reallocation every time. Turns out this worst case is the typical - // scenario, and noticeable slowness (~5x slow down) ensues. The solution is - // to exponentially increase capacity. We use a factor of 1.01 to be frugal. - if (static_cast<int>(rvas->capacity()) <= index) - ok = rvas->reserve((index + 1) * 1.01); - if (ok && static_cast<int>(rvas->size()) <= index) - ok = rvas->resize(index + 1, kUnassignedRVA); +// Constructor is here rather than in the header. Although the constructor +// appears to do nothing it is fact quite large because of the implicit calls to +// field constructors. Ditto for the destructor. +EncodedProgram::EncodedProgram() {} +EncodedProgram::~EncodedProgram() {} - if (ok) { - DCHECK_EQ((*rvas)[index], kUnassignedRVA) - << "DefineLabel double assigned " << index; - (*rvas)[index] = rva; - } +CheckBool EncodedProgram::DefineLabels(const RVAToLabel& abs32_labels, + const RVAToLabel& rel32_labels) { + // which == 0 => abs32; which == 1 => rel32. + for (int which = 0; which < 2; ++which) { + const RVAToLabel& labels = which == 0 ? abs32_labels : rel32_labels; + RvaVector& rvas = which == 0 ? abs32_rva_ : rel32_rva_; - return ok; -} + if (!rvas.resize(LabelManager::GetIndexBound(labels), kUnassignedRVA)) + return false; -void EncodedProgram::EndLabels() { - FinishLabelsCommon(&abs32_rva_); - FinishLabelsCommon(&rel32_rva_); -} + // For each Label, write its RVA to assigned index. + for (const auto& rva_and_label : labels) { + const Label& label = *rva_and_label.second; + DCHECK_EQ(rva_and_label.first, label.rva_); + DCHECK_NE(label.index_, Label::kNoIndex); + DCHECK_EQ(rvas[label.index_], kUnassignedRVA) + << "DefineLabels() double assigned " << label.index_; + rvas[label.index_] = label.rva_; + } -void EncodedProgram::FinishLabelsCommon(RvaVector* rvas) { - // Replace all unassigned slots with the value at the previous index so they - // delta-encode to zero. (There might be better values than zero. The way to - // get that is have the higher level assembly program assign the unassigned - // slots.) - RVA previous = 0; - size_t size = rvas->size(); - for (size_t i = 0; i < size; ++i) { - if ((*rvas)[i] == kUnassignedRVA) - (*rvas)[i] = previous; - else - previous = (*rvas)[i]; + // Replace all unassigned slots with the value at the previous index so they + // delta-encode to zero. (There might be better values than zero. The way to + // get that is have the higher level assembly program assign the unassigned + // slots.) + RVA previous = 0; + for (RVA& rva : rvas) { + if (rva == kUnassignedRVA) + rva = previous; + else + previous = rva; + } } + return true; } CheckBool EncodedProgram::AddOrigin(RVA origin) { @@ -752,6 +737,7 @@ class RelocBlock { CheckBool EncodedProgram::GeneratePeRelocations(SinkStream* buffer, uint8_t type) { std::sort(abs32_relocs_.begin(), abs32_relocs_.end()); + DCHECK(abs32_relocs_.empty() || abs32_relocs_.back() != kUnassignedRVA); RelocBlock block; @@ -773,6 +759,7 @@ CheckBool EncodedProgram::GeneratePeRelocations(SinkStream* buffer, CheckBool EncodedProgram::GenerateElfRelocations(Elf32_Word r_info, SinkStream* buffer) { std::sort(abs32_relocs_.begin(), abs32_relocs_.end()); + DCHECK(abs32_relocs_.empty() || abs32_relocs_.back() != kUnassignedRVA); Elf32_Rel relocation_block; diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h index 81e1639..e89676d 100644 --- a/courgette/encoded_program.h +++ b/courgette/encoded_program.h @@ -12,6 +12,7 @@ #include "base/macros.h" #include "courgette/disassembler.h" +#include "courgette/label_manager.h" #include "courgette/memory_allocator.h" #include "courgette/types_elf.h" @@ -49,9 +50,9 @@ class EncodedProgram { void set_image_base(uint64_t base) { image_base_ = base; } // (2) Address tables and indexes defined first. - CheckBool DefineRel32Label(int index, RVA address) WARN_UNUSED_RESULT; - CheckBool DefineAbs32Label(int index, RVA address) WARN_UNUSED_RESULT; - void EndLabels(); + CheckBool DefineLabels(const RVAToLabel& abs32_labels, + const RVAToLabel& rel32_labels) WARN_UNUSED_RESULT; + // (3) Add instructions in the order needed to generate bytes of file. // NOTE: If any of these methods ever fail, the EncodedProgram instance @@ -116,15 +117,13 @@ class EncodedProgram { uint8_t type) WARN_UNUSED_RESULT; CheckBool GenerateElfRelocations(Elf32_Word pending_elf_relocation_table, SinkStream *buffer) WARN_UNUSED_RESULT; - CheckBool DefineLabelCommon(RvaVector*, int, RVA) WARN_UNUSED_RESULT; - void FinishLabelsCommon(RvaVector* addresses); // Decodes and evaluates courgette ops for ARM rel32 addresses. CheckBool EvaluateRel32ARM(OP op, size_t& ix_rel32_ix, RVA& current_rva, SinkStream* output); // Binary assembly language tables. - uint64_t image_base_; + uint64_t image_base_ = 0; RvaVector rel32_rva_; RvaVector abs32_rva_; OPVector ops_; diff --git a/courgette/encoded_program_unittest.cc b/courgette/encoded_program_unittest.cc index 5d29452..654ef44 100644 --- a/courgette/encoded_program_unittest.cc +++ b/courgette/encoded_program_unittest.cc @@ -7,53 +7,77 @@ #include <stddef.h> #include <stdint.h> +#include <vector> + #include "base/macros.h" #include "base/memory/scoped_ptr.h" #include "courgette/disassembler.h" +#include "courgette/image_utils.h" +#include "courgette/label_manager.h" #include "courgette/streams.h" #include "testing/gtest/include/gtest/gtest.h" +namespace courgette { + namespace { -using courgette::EncodedProgram; +// Helper class to instantiate RVAToLabel while managing allocation. +class RVAToLabelMaker { + public: + RVAToLabelMaker() {} + ~RVAToLabelMaker() {} + + // Adds a Label for storage. Must be called before Make(), since |labels_map_| + // has pointers into |labels_|, and |labels_.push_back()| can invalidate them. + void Add(int index, RVA rva) { + DCHECK(label_map_.empty()); + labels_.push_back(Label(rva, index)); // Don't care about |count_|. + } + + // Initializes |label_map_| and returns a pointer to it. + RVAToLabel* Make() { + for (size_t i = 0; i < labels_.size(); ++i) { + DCHECK_EQ(0U, label_map_.count(labels_[i].rva_)); + label_map_[labels_[i].rva_] = &labels_[i]; + } + return &label_map_; + } + + const std::vector<Label>& GetLabels() const { return labels_; } -struct AddressSpec { - int32_t index; - courgette::RVA rva; + private: + std::vector<Label> labels_; // Stores all Labels. + RVAToLabel label_map_; // Has pointers to |labels_| elements. }; // Creates a simple new program with given addresses. The orders of elements // in |abs32_specs| and |rel32_specs| are important. -scoped_ptr<EncodedProgram> CreateTestProgram(AddressSpec* abs32_specs, - size_t num_abs32_specs, - AddressSpec* rel32_specs, - size_t num_rel32_specs) { +scoped_ptr<EncodedProgram> CreateTestProgram(RVAToLabelMaker* abs32_maker, + RVAToLabelMaker* rel32_maker) { + RVAToLabel* abs32_labels = abs32_maker->Make(); + RVAToLabel* rel32_labels = rel32_maker->Make(); + scoped_ptr<EncodedProgram> program(new EncodedProgram()); uint32_t base = 0x00900000; program->set_image_base(base); - for (size_t i = 0; i < num_abs32_specs; ++i) { - EXPECT_TRUE(program->DefineAbs32Label(abs32_specs[i].index, - abs32_specs[i].rva)); - } - for (size_t i = 0; i < num_rel32_specs; ++i) { - EXPECT_TRUE(program->DefineRel32Label(rel32_specs[i].index, - rel32_specs[i].rva)); - } - program->EndLabels(); + EXPECT_TRUE(program->DefineLabels(*abs32_labels, *rel32_labels)); EXPECT_TRUE(program->AddOrigin(0)); // Start at base. - for (size_t i = 0; i < num_abs32_specs; ++i) - EXPECT_TRUE(program->AddAbs32(abs32_specs[i].index)); - for (size_t i = 0; i < num_rel32_specs; ++i) - EXPECT_TRUE(program->AddRel32(rel32_specs[i].index)); + + // Arbitrary: Add instructions in the order they're defined in |*_maker|. + for (const Label& label : abs32_maker->GetLabels()) + EXPECT_TRUE(program->AddAbs32(label.index_)); + for (const Label& label : rel32_maker->GetLabels()) + EXPECT_TRUE(program->AddRel32(label.index_)); + return program; } bool CompareSink(const uint8_t expected[], size_t num_expected, - courgette::SinkStream* ss) { + SinkStream* ss) { size_t n = ss->Length(); if (num_expected != n) return false; @@ -66,28 +90,29 @@ bool CompareSink(const uint8_t expected[], // Create a simple program with a few addresses and references and // check that the bits produced are as expected. TEST(EncodedProgramTest, Test) { - // ABS32 index 7 == base + 4. - AddressSpec abs32_specs[] = {{7, 4}}; - // REL32 index 5 == base + 0. - AddressSpec rel32_specs[] = {{5, 0}}; + // ABS32 index 7 <-- base + 4. + RVAToLabelMaker abs32_label_maker; + abs32_label_maker.Add(7, 4); + // REL32 index 5 <-- base + 0. + RVAToLabelMaker rel32_label_maker; + rel32_label_maker.Add(5, 0); + scoped_ptr<EncodedProgram> program( - CreateTestProgram(abs32_specs, arraysize(abs32_specs), - rel32_specs, arraysize(rel32_specs))); + CreateTestProgram(&abs32_label_maker, &rel32_label_maker)); // Serialize and deserialize. - - courgette::SinkStreamSet sinks; + SinkStreamSet sinks; EXPECT_TRUE(program->WriteTo(&sinks)); program.reset(); - courgette::SinkStream sink; + SinkStream sink; bool can_collect = sinks.CopyTo(&sink); EXPECT_TRUE(can_collect); const void* buffer = sink.Buffer(); size_t length = sink.Length(); - courgette::SourceStreamSet sources; + SourceStreamSet sources; bool can_get_source_streams = sources.Init(buffer, length); EXPECT_TRUE(can_get_source_streams); @@ -96,7 +121,7 @@ TEST(EncodedProgramTest, Test) { EXPECT_TRUE(can_read); // Finally, try to assemble. - courgette::SinkStream assembled; + SinkStream assembled; bool can_assemble = encoded2->AssembleTo(&assembled); EXPECT_TRUE(can_assemble); encoded2.reset(); @@ -114,31 +139,37 @@ TEST(EncodedProgramTest, Test) { // contents of the address streams. TEST(EncodedProgramTest, TestWriteAddress) { // Absolute addresses by index: [_, _, _, 2, _, 23, _, 11]. - AddressSpec abs32_specs[] = {{7, 11}, {3, 2}, {5, 23}}; + RVAToLabelMaker abs32_label_maker; + abs32_label_maker.Add(7, 11); + abs32_label_maker.Add(3, 2); + abs32_label_maker.Add(5, 23); // Relative addresses by index: [16, 7, _, 32]. - AddressSpec rel32_specs[] = {{0, 16}, {3, 32}, {1, 7}}; + RVAToLabelMaker rel32_label_maker; + rel32_label_maker.Add(0, 16); + rel32_label_maker.Add(3, 32); + rel32_label_maker.Add(1, 7); + scoped_ptr<EncodedProgram> program( - CreateTestProgram(abs32_specs, arraysize(abs32_specs), - rel32_specs, arraysize(rel32_specs))); + CreateTestProgram(&abs32_label_maker, &rel32_label_maker)); - courgette::SinkStreamSet sinks; + SinkStreamSet sinks; EXPECT_TRUE(program->WriteTo(&sinks)); program.reset(); - // Check addresses in sinks. + // Check indexes and addresses in sinks. const uint8_t golden_abs32_indexes[] = { 0x03, 0x07, 0x03, 0x05 // 3 indexes: [7, 3, 5]. }; EXPECT_TRUE(CompareSink(golden_abs32_indexes, arraysize(golden_abs32_indexes), - sinks.stream(courgette::kStreamAbs32Indexes))); + sinks.stream(kStreamAbs32Indexes))); const uint8_t golden_rel32_indexes[] = { 0x03, 0x00, 0x03, 0x01 // 3 indexes: [0, 3, 1]. }; EXPECT_TRUE(CompareSink(golden_rel32_indexes, arraysize(golden_rel32_indexes), - sinks.stream(courgette::kStreamRel32Indexes))); + sinks.stream(kStreamRel32Indexes))); // Addresses: [_, _, _, 2, _, 23, _, 11]. // Padded: [0, 0, 0, 2, 2, 23, 23, 11]. @@ -152,7 +183,7 @@ TEST(EncodedProgramTest, TestWriteAddress) { }; EXPECT_TRUE(CompareSink(golden_abs32_addresses, arraysize(golden_abs32_addresses), - sinks.stream(courgette::kStreamAbs32Addresses))); + sinks.stream(kStreamAbs32Addresses))); // Addresses: [16, 7, _, 32]. // Padded: [16, 7, 7, 32]. @@ -166,5 +197,7 @@ TEST(EncodedProgramTest, TestWriteAddress) { }; EXPECT_TRUE(CompareSink(golden_rel32_addresses, arraysize(golden_rel32_addresses), - sinks.stream(courgette::kStreamRel32Addresses))); + sinks.stream(kStreamRel32Addresses))); } + +} // namespace courgette diff --git a/courgette/image_utils.h b/courgette/image_utils.h index 9a077fd..f958cc1 100644 --- a/courgette/image_utils.h +++ b/courgette/image_utils.h @@ -15,6 +15,7 @@ namespace courgette { typedef uint32_t RVA; +const RVA kUnassignedRVA = 0xFFFFFFFFU; // A Label is a symbolic reference to an address. Unlike a conventional // assembly language, we always know the address. The address will later be @@ -25,13 +26,16 @@ class Label { public: enum : int { kNoIndex = -1 }; explicit Label(RVA rva) : rva_(rva) {} + Label(RVA rva, int index) : rva_(rva), index_(index) {} + Label(RVA rva, int index, int32_t count) + : rva_(rva), index_(index), count_(count) {} bool operator==(const Label& other) const { return rva_ == other.rva_ && index_ == other.index_ && count_ == other.count_; } - RVA rva_ = 0; // Address referred to by the label. + RVA rva_ = kUnassignedRVA; // Address referred to by the label. int index_ = kNoIndex; // Index of address in address table. int32_t count_ = 0; }; diff --git a/courgette/label_manager.cc b/courgette/label_manager.cc index 268bda2..bd568b0 100644 --- a/courgette/label_manager.cc +++ b/courgette/label_manager.cc @@ -20,21 +20,34 @@ LabelManager::LabelManager() {} LabelManager::~LabelManager() {} -LabelManagerImpl::RvaVisitor::~RvaVisitor() {} +// static +int LabelManager::GetIndexBound(const LabelVector& labels) { + int max_index = -1; + for (const Label& label : labels) { + if (label.index_ != Label::kNoIndex) + max_index = std::max(max_index, label.index_); + } + return max_index + 1; +} -LabelManagerImpl::SimpleIndexAssigner::SimpleIndexAssigner(LabelVector* labels) - : labels_(labels) { - // Find the maximum assigned index. Not bounded by |labels_| size. +// static +int LabelManager::GetIndexBound(const RVAToLabel& labels_map) { int max_index = -1; - for (const Label& label : *labels_) { + for (const auto& rva_and_label : labels_map) { + const Label& label = *rva_and_label.second; if (label.index_ != Label::kNoIndex) max_index = std::max(max_index, label.index_); } + return max_index + 1; +} + +LabelManagerImpl::RvaVisitor::~RvaVisitor() {} +LabelManagerImpl::SimpleIndexAssigner::SimpleIndexAssigner(LabelVector* labels) + : labels_(labels) { // Initialize |num_index_| and |available_|. - CHECK_GE(max_index + 1, 0); num_index_ = std::max(base::checked_cast<int>(labels_->size()), - max_index + 1); + LabelManager::GetIndexBound(*labels_)); available_.resize(num_index_, true); size_t used = 0; for (const Label& label : *labels_) { @@ -129,6 +142,8 @@ void LabelManagerImpl::Read(RvaVisitor* rva_visitor) { // Sort |rvas|, then count the number of distinct values. using CRV = ConsecutiveRangeVisitor<std::vector<RVA>::iterator>; std::sort(rvas.begin(), rvas.end()); + DCHECK(rvas.empty() || rvas.back() != kUnassignedRVA); + size_t num_distinct_rva = 0; for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) ++num_distinct_rva; diff --git a/courgette/label_manager.h b/courgette/label_manager.h index 3034ebf..72783cf 100644 --- a/courgette/label_manager.h +++ b/courgette/label_manager.h @@ -8,6 +8,7 @@ #include <stddef.h> #include <stdint.h> +#include <map> #include <vector> #include "base/gtest_prod_util.h" @@ -17,12 +18,19 @@ namespace courgette { using LabelVector = std::vector<Label>; +using RVAToLabel = std::map<RVA, Label*>; // A container to store and manage Label instances. class LabelManager { public: virtual ~LabelManager(); + // Returns an exclusive upper bound for all existing indexes in |labels|. + static int GetIndexBound(const LabelVector& labels); + + // Returns an exclusive upper bound for all existing indexes in |labels_map|. + static int GetIndexBound(const RVAToLabel& labels_map); + // Returns the number of Label instances stored. virtual size_t Size() const = 0; diff --git a/courgette/label_manager_unittest.cc b/courgette/label_manager_unittest.cc index b66f563..4b91d92 100644 --- a/courgette/label_manager_unittest.cc +++ b/courgette/label_manager_unittest.cc @@ -78,19 +78,16 @@ LabelVector CreateLabelVectorWithIndexes(const std::string& encoded_index) { labels.reserve(n); std::set<char> used_ch; for (size_t i = 0; i < n; ++i) { - Label label(i); - label.count_ = 1; + int index = Label::kNoIndex; char ch = encoded_index[i]; if (ch != '.') { // Sanity check for test case. if (ch < 'A' || ch > 'Z' || used_ch.find(ch) != used_ch.end()) NOTREACHED() << "Malformed test case: " << encoded_index; used_ch.insert(ch); - label.index_ = ch - 'A'; - } else { - label.index_ = Label::kNoIndex; + index = ch - 'A'; } - labels.push_back(label); + labels.push_back(Label(i, index, 1)); } return labels; } @@ -114,6 +111,49 @@ std::string EncodeLabelIndexes(const LabelVector& labels) { } // namespace +TEST(LabelManagerTest, GetIndexBound_LabelVector) { + LabelVector labels0; + EXPECT_EQ(0, LabelManager::GetIndexBound(labels0)); + + LabelVector labels1_uninit = CreateLabelVectorBasic(1); + ASSERT_EQ(1U, labels1_uninit.size()); + EXPECT_EQ(0, LabelManager::GetIndexBound(labels1_uninit)); + + LabelVector labels1_init = CreateLabelVectorBasic(1); + ASSERT_EQ(1U, labels1_init.size()); + labels1_init[0].index_ = 99; + EXPECT_EQ(100, LabelManager::GetIndexBound(labels1_init)); + + LabelVector labels6_mixed = CreateLabelVectorBasic(6); + ASSERT_EQ(6U, labels6_mixed.size()); + labels6_mixed[1].index_ = 5; + labels6_mixed[2].index_ = 2; + labels6_mixed[4].index_ = 15; + labels6_mixed[5].index_ = 7; + EXPECT_EQ(16, LabelManager::GetIndexBound(labels6_mixed)); +} + +TEST(LabelManagerTest, GetIndexBound_RVAToLabel) { + RVAToLabel labels_map0; + EXPECT_EQ(0, LabelManager::GetIndexBound(labels_map0)); + + RVAToLabel labels1_map_init; + Label label1(static_cast<RVA>(0), 99, 1); + labels1_map_init[label1.rva_] = &label1; + EXPECT_EQ(100, LabelManager::GetIndexBound(labels1_map_init)); + + RVAToLabel labels_map6_mixed; + Label labels6[] = { + Label(static_cast<RVA>(1), 5, 1), + Label(static_cast<RVA>(2), 2, 1), + Label(static_cast<RVA>(4), 15, 1), + Label(static_cast<RVA>(5), 7, 1) + }; + for (Label& label : labels6) + labels_map6_mixed[label.rva_] = &label; + EXPECT_EQ(16, LabelManager::GetIndexBound(labels_map6_mixed)); +} + TEST(LabelManagerTest, Basic) { static const RVA kTestTargetsRaw[] = { 0x04000010, diff --git a/courgette/rel32_finder_win32_x86.h b/courgette/rel32_finder_win32_x86.h index 24523d3..01226ae 100644 --- a/courgette/rel32_finder_win32_x86.h +++ b/courgette/rel32_finder_win32_x86.h @@ -23,6 +23,7 @@ class Rel32FinderWin32X86 { RVA image_end_rva); virtual ~Rel32FinderWin32X86(); + // Subsumes rva != kUnassignedRVA. bool IsValidRVA(RVA rva) const { return rva < image_end_rva_; } // Swaps data in |rel32_locations_| to |dest|. |