From 809972414ab2f11a33b8f286cc1473bb33a3b8e3 Mon Sep 17 00:00:00 2001 From: huangs Date: Wed, 9 Sep 2015 16:37:02 -0700 Subject: [Courgette] Store delta encoding as signed int32. Owing to AdjustmentMethod permuting Rel32Address items, the difference between successive items may be negative. Previously Courgette stored delta vectors using uint32, so negative values could take 5 bytes each. This CL changes storage of delta vectors to signed int32. Experiment (goo.gl/vvVADx) shows total diff file reduction of ~3%, and zipped diff file reduction of ~1.5%. We apply the same change for Abs32Address items for consistency. Review URL: https://codereview.chromium.org/1328703002 Cr-Commit-Position: refs/heads/master@{#348039} --- courgette/encode_decode_unittest.cc | 8 +- courgette/encoded_program.cc | 47 ++++-------- courgette/encoded_program.h | 13 ++++ courgette/encoded_program_unittest.cc | 139 +++++++++++++++++++++++++++++----- 4 files changed, 152 insertions(+), 55 deletions(-) diff --git a/courgette/encode_decode_unittest.cc b/courgette/encode_decode_unittest.cc index 79cc822..2cf5cd4 100644 --- a/courgette/encode_decode_unittest.cc +++ b/courgette/encode_decode_unittest.cc @@ -70,20 +70,20 @@ void EncodeDecodeTest::TestAssembleToStreamDisassemble( TEST_F(EncodeDecodeTest, PE) { std::string file = FileContents("setup1.exe"); - TestAssembleToStreamDisassemble(file, 971851); + TestAssembleToStreamDisassemble(file, 972845); } TEST_F(EncodeDecodeTest, PE64) { std::string file = FileContents("chrome64_1.exe"); - TestAssembleToStreamDisassemble(file, 808845); + TestAssembleToStreamDisassemble(file, 809635); } TEST_F(EncodeDecodeTest, Elf_Small) { std::string file = FileContents("elf-32-1"); - TestAssembleToStreamDisassemble(file, 135989); + TestAssembleToStreamDisassemble(file, 136218); } TEST_F(EncodeDecodeTest, Elf_HighBSS) { std::string file = FileContents("elf-32-high-bss"); - TestAssembleToStreamDisassemble(file, 7309); + TestAssembleToStreamDisassemble(file, 7312); } diff 
--git a/courgette/encoded_program.cc b/courgette/encoded_program.cc index 6d6e416..0253894 100644 --- a/courgette/encoded_program.cc +++ b/courgette/encoded_program.cc @@ -23,19 +23,6 @@ namespace courgette { -// Stream indexes. -const int kStreamMisc = 0; -const int kStreamOps = 1; -const int kStreamBytes = 2; -const int kStreamAbs32Indexes = 3; -const int kStreamRel32Indexes = 4; -const int kStreamAbs32Addresses = 5; -const int kStreamRel32Addresses = 6; -const int kStreamCopyCounts = 7; -const int kStreamOriginAddresses = kStreamMisc; - -const int kStreamLimit = 9; - // Constructor is here rather than in the header. Although the constructor // appears to do nothing it is fact quite large because of the implicit calls to // field constructors. Ditto for the destructor. @@ -72,23 +59,23 @@ bool ReadVector(V* items, SourceStream* buffer) { return ok; } -// Serializes a vector, using delta coding followed by Varint32 coding. +// Serializes a vector, using delta coding followed by Varint32Signed coding. 
template -CheckBool WriteU32Delta(const V& set, SinkStream* buffer) { +CheckBool WriteSigned32Delta(const V& set, SinkStream* buffer) { size_t count = set.size(); bool ok = buffer->WriteSizeVarint32(count); uint32 prev = 0; - for (size_t i = 0; ok && i < count; ++i) { + for (size_t i = 0; ok && i < count; ++i) { uint32 current = set[i]; - uint32 delta = current - prev; - ok = buffer->WriteVarint32(delta); + int32 delta = current - prev; + ok = buffer->WriteVarint32Signed(delta); prev = current; } return ok; } template -static CheckBool ReadU32Delta(V* set, SourceStream* buffer) { +static CheckBool ReadSigned32Delta(V* set, SourceStream* buffer) { uint32 count; if (!buffer->ReadVarint32(&count)) @@ -97,17 +84,15 @@ static CheckBool ReadU32Delta(V* set, SourceStream* buffer) { set->clear(); bool ok = set->reserve(count); uint32 prev = 0; - - for (size_t i = 0; ok && i < count; ++i) { - uint32 delta; - ok = buffer->ReadVarint32(&delta); + for (size_t i = 0; ok && i < count; ++i) { + int32 delta; + ok = buffer->ReadVarint32Signed(&delta); if (ok) { - uint32 current = prev + delta; + uint32 current = static_cast(prev + delta); ok = set->push_back(current); prev = current; } } - return ok; } @@ -333,13 +318,13 @@ CheckBool EncodedProgram::WriteTo(SinkStreamSet* streams) { bool success = true; if (select & INCLUDE_ABS32_ADDRESSES) { - success &= WriteU32Delta(abs32_rva_, - streams->stream(kStreamAbs32Addresses)); + success &= WriteSigned32Delta(abs32_rva_, + streams->stream(kStreamAbs32Addresses)); } if (select & INCLUDE_REL32_ADDRESSES) { - success &= WriteU32Delta(rel32_rva_, - streams->stream(kStreamRel32Addresses)); + success &= WriteSigned32Delta(rel32_rva_, + streams->stream(kStreamRel32Addresses)); } if (select & INCLUDE_MISC) @@ -376,9 +361,9 @@ bool EncodedProgram::ReadFrom(SourceStreamSet* streams) { } image_base_ = (static_cast(high) << 32) | low; - if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses))) + if (!ReadSigned32Delta(&abs32_rva_, 
streams->stream(kStreamAbs32Addresses))) return false; - if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses))) + if (!ReadSigned32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses))) return false; if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses))) return false; diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h index e243986..641f523 100644 --- a/courgette/encoded_program.h +++ b/courgette/encoded_program.h @@ -14,6 +14,19 @@ namespace courgette { +// Stream indexes. +const int kStreamMisc = 0; +const int kStreamOps = 1; +const int kStreamBytes = 2; +const int kStreamAbs32Indexes = 3; +const int kStreamRel32Indexes = 4; +const int kStreamAbs32Addresses = 5; +const int kStreamRel32Addresses = 6; +const int kStreamCopyCounts = 7; +const int kStreamOriginAddresses = kStreamMisc; + +const int kStreamLimit = 9; + class SinkStream; class SinkStreamSet; class SourceStreamSet; diff --git a/courgette/encoded_program_unittest.cc b/courgette/encoded_program_unittest.cc index eeeca59..a2ce9b6 100644 --- a/courgette/encoded_program_unittest.cc +++ b/courgette/encoded_program_unittest.cc @@ -4,32 +4,78 @@ #include "courgette/encoded_program.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "courgette/disassembler.h" #include "courgette/streams.h" #include "testing/gtest/include/gtest/gtest.h" -TEST(EncodedProgramTest, Test) { - // - // Create a simple program with a few addresses and references and - // check that the bits produced are as expected. - // - courgette::EncodedProgram* program = new courgette::EncodedProgram(); +namespace { + +using courgette::EncodedProgram; + +struct AddressSpec { + int32 index; + courgette::RVA rva; +}; + +// Creates a simple new program with given addresses. The orders of elements +// in |abs32_specs| and |rel32_specs| are important. 
+scoped_ptr CreateTestProgram(AddressSpec* abs32_specs, + size_t num_abs32_specs, + AddressSpec* rel32_specs, + size_t num_rel32_specs) { + scoped_ptr program(new EncodedProgram()); uint32 base = 0x00900000; program->set_image_base(base); - EXPECT_TRUE(program->DefineRel32Label(5, 0)); // REL32 index 5 == base + 0 - EXPECT_TRUE(program->DefineAbs32Label(7, 4)); // ABS32 index 7 == base + 4 + for (size_t i = 0; i < num_abs32_specs; ++i) { + EXPECT_TRUE(program->DefineAbs32Label(abs32_specs[i].index, + abs32_specs[i].rva)); + } + for (size_t i = 0; i < num_rel32_specs; ++i) { + EXPECT_TRUE(program->DefineRel32Label(rel32_specs[i].index, + rel32_specs[i].rva)); + } program->EndLabels(); EXPECT_TRUE(program->AddOrigin(0)); // Start at base. - EXPECT_TRUE(program->AddAbs32(7)); - EXPECT_TRUE(program->AddRel32(5)); + for (size_t i = 0; i < num_abs32_specs; ++i) + EXPECT_TRUE(program->AddAbs32(abs32_specs[i].index)); + for (size_t i = 0; i < num_rel32_specs; ++i) + EXPECT_TRUE(program->AddRel32(rel32_specs[i].index)); + return program; +} + +bool CompareSink(const uint8 expected[], + size_t num_expected, + courgette::SinkStream* ss) { + size_t n = ss->Length(); + if (num_expected != n) + return false; + const uint8* buffer = ss->Buffer(); + return memcmp(&expected[0], buffer, n) == 0; +} + +} // namespace + +// Create a simple program with a few addresses and references and +// check that the bits produced are as expected. +TEST(EncodedProgramTest, Test) { + // ABS32 index 7 == base + 4. + AddressSpec abs32_specs[] = {{7, 4}}; + // REL32 index 5 == base + 0. + AddressSpec rel32_specs[] = {{5, 0}}; + scoped_ptr program( + CreateTestProgram(abs32_specs, arraysize(abs32_specs), + rel32_specs, arraysize(rel32_specs))); // Serialize and deserialize. 
courgette::SinkStreamSet sinks; EXPECT_TRUE(program->WriteTo(&sinks)); - delete program; + program.reset(); courgette::SinkStream sink; bool can_collect = sinks.CopyTo(&sink); @@ -42,7 +88,7 @@ TEST(EncodedProgramTest, Test) { bool can_get_source_streams = sources.Init(buffer, length); EXPECT_TRUE(can_get_source_streams); - courgette::EncodedProgram *encoded2 = new courgette::EncodedProgram(); + scoped_ptr encoded2(new EncodedProgram()); bool can_read = encoded2->ReadFrom(&sources); EXPECT_TRUE(can_read); @@ -50,17 +96,70 @@ TEST(EncodedProgramTest, Test) { courgette::SinkStream assembled; bool can_assemble = encoded2->AssembleTo(&assembled); EXPECT_TRUE(can_assemble); - delete encoded2; - - const void* assembled_buffer = assembled.Buffer(); - size_t assembled_length = assembled.Length(); + encoded2.reset(); - EXPECT_EQ(8U, assembled_length); - - static const uint8 golden[] = { + const uint8 golden[] = { 0x04, 0x00, 0x90, 0x00, // ABS32 to base + 4 0xF8, 0xFF, 0xFF, 0xFF // REL32 from next line to base + 2 }; + EXPECT_TRUE(CompareSink(golden, arraysize(golden), &assembled)); +} + +// A larger test with multiple addresses. We encode the program and check the +// contents of the address streams. +TEST(EncodedProgramTest, TestWriteAddress) { + // Absolute addresses by index: [_, _, _, 2, _, 23, _, 11]. + AddressSpec abs32_specs[] = {{7, 11}, {3, 2}, {5, 23}}; + // Relative addresses by index: [16, 7, _, 32]. + AddressSpec rel32_specs[] = {{0, 16}, {3, 32}, {1, 7}}; + scoped_ptr program( + CreateTestProgram(abs32_specs, arraysize(abs32_specs), + rel32_specs, arraysize(rel32_specs))); + + courgette::SinkStreamSet sinks; + EXPECT_TRUE(program->WriteTo(&sinks)); + program.reset(); - EXPECT_EQ(0, memcmp(assembled_buffer, golden, 8)); + // Check addresses in sinks. + const uint8 golden_abs32_indexes[] = { + 0x03, 0x07, 0x03, 0x05 // 3 indexes: [7, 3, 5]. 
+ }; + EXPECT_TRUE(CompareSink(golden_abs32_indexes, + arraysize(golden_abs32_indexes), + sinks.stream(courgette::kStreamAbs32Indexes))); + + const uint8 golden_rel32_indexes[] = { + 0x03, 0x00, 0x03, 0x01 // 3 indexes: [0, 3, 1]. + }; + EXPECT_TRUE(CompareSink(golden_rel32_indexes, + arraysize(golden_rel32_indexes), + sinks.stream(courgette::kStreamRel32Indexes))); + + // Addresses: [_, _, _, 2, _, 23, _, 11]. + // Padded: [0, 0, 0, 2, 2, 23, 23, 11]. + // Delta: [0, 0, 0, 2, 0, 21, 0, -12]. + // Hex: [0, 0, 0, 0x02, 0, 0x15, 0, 0xFFFFFFF4]. + // Complement neg: [0, 0, 0, 0x02, 0, 0x15, 0, (0x0B)]. + // Varint32 Signed: [0, 0, 0, 0x04, 0, 0x2A, 0, 0x17]. + const uint8 golden_abs32_addresses[] = { + 0x08, // 8 address deltas. + 0x00, 0x00, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x17, + }; + EXPECT_TRUE(CompareSink(golden_abs32_addresses, + arraysize(golden_abs32_addresses), + sinks.stream(courgette::kStreamAbs32Addresses))); + + // Addresses: [16, 7, _, 32]. + // Padded: [16, 7, 7, 32]. + // Delta: [16, -9, 0, 25]. + // Hex: [0x10, 0xFFFFFFF7, 0, 0x19]. + // Complement Neg: [0x10, (0x08), 0, 0x19]. + // Varint32 Signed: [0x20, 0x11, 0, 0x32]. + const uint8 golden_rel32_addresses[] = { + 0x04, // 4 address deltas. + 0x20, 0x11, 0x00, 0x32, + }; + EXPECT_TRUE(CompareSink(golden_rel32_addresses, + arraysize(golden_rel32_addresses), + sinks.stream(courgette::kStreamRel32Addresses))); } -- cgit v1.1