summaryrefslogtreecommitdiffstats
path: root/sdch/open-vcdiff/src/vcdiffengine_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sdch/open-vcdiff/src/vcdiffengine_test.cc')
-rw-r--r--sdch/open-vcdiff/src/vcdiffengine_test.cc947
1 files changed, 947 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/vcdiffengine_test.cc b/sdch/open-vcdiff/src/vcdiffengine_test.cc
new file mode 100644
index 0000000..94a36b5
--- /dev/null
+++ b/sdch/open-vcdiff/src/vcdiffengine_test.cc
@@ -0,0 +1,947 @@
+// Copyright 2008 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <config.h>
+#include "vcdiffengine.h"
+#include <string.h> // memset, strlen
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "addrcache.h"
+#include "blockhash.h"
+#include "encodetable.h"
+#include "google/output_string.h"
+#include "rolling_hash.h"
+#include "testing.h"
+#include "varint_bigendian.h"
+#include "vcdiff_defs.h"
+
+namespace open_vcdiff {
+
+namespace {
+
+class VCDiffEngineTestBase : public testing::Test {
+ protected:
+ typedef std::string string;
+
+ // Some common definitions and helper functions used in the various tests
+ // for VCDiffEngine.
+ static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
+
+ VCDiffEngineTestBase() : interleaved_(false),
+ diff_output_string_(&diff_),
+ verify_position_(0),
+ saved_total_size_position_(0),
+ saved_delta_encoding_position_(0),
+ saved_section_sizes_position_(0),
+ data_bytes_(0),
+ instruction_bytes_(0),
+ address_bytes_(0) { }
+
+ virtual ~VCDiffEngineTestBase() { }
+
+ virtual void TearDown() {
+ VerifyMatchCounts();
+ }
+
+ // Copy string_without_spaces into newly allocated result buffer,
+ // but pad its contents with space characters so that every character
+ // in string_without_spaces corresponds to (block_size - 1)
+ // spaces in the result, followed by that character.
+ // For example:
+ // If string_without_spaces begins "The only thing"... and block_size is 4,
+ // then 3 space characters will be inserted
+ // between each letter in the result, as follows:
+ // " T h e o n l y t h i n g"...
+ // This makes testing simpler, because finding a block_size-byte match
+ // between the dictionary and target only depends on the
+ // trailing letter in each block.
+ // If no_initial_padding is true, then the first letter will not have
+ // spaces added in front of it.
+ static void MakeEachLetterABlock(const char* string_without_spaces,
+ const char** result,
+ int block_size,
+ bool no_initial_padding) {
+ const size_t length_without_spaces = strlen(string_without_spaces);
+ char* padded_text = new char[(block_size * length_without_spaces) + 1];
+ memset(padded_text, ' ', block_size * length_without_spaces);
+ char* padded_text_ptr = padded_text;
+ if (!no_initial_padding) {
+ padded_text_ptr += block_size - 1;
+ }
+ for (size_t i = 0; i < length_without_spaces; ++i) {
+ *padded_text_ptr = string_without_spaces[i];
+ padded_text_ptr += block_size;
+ }
+ *(padded_text_ptr - block_size + 1) = '\0';
+ *result = padded_text;
+ }
+
+ // These functions iterate through the decoded output and expect
+ // simple elements: bytes or variable-length integers.
+ void ExpectByte(char byte) {
+ EXPECT_GT(diff_.size(), verify_position_);
+ EXPECT_EQ(byte, diff_[verify_position_]);
+ ++verify_position_;
+ }
+
+ size_t ExpectVarint(int32_t expected_value) {
+ EXPECT_GT(diff_.size(), verify_position_);
+ const char* const original_position = &diff_[verify_position_];
+ const char* new_position = original_position;
+ const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
+ int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
+ &new_position);
+ EXPECT_LE(0, parsed_value);
+ size_t parsed_length = new_position - original_position;
+ EXPECT_EQ(expected_value, parsed_value);
+ EXPECT_EQ(expected_length, parsed_length);
+ verify_position_ += parsed_length;
+ return parsed_length;
+ }
+
+ size_t ExpectSize(size_t size) {
+ return ExpectVarint(static_cast<int32_t>(size));
+ }
+
+ size_t ExpectStringLength(const char* s) {
+ return ExpectSize(strlen(s));
+ }
+
+ void SkipVarint() {
+ EXPECT_GT(diff_.size(), verify_position_);
+ const char* const original_position = &diff_[verify_position_];
+ const char* new_position = original_position;
+ VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
+ size_t parsed_length = new_position - original_position;
+ verify_position_ += parsed_length;
+ }
+
+ void ExpectDataByte(char byte) {
+ ExpectByte(byte);
+ if (interleaved_) {
+ ++instruction_bytes_;
+ } else {
+ ++data_bytes_;
+ }
+ }
+
+ void ExpectDataString(const char *expected_string) {
+ const char* ptr = expected_string;
+ while (*ptr) {
+ ExpectDataByte(*ptr);
+ ++ptr;
+ }
+ }
+
+ void ExpectDataStringWithBlockSpacing(const char *expected_string,
+ bool trailing_spaces) {
+ const char* ptr = expected_string;
+ while (*ptr) {
+ for (int i = 0; i < (kBlockSize - 1); ++i) {
+ ExpectDataByte(' ');
+ }
+ ExpectDataByte(*ptr);
+ ++ptr;
+ }
+ if (trailing_spaces) {
+ for (int i = 0; i < (kBlockSize - 1); ++i) {
+ ExpectDataByte(' ');
+ }
+ }
+ }
+
+ void ExpectInstructionByte(char byte) {
+ ExpectByte(byte);
+ ++instruction_bytes_;
+ }
+
+ void ExpectInstructionVarint(int32_t value) {
+ instruction_bytes_ += ExpectVarint(value);
+ }
+
+ void ExpectAddressByte(char byte) {
+ ExpectByte(byte);
+ if (interleaved_) {
+ ++instruction_bytes_;
+ } else {
+ ++address_bytes_;
+ }
+ }
+
+ void ExpectAddressVarint(int32_t value) {
+ if (interleaved_) {
+ instruction_bytes_ += ExpectVarint(value);
+ } else {
+ address_bytes_ += ExpectVarint(value);
+ }
+ }
+
+ // The following functions leverage the fact that the encoder uses
+ // the default code table and cache sizes. They are able to search for
+ // instructions of a particular size. The logic for mapping from
+ // instruction type, mode, and size to opcode value is very different here
+ // from the logic used in encodetable.{h,cc}, so hopefully
+ // this version will help validate that the other is correct.
+ // This version uses conditional statements, while encodetable.h
+ // looks up values in a mapping table.
+ void ExpectAddress(int32_t address, int copy_mode) {
+ if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
+ ExpectAddressVarint(address);
+ } else {
+ ExpectAddressByte(address);
+ }
+ }
+
+ void ExpectAddInstruction(int size) {
+ if (size <= 18) {
+ ExpectInstructionByte(0x01 + size);
+ } else {
+ ExpectInstructionByte(0x01);
+ ExpectInstructionVarint(size);
+ }
+ }
+
+ void ExpectCopyInstruction(int size, int mode) {
+ if ((size >= 4) && (size <= 16)) {
+ ExpectInstructionByte(0x10 + (0x10 * mode) + size);
+ } else {
+ ExpectInstructionByte(0x13 + (0x10 * mode));
+ ExpectInstructionVarint(size);
+ }
+ ExpectMatch(size);
+ }
+
+ bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
+ if (!default_cache_.IsSameMode(copy_mode) &&
+ (add_size <= 4) &&
+ (copy_size >= 4) &&
+ (copy_size <= 6)) {
+ ExpectInstructionByte(0x9C +
+ (0x0C * copy_mode) +
+ (0x03 * add_size) +
+ copy_size);
+ ExpectMatch(copy_size);
+ return true;
+ } else if (default_cache_.IsSameMode(copy_mode) &&
+ (add_size <= 4) &&
+ (copy_size == 4)) {
+ ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
+ ExpectMatch(copy_size);
+ return true;
+ } else {
+ ExpectAddInstruction(add_size);
+ return false;
+ }
+ }
+
+ void ExpectAddInstructionForStringLength(const char* s) {
+ ExpectAddInstruction(static_cast<int>(strlen(s)));
+ }
+
+ // Call this function before beginning to iterate through the diff string
+ // using the Expect... functions.
+ // text must be NULL-terminated.
+ void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
+ const char* target_text) {
+ ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary)
+ ExpectStringLength(dictionary);
+ ExpectByte(0x00); // Source segment position: start of dictionary
+ saved_total_size_position_ = verify_position_;
+ SkipVarint(); // Length of the delta encoding
+ saved_delta_encoding_position_ = verify_position_;
+ ExpectStringLength(target_text);
+ ExpectByte(0x00); // Delta_indicator (no compression)
+ saved_section_sizes_position_ = verify_position_;
+ SkipVarint(); // length of data for ADDs and RUNs
+ SkipVarint(); // length of instructions section
+ SkipVarint(); // length of addresses for COPYs
+ }
+
+ // Call this function before beginning to iterating through the entire
+ // diff string using the Expect... functions. It makes sure that the
+ // size totals in the window header match the number of bytes that
+ // were parsed.
+ void VerifySizes() {
+ EXPECT_EQ(verify_position_, diff_.size());
+ const size_t delta_encoding_size = verify_position_ -
+ saved_delta_encoding_position_;
+ verify_position_ = saved_total_size_position_;
+ ExpectSize(delta_encoding_size);
+ verify_position_ = saved_section_sizes_position_;
+ ExpectSize(data_bytes_);
+ ExpectSize(instruction_bytes_);
+ ExpectSize(address_bytes_);
+ }
+
+ void ExpectMatch(size_t match_size) {
+ if (match_size >= expected_match_counts_.size()) {
+ // Be generous to avoid resizing again
+ expected_match_counts_.resize(match_size * 2, 0);
+ }
+ ++expected_match_counts_[match_size];
+ }
+
+ void VerifyMatchCounts() {
+ EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
+ expected_match_counts_.end(),
+ actual_match_counts_.begin()));
+ }
+
+ bool interleaved_;
+ string diff_;
+ OutputString<string> diff_output_string_;
+ size_t verify_position_;
+ size_t saved_total_size_position_;
+ size_t saved_delta_encoding_position_;
+ size_t saved_section_sizes_position_;
+ size_t data_bytes_;
+ size_t instruction_bytes_;
+ size_t address_bytes_;
+ VCDiffAddressCache default_cache_; // Used for finding mode values
+ std::vector<int> expected_match_counts_;
+ std::vector<int> actual_match_counts_;
+};
+
+class VCDiffEngineTest : public VCDiffEngineTestBase {
+ protected:
+ VCDiffEngineTest() :
+ engine_(dictionary_, strlen(dictionary_)) {
+ EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
+ }
+
+ virtual ~VCDiffEngineTest() { }
+
+
+ static void SetUpTestCase() {
+ MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
+ kBlockSize, false);
+ MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
+ }
+
+ static void TearDownTestCase() {
+ delete[] dictionary_;
+ delete[] target_;
+ }
+
+ void EncodeNothing(bool interleaved, bool target_matching) {
+ interleaved_ = interleaved;
+ VCDiffCodeTableWriter coder(interleaved);
+ coder.Init(engine_.dictionary_size());
+ engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
+ EXPECT_TRUE(diff_.empty());
+ actual_match_counts_ = coder.match_counts();
+ }
+
+ // text must be NULL-terminated
+ void EncodeText(const char* text, bool interleaved, bool target_matching) {
+ interleaved_ = interleaved;
+ VCDiffCodeTableWriter coder(interleaved);
+ coder.Init(engine_.dictionary_size());
+ engine_.Encode(text,
+ strlen(text),
+ target_matching,
+ &diff_output_string_,
+ &coder);
+ actual_match_counts_ = coder.match_counts();
+ }
+
+ void Encode(bool interleaved, bool target_matching) {
+ EncodeText(target_, interleaved, target_matching);
+ VerifyHeader();
+ }
+
+ void VerifyHeader() {
+ VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
+ }
+
+ static const char dictionary_without_spaces_[];
+ static const char target_without_spaces_[];
+
+ static const char* dictionary_;
+ static const char* target_;
+
+ const VCDiffEngine engine_;
+};
+
+#ifdef GTEST_HAS_DEATH_TEST
+typedef VCDiffEngineTest VCDiffEngineDeathTest;
+#endif // GTEST_HAS_DEATH_TEST
+
+const char VCDiffEngineTest::dictionary_without_spaces_[] =
+ "The only thing we have to fear is fear itself";
+
+const char VCDiffEngineTest::target_without_spaces_[] =
+ "What we hear is fearsome";
+
+const char* VCDiffEngineTest::dictionary_ = NULL;
+const char* VCDiffEngineTest::target_ = NULL;
+
+#ifdef GTEST_HAS_DEATH_TEST
+TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
+ EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
+ "twice");
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
+ EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
+ EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
+ EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
+ EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
+ const char* small_text = " ";
+ EncodeText(small_text,
+ /* interleaved = */ false,
+ /* target matching = */ false);
+ VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
+ // Data for ADDs
+ ExpectDataString(small_text);
+ // Instructions and sizes
+ ExpectAddInstructionForStringLength(small_text);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
+ const char* small_text = " ";
+ EncodeText(small_text,
+ /* interleaved = */ true,
+ /* target matching = */ false);
+ VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
+ // Interleaved section
+ ExpectAddInstructionForStringLength(small_text);
+ ExpectDataString(small_text);
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
+ Encode(/* interleaved = */ false, /* target matching = */ false);
+ // Data for ADDs
+ ExpectDataStringWithBlockSpacing("W", false);
+ ExpectDataByte('t');
+ ExpectDataByte('s');
+ ExpectDataByte('m');
+ // Instructions and sizes
+ if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
+ VCD_SELF_MODE)) {
+ ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
+ }
+ ExpectAddInstruction(1);
+ ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
+ ExpectCopyInstruction(11 * kBlockSize,
+ default_cache_.FirstNearMode());
+ if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
+ ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
+ }
+ if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
+ ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
+ }
+ // Addresses for COPY
+ ExpectAddressVarint(18 * kBlockSize); // "ha"
+ ExpectAddressVarint(14 * kBlockSize); // " we h"
+ ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
+ ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
+ ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
+ VerifySizes();
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
+ Encode(/* interleaved = */ true, /* target matching = */ false);
+ // Interleaved section
+ if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
+ VCD_SELF_MODE)) {
+ ExpectDataStringWithBlockSpacing("W", false);
+ ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
+ } else {
+ ExpectDataStringWithBlockSpacing("W", false);
+ }
+ ExpectAddressVarint(18 * kBlockSize); // "ha"
+ ExpectAddInstruction(1);
+ ExpectDataByte('t');
+ ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
+ ExpectAddressVarint(14 * kBlockSize); // " we h"
+ ExpectCopyInstruction(11 * kBlockSize,
+ default_cache_.FirstNearMode());
+ ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
+ if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
+ ExpectDataByte('s');
+ ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
+ } else {
+ ExpectDataByte('s');
+ }
+ ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
+ if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
+ ExpectDataByte('m');
+ ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
+ } else {
+ ExpectDataByte('m');
+ }
+ ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
+ VerifySizes();
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
+ Encode(/* interleaved = */ false, /* target matching = */ true);
+ // Data for ADDs
+ ExpectDataStringWithBlockSpacing("W", false);
+ ExpectDataByte('t');
+ ExpectDataByte('s');
+ ExpectDataByte('m');
+ // Instructions and sizes
+ if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
+ VCD_SELF_MODE)) {
+ ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
+ }
+ ExpectAddInstruction(1);
+ ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
+ ExpectCopyInstruction(11 * kBlockSize,
+ default_cache_.FirstNearMode());
+ if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
+ ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
+ }
+ if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
+ ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
+ }
+ // Addresses for COPY
+ ExpectAddressVarint(18 * kBlockSize); // "ha"
+ ExpectAddressVarint(14 * kBlockSize); // " we h"
+ ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
+ ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
+ ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
+ VerifySizes();
+}
+
+TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
+ Encode(/* interleaved = */ true, /* target matching = */ false);
+ // Interleaved section
+ if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
+ VCD_SELF_MODE)) {
+ ExpectDataStringWithBlockSpacing("W", false);
+ ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
+ } else {
+ ExpectDataStringWithBlockSpacing("W", false);
+ }
+ ExpectAddressVarint(18 * kBlockSize); // "ha"
+ ExpectAddInstruction(1);
+ ExpectDataByte('t');
+ ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
+ ExpectAddressVarint(14 * kBlockSize); // " we h"
+ ExpectCopyInstruction(11 * kBlockSize,
+ default_cache_.FirstNearMode());
+ ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
+ if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
+ ExpectDataByte('s');
+ ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
+ } else {
+ ExpectDataByte('s');
+ }
+ ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
+ if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
+ ExpectDataByte('m');
+ ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
+ } else {
+ ExpectDataByte('m');
+ }
+ ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
+ VerifySizes();
+}
+
+// This test case takes a dictionary containing several instances of the string
+// "weasel", and a target string which is identical to the dictionary
+// except that all instances of "weasel" have been replaced with the string
+// "moon-pie". It tests that COPY instructions are generated for all
+// boilerplate text (that is, the text between the "moon-pie" instances in
+// the target) and, if target matching is enabled, that each instance of
+// "moon-pie" (except the first one) is encoded using a COPY instruction
+// rather than an ADD.
+class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
+ protected:
+ // kCompressibleTestBlockSize:
+ // The size of the block to create for each letter in the
+ // dictionary and search string for the "compressible text" test.
+ // See MakeEachLetterABlock, below.
+ // If we use kCompressibleTestBlockSize = kBlockSize, then the
+ // encoder will find one match per unique letter in the HTML text.
+ // There are too many examples of "<" in the text for the encoder
+ // to iterate through them all, and some matches are not found.
+ // If we use kCompressibleTextBlockSize = 1, then the boilerplate
+ // text between "weasel" strings in the dictionary and "moon-pie"
+ // strings in the target may not be long enough to be found by
+ // the encoder's block-hash algorithm. A good value, that will give
+ // reproducible results across all block sizes, will be somewhere
+ // in between these extremes.
+ static const int kCompressibleTestBlockSize = kBlockSize / 4;
+ static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
+
+ WeaselsToMoonpiesTest() :
+ engine_(dictionary_, strlen(dictionary_)),
+ match_index_(0),
+ search_dictionary_(dictionary_, strlen(dictionary_)),
+ copied_moonpie_address_(0) {
+ EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
+ weasel_positions_[0] = 0;
+ after_weasel_[0] = 0;
+ moonpie_positions_[0] = 0;
+ after_moonpie_[0] = 0;
+ }
+
+ virtual ~WeaselsToMoonpiesTest() { }
+
+ static void SetUpTestCase() {
+ MakeEachLetterABlock(dictionary_without_spaces_,
+ &dictionary_,
+ kCompressibleTestBlockSize,
+ false);
+ MakeEachLetterABlock(target_without_spaces_,
+ &target_,
+ kCompressibleTestBlockSize,
+ false);
+ MakeEachLetterABlock(weasel_text_without_spaces_,
+ &weasel_text_,
+ kCompressibleTestBlockSize,
+ true);
+ MakeEachLetterABlock(moonpie_text_without_spaces_,
+ &moonpie_text_,
+ kCompressibleTestBlockSize,
+ true);
+ }
+
+ static void TearDownTestCase() {
+ delete[] dictionary_;
+ delete[] target_;
+ delete[] weasel_text_;
+ delete[] moonpie_text_;
+ }
+
+ // text must be NULL-terminated
+ void EncodeText(const char* text, bool interleaved, bool target_matching) {
+ interleaved_ = interleaved;
+ VCDiffCodeTableWriter coder(interleaved);
+ coder.Init(engine_.dictionary_size());
+ engine_.Encode(text,
+ strlen(text),
+ target_matching,
+ &diff_output_string_,
+ &coder);
+ actual_match_counts_ = coder.match_counts();
+ }
+
+ void Encode(bool interleaved, bool target_matching) {
+ EncodeText(target_, interleaved, target_matching);
+ VerifyHeader();
+ }
+
+ void VerifyHeader() {
+ VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
+ }
+
+ void ExpectCopyForSize(size_t size, int mode) {
+ ExpectCopyInstruction(static_cast<int>(size), mode);
+ }
+
+ void ExpectAddForSize(size_t size) {
+ ExpectAddInstruction(static_cast<int>(size));
+ }
+
+ void ExpectAddressVarintForSize(size_t value) {
+ ExpectAddressVarint(static_cast<int32_t>(value));
+ }
+
+ void FindNextMoonpie(bool include_trailing_spaces) {
+ ++match_index_;
+ SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
+ AfterLastWeasel()));
+ if (CurrentWeaselPosition() == string::npos) {
+ SetCurrentMoonpiePosition(string::npos);
+ } else {
+ SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
+ + strlen(weasel_text_)
+ + (include_trailing_spaces ?
+ kTrailingSpaces : 0));
+ SetCurrentMoonpiePosition(AfterLastMoonpie()
+ + CurrentBoilerplateLength());
+ SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
+ + strlen(moonpie_text_)
+ + (include_trailing_spaces ?
+ kTrailingSpaces : 0));
+ }
+ }
+ bool NoMoreMoonpies() const {
+ return CurrentMoonpiePosition() == string::npos;
+ }
+ size_t CurrentWeaselPosition() const {
+ return weasel_positions_[match_index_];
+ }
+ size_t LastWeaselPosition() const {
+ return weasel_positions_[match_index_ - 1];
+ }
+ size_t CurrentMoonpiePosition() const {
+ return moonpie_positions_[match_index_];
+ }
+ size_t LastMoonpiePosition() const {
+ return moonpie_positions_[match_index_ - 1];
+ }
+ size_t AfterLastWeasel() const {
+ CHECK_GE(match_index_, 1);
+ return after_weasel_[match_index_ - 1];
+ }
+ size_t AfterPreviousWeasel() const {
+ CHECK_GE(match_index_, 2);
+ return after_weasel_[match_index_ - 2];
+ }
+ size_t AfterLastMoonpie() const {
+ CHECK_GE(match_index_, 1);
+ return after_moonpie_[match_index_ - 1];
+ }
+ size_t AfterPreviousMoonpie() const {
+ CHECK_GE(match_index_, 2);
+ return after_moonpie_[match_index_ - 2];
+ }
+
+ void SetCurrentWeaselPosition(size_t value) {
+ weasel_positions_[match_index_] = value;
+ }
+ void SetCurrentAfterWeaselPosition(size_t value) {
+ after_weasel_[match_index_] = value;
+ }
+ void SetCurrentMoonpiePosition(size_t value) {
+ moonpie_positions_[match_index_] = value;
+ }
+ void SetCurrentAfterMoonpiePosition(size_t value) {
+ after_moonpie_[match_index_] = value;
+ }
+
+ // Find the length of the text in between the "weasel" strings in the
+ // compressible dictionary, which is the same as the text between
+ // the "moon-pie" strings in the compressible target.
+ size_t CurrentBoilerplateLength() const {
+ CHECK_GE(match_index_, 1);
+ return CurrentWeaselPosition() - AfterLastWeasel();
+ }
+ size_t DistanceFromLastWeasel() const {
+ CHECK_GE(match_index_, 1);
+ return CurrentWeaselPosition() - LastWeaselPosition();
+ }
+ size_t DistanceFromLastMoonpie() const {
+ CHECK_GE(match_index_, 1);
+ return CurrentMoonpiePosition() - LastMoonpiePosition();
+ }
+ size_t DistanceBetweenLastTwoWeasels() const {
+ CHECK_GE(match_index_, 2);
+ return AfterLastWeasel() - AfterPreviousWeasel();
+ }
+ size_t DistanceBetweenLastTwoMoonpies() const {
+ CHECK_GE(match_index_, 2);
+ return AfterLastMoonpie() - AfterPreviousMoonpie();
+ }
+
+ int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
+ int UpdateCopyModeForMoonpie(int copy_mode) const;
+ int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
+
+ void CopyBoilerplateAndAddMoonpie(int copy_mode);
+ void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
+
+ static const char dictionary_without_spaces_[];
+ static const char target_without_spaces_[];
+ static const char weasel_text_without_spaces_[];
+ static const char moonpie_text_without_spaces_[];
+
+ static const char* dictionary_;
+ static const char* target_;
+ static const char* weasel_text_;
+ static const char* moonpie_text_;
+
+ const VCDiffEngine engine_;
+ size_t weasel_positions_[128];
+ size_t after_weasel_[128];
+ size_t moonpie_positions_[128];
+ size_t after_moonpie_[128];
+ int match_index_;
+ string search_dictionary_;
+ size_t copied_moonpie_address_;
+};
+
+// Care is taken in the formulation of the dictionary
+// to ensure that the surrounding letters do not match; for example,
+// there are not two instances of the string "weasels". Otherwise,
+// the matching behavior would not be as predictable.
+const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
+ "<html>\n"
+ "<head>\n"
+ "<meta content=\"text/html; charset=ISO-8859-1\"\n"
+ "http-equiv=\"content-type\">\n"
+ "<title>All about weasels</title>\n"
+ "</head>\n"
+ "<!-- You will notice that the word \"weasel\" may be replaced"
+ " with something else -->\n"
+ "<body>\n"
+ "<h1>All about the weasel: highly compressible HTML text</h1>"
+ "<ul>\n"
+ "<li>Don\'t look a gift weasel in its mouth.</li>\n"
+ "<li>This item makes sure the next occurrence is found.</li>\n"
+ "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
+ "</ul>\n"
+ "<br>\n"
+ "</body>\n"
+ "</html>\n";
+
+const char WeaselsToMoonpiesTest::target_without_spaces_[] =
+ "<html>\n"
+ "<head>\n"
+ "<meta content=\"text/html; charset=ISO-8859-1\"\n"
+ "http-equiv=\"content-type\">\n"
+ "<title>All about moon-pies</title>\n"
+ "</head>\n"
+ "<!-- You will notice that the word \"moon-pie\" may be replaced"
+ " with something else -->\n"
+ "<body>\n"
+ "<h1>All about the moon-pie: highly compressible HTML text</h1>"
+ "<ul>\n"
+ "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
+ "<li>This item makes sure the next occurrence is found.</li>\n"
+ "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
+ "</ul>\n"
+ "<br>\n"
+ "</body>\n"
+ "</html>\n";
+
+const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
+const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
+
+const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
+const char* WeaselsToMoonpiesTest::target_ = NULL;
+const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
+const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
+
+int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
+ int copy_mode) const {
+ size_t copy_address = 0;
+ if (default_cache_.IsSelfMode(copy_mode)) {
+ copy_address = AfterLastWeasel();
+ } else if (default_cache_.IsNearMode(copy_mode)) {
+ copy_address = DistanceBetweenLastTwoWeasels();
+ } else if (default_cache_.IsSameMode(copy_mode)) {
+ copy_address = AfterLastWeasel() % 256;
+ }
+ return static_cast<int32_t>(copy_address);
+}
+
+int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
+ if (copy_mode == default_cache_.FirstSameMode()) {
+ return default_cache_.FirstSameMode()
+ + static_cast<int>((copied_moonpie_address_ / 256) % 3);
+ } else {
+ return copy_mode;
+ }
+}
+
+int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
+ int copy_mode) const {
+ size_t copy_address = 0;
+ if (default_cache_.IsHereMode(copy_mode)) {
+ copy_address = DistanceFromLastMoonpie();
+ } else if (default_cache_.IsNearMode(copy_mode)) {
+ copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
+ } else if (default_cache_.IsSameMode(copy_mode)) {
+ copy_address = copied_moonpie_address_ % 256;
+ }
+ return static_cast<int32_t>(copy_address);
+}
+
+// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
+// in the encoding.
+void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
+ EXPECT_FALSE(NoMoreMoonpies());
+ ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
+ ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
+ ExpectAddInstructionForStringLength(moonpie_text_);
+ ExpectDataString(moonpie_text_);
+}
+
+// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
+// in the encoding. The "moon-pie" text will be copied from the previously
+// encoded target.
+void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
+ int copy_mode,
+ int moonpie_copy_mode) {
+ EXPECT_FALSE(NoMoreMoonpies());
+ ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
+ ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
+ moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
+ ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
+ ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
+ moonpie_copy_mode);
+ copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
+}
+
+TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
+ Encode(/* interleaved = */ true, /* target matching = */ false);
+ FindNextMoonpie(false);
+ // Expect all five "weasel"s to be replaced with "moon-pie"s
+ CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
+ FindNextMoonpie(false);
+ CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
+ FindNextMoonpie(false);
+ CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
+ FindNextMoonpie(false);
+ CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
+ FindNextMoonpie(false);
+ CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
+ FindNextMoonpie(false);
+ EXPECT_TRUE(NoMoreMoonpies());
+ ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
+ default_cache_.FirstNearMode());
+ ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
+ VerifySizes();
+}
+
+TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
+ Encode(/* interleaved = */ true, /* target matching = */ true);
+ // Expect all five "weasel"s to be replaced with "moon-pie"s.
+ // Every "moon-pie" after the first one should be copied from the
+ // previously encoded target text.
+ FindNextMoonpie(false);
+ CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
+ FindNextMoonpie(true);
+ CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
+ FindNextMoonpie(true);
+ CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
+ default_cache_.FirstSameMode());
+ FindNextMoonpie(true);
+ CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
+ VCD_HERE_MODE);
+ FindNextMoonpie(true);
+ CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
+ default_cache_.FirstSameMode());
+ FindNextMoonpie(true);
+ EXPECT_TRUE(NoMoreMoonpies());
+ ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
+ default_cache_.FirstNearMode() + 3);
+ ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
+ VerifySizes();
+}
+
+} // anonymous namespace
+} // namespace open-vcdiff