summary refs log tree commit diff stats
path: root/sdch/open-vcdiff/src/google/vcencoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'sdch/open-vcdiff/src/google/vcencoder.h')
-rw-r--r-- sdch/open-vcdiff/src/google/vcencoder.h 298
1 files changed, 298 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/google/vcencoder.h b/sdch/open-vcdiff/src/google/vcencoder.h
new file mode 100644
index 0000000..64cdb86
--- /dev/null
+++ b/sdch/open-vcdiff/src/google/vcencoder.h
@@ -0,0 +1,298 @@
+// Copyright 2007 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_VCDIFF_VCENCODER_H_
+#define OPEN_VCDIFF_VCENCODER_H_
+
+#include <stddef.h> // size_t
+#include <vector>
+#include "google/output_string.h"
+
+namespace open_vcdiff {
+
+class VCDiffEngine;
+class VCDiffStreamingEncoderImpl;
+
+// Bit flags, passed to the VCDiffStreamingEncoder constructor or to
+// VCDiffEncoder::SetFormatFlags(), that determine whether certain
+// open-vcdiff format extensions (which are not part of the RFC 3284
+// draft standard for VCDIFF) are employed.
+//
+// Because these extensions are not part of the VCDIFF standard, if
+// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller
+// must be certain that the receiver of the data will be using open-vcdiff
+// to decode the delta file, or at least that the receiver can interpret
+// these extensions. The encoder will use an 'S' as the fourth character
+// in the delta file to indicate that non-standard extensions are being used.
+// The non-zero values occupy distinct bits, so they can be OR'ed together.
+enum VCDiffFormatExtensionFlagValues {
+ // No extensions: the encoded format will conform to the RFC 3284
+ // draft standard for VCDIFF.
+ VCD_STANDARD_FORMAT = 0x00,
+ // If this flag is specified, then the encoder writes each delta file
+ // window by interleaving instructions and sizes with their corresponding
+ // addresses and data, rather than placing these elements
+ // into three separate sections. This facilitates providing partially
+ // decoded results when only a portion of a delta file window is received
+ // (e.g. when HTTP over TCP is used as the transmission protocol).
+ VCD_FORMAT_INTERLEAVED = 0x01,
+ // If this flag is specified, then an Adler32 checksum
+ // of the target window data is included in the delta window.
+ VCD_FORMAT_CHECKSUM = 0x02
+};
+
+typedef int VCDiffFormatExtensionFlags;  // Bitmask of the flag values above.
+
+// A HashedDictionary must be constructed from the dictionary data
+// in order to use VCDiffStreamingEncoder. If the same dictionary will
+// be used to perform several encoding operations, then the caller should
+// create the HashedDictionary once and cache it for reuse. This object
+// is thread-safe: the same const HashedDictionary can be used
+// by several threads simultaneously, each with its own VCDiffStreamingEncoder.
+//
+// dictionary_contents is copied into the HashedDictionary, so the
+// caller may free that string, if desired, after the constructor returns.
+// dictionary_size is presumably its length in bytes (confirm in the .cc).
+class HashedDictionary {
+ public:
+ HashedDictionary(const char* dictionary_contents,
+ size_t dictionary_size);
+ ~HashedDictionary();
+
+ // Init() must be called before using the HashedDictionary as an argument
+ // to the VCDiffStreamingEncoder, or for any other purpose except
+ // destruction. It returns true if initialization succeeded, or false
+ // if an error occurred, in which case the caller should destroy the object
+ // without using it.
+ bool Init();
+
+ const VCDiffEngine* engine() const { return engine_; }  // Read-only accessor.
+
+ private:
+ const VCDiffEngine* engine_;
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ HashedDictionary(const HashedDictionary&); // NOLINT
+ void operator=(const HashedDictionary&);
+};
+
+// The standard streaming interface to the VCDIFF (RFC 3284) encoder.
+// "Streaming" in this context means that, even though the entire set of
+// input data to be encoded may not be available at once, the encoder
+// can produce partial output based on what is available. Of course,
+// the caller should try to maximize the sizes of the data chunks passed
+// to the encoder.
+class VCDiffStreamingEncoder {
+ public:
+ // The HashedDictionary object passed to the constructor must remain valid,
+ // without being deleted, for the lifetime of the VCDiffStreamingEncoder
+ // object.
+ //
+ // format_extensions allows certain open-vcdiff extensions to the VCDIFF
+ // format to be included in the encoded output. These extensions are not
+ // part of the RFC 3284 draft standard, so specifying any extension flags
+ // will make the output compatible only with open-vcdiff, or with other
+ // VCDIFF implementations that accept these extensions. See above for an
+ // explanation of each possible flag value.
+ //
+ // *** look_for_target_matches:
+ // The VCDIFF format allows COPY instruction addresses to reference data from
+ // the source (dictionary), or from previously encoded target data.
+ //
+ // If look_for_target_matches is false, then the encoder will only
+ // produce COPY instructions that reference source data from the dictionary,
+ // never from previously encoded target data. This will speed up the encoding
+ // process, but the encoded data will not be as compact.
+ //
+ // If this value is true, then the encoder will produce COPY instructions
+ // that reference either source data or target data. A COPY instruction from
+ // the previously encoded target data may even extend into the range of the
+ // data being produced by that same COPY instruction; for example, if the
+ // previously encoded target data is "LA", then a single COPY instruction of
+ // length 10 can produce the additional target data "LALALALALA".
+ //
+ // There is a third type of COPY instruction that starts within
+ // the source data and extends from the end of the source data
+ // into the beginning of the target data. This VCDIFF encoder will never
+ // produce a COPY instruction of this third type (regardless of the value of
+ // look_for_target_matches) because the cost of checking for matches
+ // across the source-target boundary would not justify its benefits.
+ //
+ VCDiffStreamingEncoder(const HashedDictionary* dictionary,
+ VCDiffFormatExtensionFlags format_extensions,
+ bool look_for_target_matches);
+ ~VCDiffStreamingEncoder();
+
+ // The client should use these routines as follows:
+ // HashedDictionary hd(dictionary, dictionary_size);
+ // if (!hd.Init()) {
+ // HandleError();
+ // return;
+ // }
+ // string output_string;
+ // VCDiffStreamingEncoder v(&hd, VCD_STANDARD_FORMAT, false);
+ // if (!v.StartEncoding(&output_string)) {
+ // HandleError();
+ // return; // No need to call FinishEncoding()
+ // }
+ // Process(output_string.data(), output_string.size());
+ // output_string.clear();
+ // while (get data_buf) {
+ // if (!v.EncodeChunk(data_buf, data_len, &output_string)) {
+ // HandleError();
+ // return; // No need to call FinishEncoding()
+ // }
+ // // The encoding is appended to output_string at each call,
+ // // so clear output_string once its contents have been processed.
+ // Process(output_string.data(), output_string.size());
+ // output_string.clear();
+ // }
+ // if (!v.FinishEncoding(&output_string)) {
+ // HandleError();
+ // return;
+ // }
+ // Process(output_string.data(), output_string.size());
+ // output_string.clear();
+ //
+ // I.e., the allowed pattern of calls is
+ // StartEncoding EncodeChunk* FinishEncoding
+ //
+ // The size of the encoded output depends on the sizes of the chunks
+ // passed in (i.e. the chunking boundary affects compression).
+ // However the decoded output is independent of chunk boundaries.
+
+ // Sets up the data structures for encoding.
+ // Writes a VCDIFF delta file header (as defined in RFC 3284 section 4.1)
+ // to *output_string.
+ //
+ // Note: we *append*, so the old contents of *output_string stick around.
+ // This convention differs from the non-streaming Encode/Decode
+ // interfaces in VCDiffEncoder.
+ //
+ // If an error occurs, this function returns false; otherwise it returns true.
+ // If this function returns false, the caller does not need to call
+ // FinishEncoding or to do any cleanup except destroying the
+ // VCDiffStreamingEncoder object.
+ template<class OutputType>
+ bool StartEncoding(OutputType* output) {
+ OutputString<OutputType> output_string(output);  // Adapter for the call below.
+ return StartEncodingToInterface(&output_string);
+ }
+
+ bool StartEncodingToInterface(OutputStringInterface* output_string);
+
+ // Appends compressed encoding for "data" (one complete VCDIFF delta window)
+ // to *output_string.
+ // If an error occurs (for example, if StartEncoding was not called
+ // earlier or StartEncoding returned false), this function returns false;
+ // otherwise it returns true. The caller does not need to call FinishEncoding
+ // or do any cleanup except destroying the VCDiffStreamingEncoder
+ // if this function returns false.
+ template<class OutputType>
+ bool EncodeChunk(const char* data, size_t len, OutputType* output) {
+ OutputString<OutputType> output_string(output);  // Adapter for the call below.
+ return EncodeChunkToInterface(data, len, &output_string);
+ }
+
+ bool EncodeChunkToInterface(const char* data, size_t len,
+ OutputStringInterface* output_string);
+
+ // Finishes encoding and appends any leftover encoded data to *output_string.
+ // If an error occurs (for example, if StartEncoding was not called
+ // earlier or StartEncoding returned false), this function returns false;
+ // otherwise it returns true. The caller does not need to
+ // do any cleanup except destroying the VCDiffStreamingEncoder
+ // if this function returns false.
+ template<class OutputType>
+ bool FinishEncoding(OutputType* output) {
+ OutputString<OutputType> output_string(output);  // Adapter for the call below.
+ return FinishEncodingToInterface(&output_string);
+ }
+
+ bool FinishEncodingToInterface(OutputStringInterface* output_string);
+
+ // Replaces the contents of match_counts with a vector of integers,
+ // one for each possible match length. The value of match_counts[n]
+ // is equal to the number of matches of length n found so far
+ // for this VCDiffStreamingEncoder object.
+ void GetMatchCounts(std::vector<int>* match_counts) const;
+
+ private:
+ VCDiffStreamingEncoderImpl* const impl_;
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT
+ void operator=(const VCDiffStreamingEncoder&);
+};
+
+// A simpler (non-streaming) interface to the VCDIFF encoder that can be used
+// if the entire target data string is available.
+// The object holds its own HashedDictionary and deletes encoder_ when destroyed.
+class VCDiffEncoder {
+ public:
+ VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)
+ : dictionary_(dictionary_contents, dictionary_size),
+ encoder_(NULL),  // Starts null; presumably created on first Encode().
+ flags_(VCD_STANDARD_FORMAT),
+ look_for_target_matches_(true) { }
+
+ ~VCDiffEncoder() {
+ delete encoder_;
+ }
+
+ // By default, VCDiffEncoder uses standard VCDIFF format. This function
+ // can be used before calling Encode(), to specify that interleaved format
+ // and/or checksum format should be used.
+ void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }
+
+ // By default, VCDiffEncoder looks for matches in the dictionary and also in
+ // the previously encoded target data. This function can be used before
+ // calling Encode(), to specify whether or not target matching should be
+ // enabled.
+ void SetTargetMatching(bool look_for_target_matches) {
+ look_for_target_matches_ = look_for_target_matches;
+ }
+
+ // Replaces the old contents of *output with the encoded form of
+ // target_data, and returns whatever EncodeToInterface() returns.
+ template<class OutputType>
+ bool Encode(const char* target_data,
+ size_t target_len,
+ OutputType* output) {
+ OutputString<OutputType> output_string(output);  // Adapter for the call below.
+ return EncodeToInterface(target_data, target_len, &output_string);
+ }
+
+ private:
+ bool EncodeToInterface(const char* target_data,
+ size_t target_len,
+ OutputStringInterface* output_string);
+
+ HashedDictionary dictionary_;
+ VCDiffStreamingEncoder* encoder_;
+ VCDiffFormatExtensionFlags flags_;
+ bool look_for_target_matches_;
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ VCDiffEncoder(const VCDiffEncoder&); // NOLINT
+ void operator=(const VCDiffEncoder&);
+};
+
+} // namespace open_vcdiff
+
+#endif // OPEN_VCDIFF_VCENCODER_H_