diff options
Diffstat (limited to 'sdch/open-vcdiff/src/google/vcencoder.h')
-rw-r--r-- | sdch/open-vcdiff/src/google/vcencoder.h | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/google/vcencoder.h b/sdch/open-vcdiff/src/google/vcencoder.h new file mode 100644 index 0000000..64cdb86 --- /dev/null +++ b/sdch/open-vcdiff/src/google/vcencoder.h @@ -0,0 +1,298 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_VCENCODER_H_ +#define OPEN_VCDIFF_VCENCODER_H_ + +#include <stddef.h> // size_t +#include <vector> +#include "google/output_string.h" + +namespace open_vcdiff { + +class VCDiffEngine; +class VCDiffStreamingEncoderImpl; + +// These flags are passed to the constructor of VCDiffStreamingEncoder +// to determine whether certain open-vcdiff format extensions +// (which are not part of the RFC 3284 draft standard for VCDIFF) +// are employed. +// +// Because these extensions are not part of the VCDIFF standard, if +// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller +// must be certain that the receiver of the data will be using open-vcdiff +// to decode the delta file, or at least that the receiver can interpret +// these extensions. The encoder will use an 'S' as the fourth character +// in the delta file to indicate that non-standard extensions are being used. +// +enum VCDiffFormatExtensionFlagValues { + // No extensions: the encoded format will conform to the RFC + // draft standard for VCDIFF. + VCD_STANDARD_FORMAT = 0x00, + // If this flag is specified, then the encoder writes each delta file + // window by interleaving instructions and sizes with their corresponding + // addresses and data, rather than placing these elements + // into three separate sections. This facilitates providing partially + // decoded results when only a portion of a delta file window is received + // (e.g. when HTTP over TCP is used as the transmission protocol.) + VCD_FORMAT_INTERLEAVED = 0x01, + // If this flag is specified, then an Adler32 checksum + // of the target window data is included in the delta window. + VCD_FORMAT_CHECKSUM = 0x02 +}; + +typedef int VCDiffFormatExtensionFlags; + +// A HashedDictionary must be constructed from the dictionary data +// in order to use VCDiffStreamingEncoder. If the same dictionary will +// be used to perform several encoding operations, then the caller should +// create the HashedDictionary once and cache it for reuse. This object +// is thread-safe: the same const HashedDictionary can be used +// by several threads simultaneously, each with its own VCDiffStreamingEncoder. +// +// dictionary_contents is copied into the HashedDictionary, so the +// caller may free that string, if desired, after the constructor returns. +// +class HashedDictionary { + public: + HashedDictionary(const char* dictionary_contents, + size_t dictionary_size); + ~HashedDictionary(); + + // Init() must be called before using the HashedDictionary as an argument + // to the VCDiffStreamingEncoder, or for any other purpose except + // destruction. It returns true if initialization succeeded, or false + // if an error occurred, in which case the caller should destroy the object + // without using it. + bool Init(); + + const VCDiffEngine* engine() const { return engine_; } + + private: + const VCDiffEngine* engine_; + + // Make the copy constructor and assignment operator private + // so that they don't inadvertently get used. + HashedDictionary(const HashedDictionary&); // NOLINT + void operator=(const HashedDictionary&); +}; + +// The standard streaming interface to the VCDIFF (RFC 3284) encoder. +// "Streaming" in this context means that, even though the entire set of +// input data to be encoded may not be available at once, the encoder +// can produce partial output based on what is available. Of course, +// the caller should try to maximize the sizes of the data chunks passed +// to the encoder. +class VCDiffStreamingEncoder { + public: + // The HashedDictionary object passed to the constructor must remain valid, + // without being deleted, for the lifetime of the VCDiffStreamingEncoder + // object. + // + // format_extensions allows certain open-vcdiff extensions to the VCDIFF + // format to be included in the encoded output. These extensions are not + // part of the RFC 3284 draft standard, so specifying any extension flags + // will make the output compatible only with open-vcdiff, or with other + // VCDIFF implementations that accept these extensions. See above for an + // explanation of each possible flag value. + // + // *** look_for_target_matches: + // The VCDIFF format allows COPY instruction addresses to reference data from + // the source (dictionary), or from previously encoded target data. + // + // If look_for_target_matches is false, then the encoder will only + // produce COPY instructions that reference source data from the dictionary, + // never from previously encoded target data. This will speed up the encoding + // process, but the encoded data will not be as compact. + // + // If this value is true, then the encoder will produce COPY instructions + // that reference either source data or target data. A COPY instruction from + // the previously encoded target data may even extend into the range of the + // data being produced by that same COPY instruction; for example, if the + // previously encoded target data is "LA", then a single COPY instruction of + // length 10 can produce the additional target data "LALALALALA". + // + // There is a third type of COPY instruction that starts within + // the source data and extends from the end of the source data + // into the beginning of the target data. This VCDIFF encoder will never + // produce a COPY instruction of this third type (regardless of the value of + // look_for_target_matches) because the cost of checking for matches + // across the source-target boundary would not justify its benefits. + // + VCDiffStreamingEncoder(const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches); + ~VCDiffStreamingEncoder(); + + // The client should use these routines as follows: + // HashedDictionary hd(dictionary, dictionary_size); + // if (!hd.Init()) { + // HandleError(); + // return; + // } + // string output_string; + // VCDiffStreamingEncoder v(hd, false, false); + // if (!v.StartEncoding(&output_string)) { + // HandleError(); + // return; // No need to call FinishEncoding() + // } + // Process(output_string.data(), output_string.size()); + // output_string.clear(); + // while (get data_buf) { + // if (!v.EncodeChunk(data_buf, data_len, &output_string)) { + // HandleError(); + // return; // No need to call FinishEncoding() + // } + // // The encoding is appended to output_string at each call, + // // so clear output_string once its contents have been processed. + // Process(output_string.data(), output_string.size()); + // output_string.clear(); + // } + // if (!v.FinishEncoding(&output_string)) { + // HandleError(); + // return; + // } + // Process(output_string.data(), output_string.size()); + // output_string.clear(); + // + // I.e., the allowed pattern of calls is + // StartEncoding EncodeChunk* FinishEncoding + // + // The size of the encoded output depends on the sizes of the chunks + // passed in (i.e. the chunking boundary affects compression). + // However the decoded output is independent of chunk boundaries. + + // Sets up the data structures for encoding. + // Writes a VCDIFF delta file header (as defined in RFC section 4.1) + // to *output_string. + // + // Note: we *append*, so the old contents of *output_string stick around. + // This convention differs from the non-streaming Encode/Decode + // interfaces in VCDiffEncoder. + // + // If an error occurs, this function returns false; otherwise it returns true. + // If this function returns false, the caller does not need to call + // FinishEncoding or to do any cleanup except destroying the + // VCDiffStreamingEncoder object. + template<class OutputType> + bool StartEncoding(OutputType* output) { + OutputString<OutputType> output_string(output); + return StartEncodingToInterface(&output_string); + } + + bool StartEncodingToInterface(OutputStringInterface* output_string); + + // Appends compressed encoding for "data" (one complete VCDIFF delta window) + // to *output_string. + // If an error occurs (for example, if StartEncoding was not called + // earlier or StartEncoding returned false), this function returns false; + // otherwise it returns true. The caller does not need to call FinishEncoding + // or do any cleanup except destroying the VCDiffStreamingEncoder + // if this function returns false. + template<class OutputType> + bool EncodeChunk(const char* data, size_t len, OutputType* output) { + OutputString<OutputType> output_string(output); + return EncodeChunkToInterface(data, len, &output_string); + } + + bool EncodeChunkToInterface(const char* data, size_t len, + OutputStringInterface* output_string); + + // Finishes encoding and appends any leftover encoded data to *output_string. + // If an error occurs (for example, if StartEncoding was not called + // earlier or StartEncoding returned false), this function returns false; + // otherwise it returns true. The caller does not need to + // do any cleanup except destroying the VCDiffStreamingEncoder + // if this function returns false. + template<class OutputType> + bool FinishEncoding(OutputType* output) { + OutputString<OutputType> output_string(output); + return FinishEncodingToInterface(&output_string); + } + + bool FinishEncodingToInterface(OutputStringInterface* output_string); + + // Replaces the contents of match_counts with a vector of integers, + // one for each possible match length. The value of match_counts[n] + // is equal to the number of matches of length n found so far + // for this VCDiffStreamingEncoder object. + void GetMatchCounts(std::vector<int>* match_counts) const; + + private: + VCDiffStreamingEncoderImpl* const impl_; + + // Make the copy constructor and assignment operator private + // so that they don't inadvertently get used. + VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT + void operator=(const VCDiffStreamingEncoder&); +}; + +// A simpler (non-streaming) interface to the VCDIFF encoder that can be used +// if the entire target data string is available. +// +class VCDiffEncoder { + public: + VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size) + : dictionary_(dictionary_contents, dictionary_size), + encoder_(NULL), + flags_(VCD_STANDARD_FORMAT), + look_for_target_matches_(true) { } + + ~VCDiffEncoder() { + delete encoder_; + } + + // By default, VCDiffEncoder uses standard VCDIFF format. This function + // can be used before calling Encode(), to specify that interleaved format + // and/or checksum format should be used. + void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; } + + // By default, VCDiffEncoder looks for matches in the dictionary and also in + // the previously encoded target data. This function can be used before + // calling Encode(), to specify whether or not target matching should be + // enabled. + void SetTargetMatching(bool look_for_target_matches) { + look_for_target_matches_ = look_for_target_matches; + } + + // Replaces old contents of output_string with the encoded form of + // target_data. + template<class OutputType> + bool Encode(const char* target_data, + size_t target_len, + OutputType* output) { + OutputString<OutputType> output_string(output); + return EncodeToInterface(target_data, target_len, &output_string); + } + + private: + bool EncodeToInterface(const char* target_data, + size_t target_len, + OutputStringInterface* output_string); + + HashedDictionary dictionary_; + VCDiffStreamingEncoder* encoder_; + VCDiffFormatExtensionFlags flags_; + bool look_for_target_matches_; + + // Make the copy constructor and assignment operator private + // so that they don't inadvertently get used. + VCDiffEncoder(const VCDiffEncoder&); // NOLINT + void operator=(const VCDiffEncoder&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_VCENCODER_H_ |