summaryrefslogtreecommitdiffstats
path: root/sdch/open-vcdiff/src/google/vcencoder.h
diff options
context:
space:
mode:
author Patrick Scott <phanna@android.com> 2010-02-04 10:37:17 -0500
committer Patrick Scott <phanna@android.com> 2010-02-04 10:39:42 -0500
commitc7f5f8508d98d5952d42ed7648c2a8f30a4da156 (patch)
treedd51dbfbf6670daa61279b3a19e7b1835b301dbf /sdch/open-vcdiff/src/google/vcencoder.h
parent139d8152182f9093f03d9089822b688e49fa7667 (diff)
downloadexternal_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.zip
external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.gz
external_chromium-c7f5f8508d98d5952d42ed7648c2a8f30a4da156.tar.bz2
Initial source checkin.
The source files were determined by building net_unittests in chromium's source tree. Some of the obvious libraries were left out (v8, gmock, gtest). The Android.mk file has all the sources (minus unittests and tools) that were used during net_unittests compilation. Nothing builds yet because of STL but that is the next task. The .cpp files will most likely not compile anyways because of the LOCAL_CPP_EXTENSION mod. I will have to break this into multiple projects to get around that limitation.
Diffstat (limited to 'sdch/open-vcdiff/src/google/vcencoder.h')
-rw-r--r--sdch/open-vcdiff/src/google/vcencoder.h298
1 files changed, 298 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/google/vcencoder.h b/sdch/open-vcdiff/src/google/vcencoder.h
new file mode 100644
index 0000000..64cdb86
--- /dev/null
+++ b/sdch/open-vcdiff/src/google/vcencoder.h
@@ -0,0 +1,298 @@
+// Copyright 2007 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef OPEN_VCDIFF_VCENCODER_H_
+#define OPEN_VCDIFF_VCENCODER_H_
+
+#include <stddef.h> // size_t
+#include <vector>
+#include "google/output_string.h"
+
+namespace open_vcdiff {
+
+class VCDiffEngine;
+class VCDiffStreamingEncoderImpl;
+
+// These flags are passed to the constructor of VCDiffStreamingEncoder
+// to determine whether certain open-vcdiff format extensions
+// (which are not part of the RFC 3284 draft standard for VCDIFF)
+// are employed.
+//
+// Because these extensions are not part of the VCDIFF standard, if
+// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller
+// must be certain that the receiver of the data will be using open-vcdiff
+// to decode the delta file, or at least that the receiver can interpret
+// these extensions. The encoder will use an 'S' as the fourth character
+// in the delta file to indicate that non-standard extensions are being used.
+//
+enum VCDiffFormatExtensionFlagValues {
+ // No extensions: the encoded format will conform to the RFC 3284
+ // draft standard for VCDIFF.
+ VCD_STANDARD_FORMAT = 0x00,
+ // If this flag is specified, then the encoder writes each delta file
+ // window by interleaving instructions and sizes with their corresponding
+ // addresses and data, rather than placing these elements
+ // into three separate sections. This facilitates providing partially
+ // decoded results when only a portion of a delta file window is received
+ // (e.g., when HTTP over TCP is used as the transmission protocol).
+ VCD_FORMAT_INTERLEAVED = 0x01,
+ // If this flag is specified, then an Adler32 checksum
+ // of the target window data is included in the delta window.
+ VCD_FORMAT_CHECKSUM = 0x02
+};
+
+typedef int VCDiffFormatExtensionFlags;  // Carries the flag values above; presumably bitwise-OR'ed together.
+
+// A HashedDictionary must be constructed from the dictionary data
+// in order to use VCDiffStreamingEncoder. If the same dictionary will
+// be used to perform several encoding operations, then the caller should
+// create the HashedDictionary once and cache it for reuse. This object
+// is thread-safe: the same const HashedDictionary can be used
+// by several threads simultaneously, each with its own VCDiffStreamingEncoder.
+//
+// dictionary_contents is copied into the HashedDictionary, so the
+// caller may free that string, if desired, after the constructor returns.
+//
+class HashedDictionary {
+ public:
+ HashedDictionary(const char* dictionary_contents,
+ size_t dictionary_size);
+ ~HashedDictionary();
+
+ // Init() must be called before using the HashedDictionary as an argument
+ // to the VCDiffStreamingEncoder, or for any other purpose except
+ // destruction. It returns true if initialization succeeded, or false
+ // if an error occurred, in which case the caller should destroy the object
+ // without using it.
+ bool Init();
+
+ const VCDiffEngine* engine() const { return engine_; }  // Returns the stored engine pointer as-is.
+
+ private:
+ const VCDiffEngine* engine_;  // NOTE(review): presumably set up by Init() -- confirm in the .cc file.
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ HashedDictionary(const HashedDictionary&); // NOLINT
+ void operator=(const HashedDictionary&);
+};
+
+// The standard streaming interface to the VCDIFF (RFC 3284) encoder.
+// "Streaming" in this context means that, even though the entire set of
+// input data to be encoded may not be available at once, the encoder
+// can produce partial output based on what is available. Of course,
+// the caller should try to maximize the sizes of the data chunks passed
+// to the encoder.
+class VCDiffStreamingEncoder {
+ public:
+ // The HashedDictionary object passed to the constructor must remain valid,
+ // without being deleted, for the lifetime of the VCDiffStreamingEncoder
+ // object.
+ //
+ // format_extensions allows certain open-vcdiff extensions to the VCDIFF
+ // format to be included in the encoded output. These extensions are not
+ // part of the RFC 3284 draft standard, so specifying any extension flags
+ // will make the output compatible only with open-vcdiff, or with other
+ // VCDIFF implementations that accept these extensions. See above for an
+ // explanation of each possible flag value.
+ //
+ // *** look_for_target_matches:
+ // The VCDIFF format allows COPY instruction addresses to reference data from
+ // the source (dictionary), or from previously encoded target data.
+ //
+ // If look_for_target_matches is false, then the encoder will only
+ // produce COPY instructions that reference source data from the dictionary,
+ // never from previously encoded target data. This will speed up the encoding
+ // process, but the encoded data will not be as compact.
+ //
+ // If this value is true, then the encoder will produce COPY instructions
+ // that reference either source data or target data. A COPY instruction from
+ // the previously encoded target data may even extend into the range of the
+ // data being produced by that same COPY instruction; for example, if the
+ // previously encoded target data is "LA", then a single COPY instruction of
+ // length 10 can produce the additional target data "LALALALALA".
+ //
+ // There is a third type of COPY instruction that starts within
+ // the source data and extends from the end of the source data
+ // into the beginning of the target data. This VCDIFF encoder will never
+ // produce a COPY instruction of this third type (regardless of the value of
+ // look_for_target_matches) because the cost of checking for matches
+ // across the source-target boundary would not justify its benefits.
+ //
+ VCDiffStreamingEncoder(const HashedDictionary* dictionary,
+ VCDiffFormatExtensionFlags format_extensions,
+ bool look_for_target_matches);
+ ~VCDiffStreamingEncoder();
+
+ // The client should use these routines as follows:
+ //   HashedDictionary hd(dictionary, dictionary_size);
+ //   if (!hd.Init()) {
+ //     HandleError();
+ //     return;
+ //   }
+ //   string output_string;
+ //   VCDiffStreamingEncoder v(&hd, VCD_STANDARD_FORMAT, false);
+ //   if (!v.StartEncoding(&output_string)) {
+ //     HandleError();
+ //     return;  // No need to call FinishEncoding()
+ //   }
+ //   Process(output_string.data(), output_string.size());
+ //   output_string.clear();
+ //   while (get data_buf) {
+ //     if (!v.EncodeChunk(data_buf, data_len, &output_string)) {
+ //       HandleError();
+ //       return;  // No need to call FinishEncoding()
+ //     }
+ //     // The encoding is appended to output_string at each call,
+ //     // so clear output_string once its contents have been processed.
+ //     Process(output_string.data(), output_string.size());
+ //     output_string.clear();
+ //   }
+ //   if (!v.FinishEncoding(&output_string)) {
+ //     HandleError();
+ //     return;
+ //   }
+ //   Process(output_string.data(), output_string.size());
+ //   output_string.clear();
+ //
+ // I.e., the allowed pattern of calls is
+ //   StartEncoding EncodeChunk* FinishEncoding
+ //
+ // The size of the encoded output depends on the sizes of the chunks
+ // passed in (i.e. the chunking boundary affects compression).
+ // However the decoded output is independent of chunk boundaries.
+
+ // Sets up the data structures for encoding.
+ // Writes a VCDIFF delta file header (as defined in RFC section 4.1)
+ // to *output_string.
+ //
+ // Note: we *append*, so the old contents of *output_string stick around.
+ // This convention differs from the non-streaming Encode/Decode
+ // interfaces in VCDiffEncoder.
+ //
+ // If an error occurs, this function returns false; otherwise it returns true.
+ // If this function returns false, the caller does not need to call
+ // FinishEncoding or to do any cleanup except destroying the
+ // VCDiffStreamingEncoder object.
+ template<class OutputType>
+ bool StartEncoding(OutputType* output) {
+ OutputString<OutputType> output_string(output);
+ return StartEncodingToInterface(&output_string);
+ }
+
+ bool StartEncodingToInterface(OutputStringInterface* output_string);
+
+ // Appends compressed encoding for "data" (one complete VCDIFF delta window)
+ // to *output_string.
+ // If an error occurs (for example, if StartEncoding was not called
+ // earlier or StartEncoding returned false), this function returns false;
+ // otherwise it returns true. The caller does not need to call FinishEncoding
+ // or do any cleanup except destroying the VCDiffStreamingEncoder
+ // if this function returns false.
+ template<class OutputType>
+ bool EncodeChunk(const char* data, size_t len, OutputType* output) {
+ OutputString<OutputType> output_string(output);
+ return EncodeChunkToInterface(data, len, &output_string);
+ }
+
+ bool EncodeChunkToInterface(const char* data, size_t len,
+ OutputStringInterface* output_string);
+
+ // Finishes encoding and appends any leftover encoded data to *output_string.
+ // If an error occurs (for example, if StartEncoding was not called
+ // earlier or StartEncoding returned false), this function returns false;
+ // otherwise it returns true. The caller does not need to
+ // do any cleanup except destroying the VCDiffStreamingEncoder
+ // if this function returns false.
+ template<class OutputType>
+ bool FinishEncoding(OutputType* output) {
+ OutputString<OutputType> output_string(output);
+ return FinishEncodingToInterface(&output_string);
+ }
+
+ bool FinishEncodingToInterface(OutputStringInterface* output_string);
+
+ // Replaces the contents of match_counts with a vector of integers,
+ // one for each possible match length. The value of match_counts[n]
+ // is equal to the number of matches of length n found so far
+ // for this VCDiffStreamingEncoder object.
+ void GetMatchCounts(std::vector<int>* match_counts) const;
+
+ private:
+ VCDiffStreamingEncoderImpl* const impl_;  // Const pointer: cannot be reseated after construction.
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT
+ void operator=(const VCDiffStreamingEncoder&);
+};
+
+// A simpler (non-streaming) interface to the VCDIFF encoder that can be used
+// if the entire target data string is available.
+// A new VCDiffEncoder uses VCD_STANDARD_FORMAT with target matching enabled.
+class VCDiffEncoder {
+ public:
+ VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)
+ : dictionary_(dictionary_contents, dictionary_size),
+ encoder_(NULL),
+ flags_(VCD_STANDARD_FORMAT),
+ look_for_target_matches_(true) { }
+
+ ~VCDiffEncoder() {
+ delete encoder_;  // Safe even if encoder_ is still NULL.
+ }
+
+ // By default, VCDiffEncoder uses standard VCDIFF format. This function
+ // can be used before calling Encode(), to specify that interleaved format
+ // and/or checksum format should be used.
+ void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }
+
+ // By default, VCDiffEncoder looks for matches in the dictionary and also in
+ // the previously encoded target data. This function can be used before
+ // calling Encode(), to specify whether or not target matching should be
+ // enabled.
+ void SetTargetMatching(bool look_for_target_matches) {
+ look_for_target_matches_ = look_for_target_matches;
+ }
+
+ // Replaces the old contents of *output with the encoded form of
+ // target_data. Returns the result of EncodeToInterface().
+ template<class OutputType>
+ bool Encode(const char* target_data,
+ size_t target_len,
+ OutputType* output) {
+ OutputString<OutputType> output_string(output);
+ return EncodeToInterface(target_data, target_len, &output_string);
+ }
+
+ private:
+ bool EncodeToInterface(const char* target_data,
+ size_t target_len,
+ OutputStringInterface* output_string);
+
+ HashedDictionary dictionary_;  // Built from the constructor arguments.
+ VCDiffStreamingEncoder* encoder_;  // Starts as NULL; deleted in the destructor.
+ VCDiffFormatExtensionFlags flags_;
+ bool look_for_target_matches_;
+
+ // Copying is not supported: the copy constructor and assignment operator
+ // are declared private so that they don't inadvertently get used.
+ VCDiffEncoder(const VCDiffEncoder&); // NOLINT
+ void operator=(const VCDiffEncoder&);
+};
+
+} // namespace open_vcdiff
+
+#endif // OPEN_VCDIFF_VCENCODER_H_