diff options
Diffstat (limited to 'sdch/open-vcdiff/src/vcdiffengine.h')
-rw-r--r-- | sdch/open-vcdiff/src/vcdiffengine.h | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/vcdiffengine.h b/sdch/open-vcdiff/src/vcdiffengine.h new file mode 100644 index 0000000..c69fc28 --- /dev/null +++ b/sdch/open-vcdiff/src/vcdiffengine.h @@ -0,0 +1,127 @@ +// Copyright 2006 Google Inc. +// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_ +#define OPEN_VCDIFF_VCDIFFENGINE_H_ + +#include <config.h> +#include <stddef.h> // size_t +#include <stdint.h> // uint32_t + +namespace open_vcdiff { + +class BlockHash; +class OutputStringInterface; +class CodeTableWriterInterface; + +// The VCDiffEngine class is used to find the optimal encoding (in terms of COPY +// and ADD instructions) for a given dictionary and target window. To write the +// instructions for this encoding, it calls the Copy() and Add() methods of the +// code table writer object which is passed as an argument to Encode(). +class VCDiffEngine { + public: + // The minimum size of a string match that is worth putting into a COPY + // instruction. Since this value is more than twice the block size, the + // encoder will always discover a match of this size, no matter whether it is + // aligned on block boundaries in the dictionary text. + static const size_t kMinimumMatchSize = 32; + + VCDiffEngine(const char* dictionary, size_t dictionary_size); + + ~VCDiffEngine(); + + // Initializes the object before use. + // This method must be called after constructing a VCDiffEngine object, + // and before any other method may be called. It should not be called + // twice on the same object. + // Returns true if initialization succeeded, or false if an error occurred, + // in which case no other method except the destructor may then be used + // on the object. + // The Init() method is the only one allowed to treat hashed_dictionary_ + // as non-const. + bool Init(); + + size_t dictionary_size() const { return dictionary_size_; } + + // Main worker function. Finds the best matches between the dictionary + // (source) and target data, and uses the coder to write a + // delta file window into *diff. + // Because it is a const function, many threads + // can call Encode() at once for the same VCDiffEngine object. + // All thread-specific data will be stored in the coder and diff arguments. + // The coder object must have been fully initialized (by calling its Init() + // method, if any) before calling this function. + // + // look_for_target_matches determines whether to look for matches + // within the previously encoded target data, or just within the source + // (dictionary) data. Please see vcencoder.h for a full explanation + // of this parameter. + void Encode(const char* target_data, + size_t target_size, + bool look_for_target_matches, + OutputStringInterface* diff, + CodeTableWriterInterface* coder) const; + + private: + static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) { + return size >= kMinimumMatchSize; + } + + // The following two functions use templates to produce two different + // versions of the code depending on the value of the option + // look_for_target_matches. This approach saves a test-and-branch instruction + // within the inner loop of EncodeCopyForBestMatch. + template<bool look_for_target_matches> + void EncodeInternal(const char* target_data, + size_t target_size, + OutputStringInterface* diff, + CodeTableWriterInterface* coder) const; + + // If look_for_target_matches is true, then target_hash must point to a valid + // BlockHash object, and cannot be NULL. If look_for_target_matches is + // false, then the value of target_hash is ignored. + template<bool look_for_target_matches> + size_t EncodeCopyForBestMatch(uint32_t hash_value, + const char* target_candidate_start, + const char* unencoded_target_start, + size_t unencoded_target_size, + const BlockHash* target_hash, + CodeTableWriterInterface* coder) const; + + void AddUnmatchedRemainder(const char* unencoded_target_start, + size_t unencoded_target_size, + CodeTableWriterInterface* coder) const; + + void FinishEncoding(size_t target_size, + OutputStringInterface* diff, + CodeTableWriterInterface* coder) const; + + const char* dictionary_; // A copy of the dictionary contents + + const size_t dictionary_size_; + + // A hash that contains one element for every kBlockSize bytes of dictionary_. + // This can be reused to encode many different target strings using the + // same dictionary, without the need to compute the hash values each time. + const BlockHash* hashed_dictionary_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffEngine(const VCDiffEngine&); + void operator=(const VCDiffEngine&); +}; + +} // namespace open_vcdiff + +#endif // OPEN_VCDIFF_VCDIFFENGINE_H_ |