diff options
Diffstat (limited to 'sdch/open-vcdiff/src/vcencoder.cc')
-rw-r--r-- | sdch/open-vcdiff/src/vcencoder.cc | 229 |
1 files changed, 229 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/vcencoder.cc b/sdch/open-vcdiff/src/vcencoder.cc new file mode 100644 index 0000000..7efb230 --- /dev/null +++ b/sdch/open-vcdiff/src/vcencoder.cc @@ -0,0 +1,229 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Classes to implement an Encoder for the format described in +// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html +// +// The RFC describes the possibility of using a secondary compressor +// to further reduce the size of each section of the VCDIFF output. +// That feature is not supported in this implementation of the encoder +// and decoder. +// No secondary compressor types have been publicly registered with +// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids +// in the more than five years since the registry was created, so there +// is no standard set of compressor IDs which would be generated by other +// encoders or accepted by other decoders. + +#include <config.h> +#include "google/vcencoder.h" +#include <vector> +#include "checksum.h" +#include "encodetable.h" +#include "logging.h" +#include "google/output_string.h" +#include "vcdiffengine.h" + +namespace open_vcdiff { + +HashedDictionary::HashedDictionary(const char* dictionary_contents, + size_t dictionary_size) + : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { } + +HashedDictionary::~HashedDictionary() { delete engine_; } + +bool HashedDictionary::Init() { + return const_cast<VCDiffEngine*>(engine_)->Init(); +} + +class VCDiffStreamingEncoderImpl { + public: + VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches); + + // These functions are identical to their counterparts + // in VCDiffStreamingEncoder. + bool StartEncoding(OutputStringInterface* out); + + bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out); + + bool FinishEncoding(OutputStringInterface* out); + + const std::vector<int>& match_counts() const { + return coder_.match_counts(); + } + + private: + // Write the header (as defined in section 4.1 of the RFC) to *output. + // This includes information that can be gathered + // before the first chunk of input is available. + void WriteHeader(OutputStringInterface* output) const; + + const VCDiffEngine* engine_; + + // This implementation of the encoder uses the default + // code table. A VCDiffCodeTableWriter could also be constructed + // using a custom code table. + VCDiffCodeTableWriter coder_; + + const VCDiffFormatExtensionFlags format_extensions_; + + // Determines whether to look for matches within the previously encoded + // target data, or just within the source (dictionary) data. Please see + // vcencoder.h for a full explanation of this parameter. + const bool look_for_target_matches_; + + // This state variable is used to ensure that StartEncoding(), EncodeChunk(), + // and FinishEncoding() are called in the correct order. It will be true + // if StartEncoding() has been called, followed by zero or more calls to + // EncodeChunk(), but FinishEncoding() has not yet been called. It will + // be false initially, and also after FinishEncoding() has been called. + bool encode_chunk_allowed_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT + void operator=(const VCDiffStreamingEncoderImpl&); +}; + +inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl( + const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches) + : engine_(dictionary->engine()), + coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0), + format_extensions_(format_extensions), + look_for_target_matches_(look_for_target_matches), + encode_chunk_allowed_(false) { } + +inline void VCDiffStreamingEncoderImpl::WriteHeader( + OutputStringInterface* output) const { + DeltaFileHeader header_data = { + 0xD6, // Header1: "V" | 0x80 + 0xC3, // Header2: "C" | 0x80 + 0xC4, // Header3: "D" | 0x80 + 0x00, // Header4: Draft standard format + 0x00 }; // Hdr_Indicator: + // No compression, no custom code table + if (format_extensions_ != VCD_STANDARD_FORMAT) { + header_data.header4 = 'S'; // Header4: VCDIFF/SDCH, extensions used + } + output->append(reinterpret_cast<const char*>(&header_data), + sizeof(header_data)); + // If custom cache table sizes or a custom code table were used + // for encoding, here is where they would be appended to *output. + // This implementation of the encoder does not use those features, + // although the decoder can understand and interpret them. +} + +inline bool VCDiffStreamingEncoderImpl::StartEncoding( + OutputStringInterface* out) { + if (!coder_.Init(engine_->dictionary_size())) { + LOG(DFATAL) << "Internal error: " + "Initialization of code table writer failed" << LOG_ENDL; + return false; + } + WriteHeader(out); + encode_chunk_allowed_ = true; + return true; +} + +inline bool VCDiffStreamingEncoderImpl::EncodeChunk( + const char* data, + size_t len, + OutputStringInterface* out) { + if (!encode_chunk_allowed_) { + LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL; + return false; + } + if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) { + coder_.AddChecksum(ComputeAdler32(data, len)); + } + engine_->Encode(data, len, look_for_target_matches_, out, &coder_); + return true; +} + +inline bool VCDiffStreamingEncoderImpl::FinishEncoding( + OutputStringInterface* /*out*/) { + if (!encode_chunk_allowed_) { + LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL; + return false; + } + encode_chunk_allowed_ = false; + // There should not be any need to output more data + // since EncodeChunk() encodes a complete target window + // and there is no end-of-delta-file marker. + return true; +} + +VCDiffStreamingEncoder::VCDiffStreamingEncoder( + const HashedDictionary* dictionary, + VCDiffFormatExtensionFlags format_extensions, + bool look_for_target_matches) + : impl_(new VCDiffStreamingEncoderImpl(dictionary, + format_extensions, + look_for_target_matches)) { } + +VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; } + +bool VCDiffStreamingEncoder::StartEncodingToInterface( + OutputStringInterface* out) { + return impl_->StartEncoding(out); +} + +bool VCDiffStreamingEncoder::EncodeChunkToInterface( + const char* data, + size_t len, + OutputStringInterface* out) { + return impl_->EncodeChunk(data, len, out); +} + +bool VCDiffStreamingEncoder::FinishEncodingToInterface( + OutputStringInterface* out) { + return impl_->FinishEncoding(out); +} + +void VCDiffStreamingEncoder::GetMatchCounts( + std::vector<int>* match_counts) const { + if (!match_counts) { + LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL; + return; + } + *match_counts = impl_->match_counts(); +} + +bool VCDiffEncoder::EncodeToInterface(const char* target_data, + size_t target_len, + OutputStringInterface* out) { + out->clear(); + if (!encoder_) { + if (!dictionary_.Init()) { + LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL; + return false; + } + encoder_ = new VCDiffStreamingEncoder(&dictionary_, + flags_, + look_for_target_matches_); + } + if (!encoder_->StartEncodingToInterface(out)) { + return false; + } + if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) { + return false; + } + return encoder_->FinishEncodingToInterface(out); +} + +} // namespace open_vcdiff |