summaryrefslogtreecommitdiffstats
path: root/sdch/open-vcdiff/src/vcencoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sdch/open-vcdiff/src/vcencoder.cc')
-rw-r--r--sdch/open-vcdiff/src/vcencoder.cc229
1 files changed, 229 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/vcencoder.cc b/sdch/open-vcdiff/src/vcencoder.cc
new file mode 100644
index 0000000..7efb230
--- /dev/null
+++ b/sdch/open-vcdiff/src/vcencoder.cc
@@ -0,0 +1,229 @@
+// Copyright 2007 Google Inc.
+// Author: Lincoln Smith
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Classes to implement an Encoder for the format described in
+// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
+// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
+//
+// The RFC describes the possibility of using a secondary compressor
+// to further reduce the size of each section of the VCDIFF output.
+// That feature is not supported in this implementation of the encoder
+// and decoder.
+// No secondary compressor types have been publicly registered with
+// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
+// in the more than five years since the registry was created, so there
+// is no standard set of compressor IDs which would be generated by other
+// encoders or accepted by other decoders.
+
+#include <config.h>
+#include "google/vcencoder.h"
+#include <vector>
+#include "checksum.h"
+#include "encodetable.h"
+#include "logging.h"
+#include "google/output_string.h"
+#include "vcdiffengine.h"
+
+namespace open_vcdiff {
+
+// Allocates the VCDiffEngine that wraps the given dictionary bytes and keeps
+// it in engine_.  The engine is only constructed here; Init() is a separate
+// call.
+HashedDictionary::HashedDictionary(const char* dictionary_contents,
+                                   size_t dictionary_size)
+    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) {
+}
+
+// Frees the engine that the constructor allocated with new.
+HashedDictionary::~HashedDictionary() {
+  delete engine_;
+}
+
+// Runs the engine's own Init() and forwards its result.
+// NOTE(review): engine_ appears to be held as a pointer-to-const (hence the
+// const_cast); the constness is dropped only for this initialization call.
+bool HashedDictionary::Init() {
+  VCDiffEngine* const mutable_engine = const_cast<VCDiffEngine*>(engine_);
+  return mutable_engine->Init();
+}
+
+// Private implementation object behind VCDiffStreamingEncoder.  It holds the
+// engine pointer, the code table writer and the encoding options, and
+// remembers whether a StartEncoding()/FinishEncoding() pair is currently open.
+class VCDiffStreamingEncoderImpl {
+ public:
+  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
+                             VCDiffFormatExtensionFlags format_extensions,
+                             bool look_for_target_matches);
+
+  // StartEncoding(), EncodeChunk() and FinishEncoding() are identical to
+  // the VCDiffStreamingEncoder functions of the same purpose.
+  bool StartEncoding(OutputStringInterface* out);
+
+  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
+
+  bool FinishEncoding(OutputStringInterface* out);
+
+  // Exposes the match counts kept by the code table writer.
+  const std::vector<int>& match_counts() const { return coder_.match_counts(); }
+
+ private:
+  // Appends the delta file header (RFC 3284, section 4.1) to *output.
+  // Everything in it is known before any input chunk has been seen.
+  void WriteHeader(OutputStringInterface* output) const;
+
+  // Engine obtained from the HashedDictionary given to the constructor.
+  // This class never deletes it.
+  const VCDiffEngine* engine_;
+
+  // Code table writer.  The encoder sticks to the default code table,
+  // although a VCDiffCodeTableWriter could also be constructed with a
+  // custom one.
+  VCDiffCodeTableWriter coder_;
+
+  // Format extension flags chosen at construction time.
+  const VCDiffFormatExtensionFlags format_extensions_;
+
+  // When true, matches are also searched for in the target data that has
+  // already been encoded; when false, only the source (dictionary) data is
+  // searched.  vcencoder.h explains this parameter in full.
+  const bool look_for_target_matches_;
+
+  // Guards the call order of StartEncoding(), EncodeChunk() and
+  // FinishEncoding().  False initially, set to true by StartEncoding(),
+  // stays true across any number of EncodeChunk() calls, and goes back to
+  // false once FinishEncoding() has been called.
+  bool encode_chunk_allowed_;
+
+  // Declared private and left undefined so that objects of this class cannot
+  // be copied or assigned implicitly.
+  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
+  void operator=(const VCDiffStreamingEncoderImpl&);
+};
+
+// Takes the engine from *dictionary, constructs the code table writer with
+// the interleaved setting derived from the VCD_FORMAT_INTERLEAVED bit, and
+// records the remaining options.  Encoding starts out disallowed until
+// StartEncoding() is called.
+inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
+    const HashedDictionary* dictionary,
+    VCDiffFormatExtensionFlags format_extensions,
+    bool look_for_target_matches)
+    : engine_(dictionary->engine()),
+      coder_(0 != (format_extensions & VCD_FORMAT_INTERLEAVED)),
+      format_extensions_(format_extensions),
+      look_for_target_matches_(look_for_target_matches),
+      encode_chunk_allowed_(false) {
+}
+
+// Appends the fixed-size delta file header to *output.
+//
+// The first three bytes are the characters "V", "C", "D", each with the high
+// bit (0x80) set.  The fourth byte is 0x00 for the draft standard format, or
+// 'S' (VCDIFF/SDCH) when any format extension is in use.  The last byte is
+// Hdr_Indicator, which is always zero: no compression and no custom code
+// table.
+inline void VCDiffStreamingEncoderImpl::WriteHeader(
+    OutputStringInterface* output) const {
+  DeltaFileHeader header_data = { 0xD6, 0xC3, 0xC4, 0x00, 0x00 };
+  const bool extensions_used = (format_extensions_ != VCD_STANDARD_FORMAT);
+  if (extensions_used) {
+    header_data.header4 = 'S';
+  }
+  // The header struct is written out verbatim as raw bytes.
+  const char* const header_bytes =
+      reinterpret_cast<const char*>(&header_data);
+  output->append(header_bytes, sizeof(header_data));
+  // Custom cache table sizes or a custom code table would be appended to
+  // *output at this point if they had been used for encoding.  This encoder
+  // does not use those features, although the decoder can understand and
+  // interpret them.
+}
+
+// Initializes the code table writer with the dictionary size, writes the
+// file header to *out and enables EncodeChunk().  Returns false, without
+// writing anything, if the code table writer cannot be initialized.
+inline bool VCDiffStreamingEncoderImpl::StartEncoding(
+    OutputStringInterface* out) {
+  const bool coder_ready = coder_.Init(engine_->dictionary_size());
+  if (coder_ready) {
+    WriteHeader(out);
+    encode_chunk_allowed_ = true;
+    return true;
+  }
+  LOG(DFATAL) << "Internal error: "
+                 "Initialization of code table writer failed" << LOG_ENDL;
+  return false;
+}
+
+// Encodes one chunk of target data (len bytes starting at data) into *out.
+// Fails with an error log unless encoding is currently allowed, i.e. unless
+// StartEncoding() has been called and FinishEncoding() has not.  If the
+// VCD_FORMAT_CHECKSUM extension is enabled, the Adler32 checksum of the chunk
+// is handed to the code table writer before the chunk is encoded.
+inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
+    const char* data,
+    size_t len,
+    OutputStringInterface* out) {
+  if (encode_chunk_allowed_) {
+    const bool checksum_wanted =
+        (format_extensions_ & VCD_FORMAT_CHECKSUM) != 0;
+    if (checksum_wanted) {
+      coder_.AddChecksum(ComputeAdler32(data, len));
+    }
+    engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
+    return true;
+  }
+  LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
+  return false;
+}
+
+// Closes the encoding session opened by StartEncoding().  Returns false with
+// an error log if no session is open.  The output argument is unused: every
+// EncodeChunk() call already emits a complete target window and the format
+// has no end-of-delta-file marker, so nothing further needs to be written.
+inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
+    OutputStringInterface* /*out*/) {
+  if (encode_chunk_allowed_) {
+    encode_chunk_allowed_ = false;
+    return true;
+  }
+  LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
+  return false;
+}
+
+// Creates the implementation object, forwarding all three arguments to it
+// unchanged.  The object is released by the destructor.
+VCDiffStreamingEncoder::VCDiffStreamingEncoder(
+    const HashedDictionary* dictionary,
+    VCDiffFormatExtensionFlags format_extensions,
+    bool look_for_target_matches)
+    : impl_(new VCDiffStreamingEncoderImpl(
+          dictionary, format_extensions, look_for_target_matches)) {
+}
+
+// Destroys the implementation object created by the constructor.
+VCDiffStreamingEncoder::~VCDiffStreamingEncoder() {
+  delete impl_;
+}
+
+// Forwards to the implementation's StartEncoding() and returns its result.
+bool VCDiffStreamingEncoder::StartEncodingToInterface(
+    OutputStringInterface* out) {
+  const bool started = impl_->StartEncoding(out);
+  return started;
+}
+
+// Forwards to the implementation's EncodeChunk() and returns its result.
+bool VCDiffStreamingEncoder::EncodeChunkToInterface(
+    const char* data,
+    size_t len,
+    OutputStringInterface* out) {
+  const bool encoded = impl_->EncodeChunk(data, len, out);
+  return encoded;
+}
+
+// Forwards to the implementation's FinishEncoding() and returns its result.
+bool VCDiffStreamingEncoder::FinishEncodingToInterface(
+    OutputStringInterface* out) {
+  const bool finished = impl_->FinishEncoding(out);
+  return finished;
+}
+
+// Copies the implementation's match counts into *match_counts.
+// A null argument is reported through LOG(DFATAL) and nothing is copied.
+void VCDiffStreamingEncoder::GetMatchCounts(
+    std::vector<int>* match_counts) const {
+  if (match_counts) {
+    *match_counts = impl_->match_counts();
+    return;
+  }
+  LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
+}
+
+// Encodes the whole target (target_len bytes at target_data) into *out as a
+// single start / chunk / finish sequence.
+//
+// *out is cleared first, even when encoding later fails.  On the first call
+// the dictionary is initialized and the streaming encoder is created; the
+// same encoder is reused on later calls.  Returns false if the dictionary
+// cannot be initialized or if any of the three encoding steps fails, in
+// which case the remaining steps are skipped.
+bool VCDiffEncoder::EncodeToInterface(const char* target_data,
+                                      size_t target_len,
+                                      OutputStringInterface* out) {
+  out->clear();
+  if (!encoder_) {
+    const bool dictionary_ready = dictionary_.Init();
+    if (!dictionary_ready) {
+      LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
+      return false;
+    }
+    encoder_ = new VCDiffStreamingEncoder(
+        &dictionary_, flags_, look_for_target_matches_);
+  }
+  // Short-circuit evaluation stops at the first step that reports failure.
+  return encoder_->StartEncodingToInterface(out) &&
+         encoder_->EncodeChunkToInterface(target_data, target_len, out) &&
+         encoder_->FinishEncodingToInterface(out);
+}
+
+} // namespace open_vcdiff