diff options
Diffstat (limited to 'sdch/open-vcdiff/src/vcdiff_main.cc')
-rw-r--r-- | sdch/open-vcdiff/src/vcdiff_main.cc | 652 |
1 files changed, 652 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/vcdiff_main.cc b/sdch/open-vcdiff/src/vcdiff_main.cc new file mode 100644 index 0000000..11e7304 --- /dev/null +++ b/sdch/open-vcdiff/src/vcdiff_main.cc @@ -0,0 +1,652 @@ +// Copyright 2008 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A command-line interface to the open-vcdiff library. + +#include <config.h> +#include <assert.h> +#include <errno.h> +#ifdef WIN32 +#include <fcntl.h> +#include <io.h> +#endif // WIN32 +#include <stdio.h> +#include <string.h> // strerror +#include <iostream> +#include <memory> +#include <string> +#include <vector> +#include "gflags/gflags.h" +#include "google/vcdecoder.h" +#include "google/vcencoder.h" + +#ifndef HAS_GLOBAL_STRING +using std::string; +#endif // !HAS_GLOBAL_STRING +using google::GetCommandLineFlagInfoOrDie; +using google::ShowUsageWithFlagsRestrict; + +static const size_t kDefaultMaxTargetSize = 1 << 26; // 64 MB + +// Definitions of command-line flags +DEFINE_string(dictionary, "", + "File containing dictionary data (required)"); +DEFINE_string(target, "", + "Target file (default is stdin for encode, stdout for decode"); +DEFINE_string(delta, "", + "Encoded delta file (default is stdout for encode, " + "stdin for decode"); +// --buffersize is the maximum allowable size of a target window. +// This value may be increased if there is sufficient memory available. +DEFINE_uint64(buffersize, 1 << 20, // 1 MB + "Buffer size for reading input file"); +DEFINE_bool(allow_vcd_target, true, + "If false, the decoder issues an error when the VCD_TARGET flag " + "is encountered"); +DEFINE_bool(checksum, false, + "Include an Adler32 checksum of the target data when encoding"); +DEFINE_bool(interleaved, false, "Use interleaved format"); +DEFINE_bool(stats, false, "Report compression percentage"); +DEFINE_bool(target_matches, false, "Find duplicate strings in target data" + " as well as dictionary data"); +DEFINE_uint64(max_target_file_size, kDefaultMaxTargetSize, + "Maximum target file size allowed by decoder"); +DEFINE_uint64(max_target_window_size, kDefaultMaxTargetSize, + "Maximum target window size allowed by decoder"); + +static const char* const kUsageString = + " {encode | delta | decode | patch }[ <options> ]\n" + "encode or delta: create delta file from dictionary and target file\n" + "decode or patch: reconstruct target file from dictionary and delta file"; + +namespace open_vcdiff { + +class VCDiffFileBasedCoder { + public: + VCDiffFileBasedCoder(); + ~VCDiffFileBasedCoder(); + + // Once the command-line arguments have been parsed, these functions + // will use the supplied options to carry out a file-based encode + // or decode operation. + bool Encode(); + bool Decode(); + bool DecodeAndCompare(); // for "vcdiff test"; compare target with original + + private: + // Determines the size of the file. The given file must be an input file + // opened for reading only, not an input stream such as stdin. The function + // returns true and populates file_size if successful; otherwise, it returns + // false. + static bool FileSize(FILE* file, size_t* file_size); + + // Opens a file for incremental reading. file_name is the name of the file + // to be opened. file_type should be a descriptive name (like "target") for + // use in log messages. If successful, returns true and sets *file to a + // valid input file, *buffer to a region of memory allocated using malloc() + // (so the caller must release it using free()), and buffer_size to the size + // of the buffer, which will not be larger than the size of the file, and + // will not be smaller than the --buffersize option. If the function fails, + // it outputs a log message and returns false. + bool OpenFileForReading(const string& file_name, + const char* file_type, + FILE** file, + std::vector<char>* buffer); + + // Opens the dictionary file and reads it into a newly allocated buffer. + // If successful, returns true and populates dictionary_ with the dictionary + // contents; otherwise, returns false. + bool OpenDictionary(); + + // Opens the input file (the delta or target file) for reading. + // Allocates space for the input buffer. If successful, + // input_file_ will be valid and input_buffer_ will be allocated. + bool OpenInputFile() { + return OpenFileForReading(input_file_name_, + input_file_type_, + &input_file_, + &input_buffer_); + } + + // Opens the output file (the target or delta file) for writing. + // If successful, output_file_ will be valid. + bool OpenOutputFile(); + + // Opens the output file (the target file) for comparison against the decoded + // output when using "vcdiff test". + bool OpenOutputFileForCompare() { + return OpenFileForReading(output_file_name_, + output_file_type_, + &output_file_, + &compare_buffer_); + } + + // Reads as much input data as possible from the input file + // into input_buffer_. If successful, returns true and sets *bytes_read + // to the number of bytes read into input_buffer_. If an error occurs, + // writes an error log message and returns false. + bool ReadInput(size_t* bytes_read); + + // Writes the contents of output to output_file_. If successful, returns + // true. If an error occurs, writes an error log message and returns false. + bool WriteOutput(const string& output); + + // Reads a number of bytes from output_file_ equal to the size of output, + // and compares to make sure they match the contents of output. If the bytes + // do not match, or if end of file is reached before the expected number of + // bytes have been read, or a read error occurs, the function returns false; + // otherwise, returns true. + bool CompareOutput(const string& output); + + // Dictionary contents. The entire dictionary file will be read into memory. + std::vector<char> dictionary_; + + std::auto_ptr<open_vcdiff::HashedDictionary> hashed_dictionary_; + + // These should be set to either "delta" or "target". They are only + // used in log messages such as "Error opening delta file..." + const char* input_file_type_; + const char* output_file_type_; + + // The filenames used for input and output. Will be empty if stdin + // or stdout is being used. + string input_file_name_; + string output_file_name_; + + // stdio-style file handles for the input and output files and the dictionary. + // When encoding, input_file_ is the target file and output_file_ is the delta + // file; when decoding, the reverse is true. The dictionary is always read + // from a file rather than from standard input. + FILE* input_file_; + FILE* output_file_; + + // A memory buffer used to load the input file into memory. If the input + // comes from stdin because no input file was specified, then the size of + // input_buffer_ will be the value specified by the --buffersize option. + // If the input comes from a file, then the buffer will be allocated to match + // the file size, if possible. However, the buffer will not exceed + // --buffersize bytes in length. + std::vector<char> input_buffer_; + + // A memory buffer used to load the output file into memory for comparison + // if "vcdiff test" is specified. + std::vector<char> compare_buffer_; + + // Making these private avoids implicit copy constructor & assignment operator + VCDiffFileBasedCoder(const VCDiffFileBasedCoder&); // NOLINT + void operator=(const VCDiffFileBasedCoder&); +}; + +inline VCDiffFileBasedCoder::VCDiffFileBasedCoder() + : input_file_type_(""), + output_file_type_(""), + input_file_(NULL), + output_file_(NULL) { } + +VCDiffFileBasedCoder::~VCDiffFileBasedCoder() { + if (input_file_ && (input_file_ != stdin)) { + fclose(input_file_); + input_file_ = NULL; + } + if (output_file_ && (output_file_ != stdout)) { + fclose(output_file_); + output_file_ = NULL; + } +} + +bool VCDiffFileBasedCoder::FileSize(FILE* file, size_t* file_size) { + long initial_position = ftell(file); + if (fseek(file, 0, SEEK_END) != 0) { + return false; + } + *file_size = static_cast<size_t>(ftell(file)); + if (fseek(file, initial_position, SEEK_SET) != 0) { + return false; + } + return true; +} + +bool VCDiffFileBasedCoder::OpenDictionary() { + assert(dictionary_.empty()); + assert(!FLAGS_dictionary.empty()); + FILE* dictionary_file = fopen(FLAGS_dictionary.c_str(), "rb"); + if (!dictionary_file) { + std::cerr << "Error opening dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << std::endl; + return false; + } + size_t dictionary_size = 0U; + if (!FileSize(dictionary_file, &dictionary_size)) { + std::cerr << "Error finding size of dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << std::endl; + return false; + } + dictionary_.resize(dictionary_size); + if (dictionary_size > 0) { + if (fread(&dictionary_[0], 1, dictionary_size, dictionary_file) + != dictionary_size) { + std::cerr << "Unable to read dictionary file '" << FLAGS_dictionary + << "': " << strerror(errno) << std::endl; + fclose(dictionary_file); + dictionary_.clear(); + return false; + } + } + fclose(dictionary_file); + return true; +} + +bool VCDiffFileBasedCoder::OpenFileForReading(const string& file_name, + const char* file_type, + FILE** file, + std::vector<char>* buffer) { + assert(buffer->empty()); + size_t buffer_size = 0U; + if (!*file && file_name.empty()) { +#ifdef WIN32 + _setmode(_fileno(stdin), _O_BINARY); +#endif + *file = stdin; + buffer_size = static_cast<size_t>(FLAGS_buffersize); + } else { + if (!*file) { + *file = fopen(file_name.c_str(), "rb"); + if (!*file) { + std::cerr << "Error opening " << file_type << " file '" + << file_name << "': " << strerror(errno) << std::endl; + return false; + } + } + size_t file_size = 0U; + if (!FileSize(*file, &file_size)) { + std::cerr << "Error finding size of " << file_type << " file '" + << file_name << "': " << strerror(errno) << std::endl; + return false; + } + buffer_size = static_cast<size_t>(FLAGS_buffersize); + if (file_size < buffer_size) { + // Allocate just enough memory to store the entire file + buffer_size = file_size; + } + } + buffer->resize(buffer_size); + return true; +} + +// Opens the output file for streamed read operations using the +// standard C I/O library, i.e., fopen(), fwrite(), fclose(). +// No output buffer is allocated because the encoded/decoded output +// is constructed progressively using a std::string object +// whose buffer is resized as needed. +bool VCDiffFileBasedCoder::OpenOutputFile() { + if (output_file_name_.empty()) { +#ifdef WIN32 + _setmode(_fileno(stdout), _O_BINARY); +#endif + output_file_ = stdout; + } else { + output_file_ = fopen(output_file_name_.c_str(), "wb"); + if (!output_file_) { + std::cerr << "Error opening " << output_file_type_ << " file '" + << output_file_name_ + << "': " << strerror(errno) << std::endl; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::ReadInput(size_t* bytes_read) { + // Read from file or stdin + *bytes_read = fread(&input_buffer_[0], 1, input_buffer_.size(), input_file_); + if (ferror(input_file_)) { + std::cerr << "Error reading from " << input_file_type_ << " file '" + << input_file_name_ + << "': " << strerror(errno) << std::endl; + return false; + } + return true; +} + +bool VCDiffFileBasedCoder::WriteOutput(const string& output) { + if (!output.empty()) { + // Some new output has been generated and is ready to be written + // to the output file or to stdout. + fwrite(output.data(), 1, output.size(), output_file_); + if (ferror(output_file_)) { + std::cerr << "Error writing " << output.size() << " bytes to " + << output_file_type_ << " file '" << output_file_name_ + << "': " << strerror(errno) << std::endl; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::CompareOutput(const string& output) { + if (!output.empty()) { + size_t output_size = output.size(); + // Some new output has been generated and is ready to be compared against + // the output file. + if (output_size > compare_buffer_.size()) { + compare_buffer_.resize(output_size); + } + size_t bytes_read = fread(&compare_buffer_[0], + 1, + output_size, + output_file_); + if (ferror(output_file_)) { + std::cerr << "Error reading from " << output_file_type_ << " file '" + << output_file_name_ << "': " << strerror(errno) << std::endl; + return false; + } + if (bytes_read < output_size) { + std::cerr << "Decoded target is longer than original target file" + << std::endl; + return false; + } + if (output.compare(0, output_size, &compare_buffer_[0], bytes_read) != 0) { + std::cerr << "Original target file does not match decoded target" + << std::endl; + return false; + } + } + return true; +} + +bool VCDiffFileBasedCoder::Encode() { + input_file_type_ = "target"; + input_file_name_ = FLAGS_target; + output_file_type_ = "delta"; + output_file_name_ = FLAGS_delta; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) { + return false; + } + // Issue 6: Visual Studio STL produces a runtime exception + // if &dictionary_[0] is attempted for an empty dictionary. + if (dictionary_.empty()) { + hashed_dictionary_.reset(new open_vcdiff::HashedDictionary("", 0)); + } else { + hashed_dictionary_.reset( + new open_vcdiff::HashedDictionary(&dictionary_[0], + dictionary_.size())); + } + if (!hashed_dictionary_->Init()) { + std::cerr << "Error initializing hashed dictionary" << std::endl; + return false; + } + VCDiffFormatExtensionFlags format_flags = open_vcdiff::VCD_STANDARD_FORMAT; + if (FLAGS_interleaved) { + format_flags |= open_vcdiff::VCD_FORMAT_INTERLEAVED; + } + if (FLAGS_checksum) { + format_flags |= open_vcdiff::VCD_FORMAT_CHECKSUM; + } + open_vcdiff::VCDiffStreamingEncoder encoder(hashed_dictionary_.get(), + format_flags, + FLAGS_target_matches); + string output; + size_t input_size = 0; + size_t output_size = 0; + { + if (!encoder.StartEncoding(&output)) { + std::cerr << "Error during encoder initialization" << std::endl; + return false; + } + } + do { + size_t bytes_read = 0; + if (!WriteOutput(output) || !ReadInput(&bytes_read)) { + return false; + } + output_size += output.size(); + output.clear(); + if (bytes_read > 0) { + input_size += bytes_read; + if (!encoder.EncodeChunk(&input_buffer_[0], bytes_read, &output)) { + std::cerr << "Error trying to encode data chunk of length " + << bytes_read << std::endl; + return false; + } + } + } while (!feof(input_file_)); + encoder.FinishEncoding(&output); + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (FLAGS_stats && (input_size > 0)) { + std::cerr << "Original size: " << input_size + << "\tCompressed size: " << output_size << " (" + << ((static_cast<double>(output_size) / input_size) * 100) + << "% of original)" << std::endl; + } + return true; +} + +bool VCDiffFileBasedCoder::Decode() { + input_file_type_ = "delta"; + input_file_name_ = FLAGS_delta; + output_file_type_ = "target"; + output_file_name_ = FLAGS_target; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFile()) { + return false; + } + + open_vcdiff::VCDiffStreamingDecoder decoder; + decoder.SetMaximumTargetFileSize( + static_cast<size_t>(FLAGS_max_target_file_size)); + decoder.SetMaximumTargetWindowSize( + static_cast<size_t>(FLAGS_max_target_window_size)); + decoder.SetAllowVcdTarget(FLAGS_allow_vcd_target); + string output; + size_t input_size = 0; + size_t output_size = 0; + // Issue 6: Visual Studio STL produces a runtime exception + // if &dictionary_[0] is attempted for an empty dictionary. + if (dictionary_.empty()) { + decoder.StartDecoding("", 0); + } else { + decoder.StartDecoding(&dictionary_[0], dictionary_.size()); + } + + do { + size_t bytes_read = 0; + if (!ReadInput(&bytes_read)) { + return false; + } + if (bytes_read > 0) { + input_size += bytes_read; + if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) { + std::cerr << "Error trying to decode data chunk of length " + << bytes_read << std::endl; + return false; + } + } + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + } while (!feof(input_file_)); + if (!decoder.FinishDecoding()) { + std::cerr << "Decode error; '" << FLAGS_delta + << " may not be a valid VCDIFF delta file" << std::endl; + return false; + } + if (!WriteOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (FLAGS_stats && (output_size > 0)) { + std::cerr << "Decompressed size: " << output_size + << "\tCompressed size: " << input_size << " (" + << ((static_cast<double>(input_size) / output_size) * 100) + << "% of original)" << std::endl; + } + return true; +} + +bool VCDiffFileBasedCoder::DecodeAndCompare() { + input_file_type_ = "delta"; + input_file_name_ = FLAGS_delta; + output_file_type_ = "target"; + output_file_name_ = FLAGS_target; + if (!OpenDictionary() || !OpenInputFile() || !OpenOutputFileForCompare()) { + return false; + } + + open_vcdiff::VCDiffStreamingDecoder decoder; + decoder.SetMaximumTargetFileSize( + static_cast<size_t>(FLAGS_max_target_file_size)); + decoder.SetMaximumTargetWindowSize( + static_cast<size_t>(FLAGS_max_target_window_size)); + decoder.SetAllowVcdTarget(FLAGS_allow_vcd_target); + string output; + size_t input_size = 0; + size_t output_size = 0; + // Issue 6: Visual Studio STL produces a runtime exception + // if &dictionary_[0] is attempted for an empty dictionary. + if (dictionary_.empty()) { + decoder.StartDecoding("", 0); + } else { + decoder.StartDecoding(&dictionary_[0], dictionary_.size()); + } + + do { + size_t bytes_read = 0; + if (!ReadInput(&bytes_read)) { + return false; + } + if (bytes_read > 0) { + input_size += bytes_read; + if (!decoder.DecodeChunk(&input_buffer_[0], bytes_read, &output)) { + std::cerr << "Error trying to decode data chunk of length " + << bytes_read << std::endl; + return false; + } + } + if (!CompareOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + } while (!feof(input_file_)); + if (!decoder.FinishDecoding()) { + std::cerr << "Decode error; '" << FLAGS_delta + << " may not be a valid VCDIFF delta file" << std::endl; + return false; + } + if (!CompareOutput(output)) { + return false; + } + output_size += output.size(); + output.clear(); + if (fgetc(output_file_) != EOF) { + std::cerr << "Decoded target is shorter than original target file" + << std::endl; + return false; + } + if (ferror(output_file_)) { + std::cerr << "Error reading end-of-file indicator from target file" + << std::endl; + return false; + } + if (FLAGS_stats && (output_size > 0)) { + std::cerr << "Decompressed size: " << output_size + << "\tCompressed size: " << input_size << " (" + << ((static_cast<double>(input_size) / output_size) * 100) + << "% of original)" << std::endl; + } + return true; +} + +} // namespace open_vcdiff + +int main(int argc, char** argv) { + const char* const command_name = argv[0]; + google::SetUsageMessage(kUsageString); + google::ParseCommandLineFlags(&argc, &argv, true); + if (argc != 2) { + std::cerr << command_name << ": Must specify exactly one command option" + << std::endl; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + const char* const command_option = argv[1]; + if (FLAGS_dictionary.empty()) { + std::cerr << command_name << " " << command_option + << ": Must specify --dictionary <file-name>" << std::endl; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + if (!GetCommandLineFlagInfoOrDie("buffersize").is_default && + (FLAGS_buffersize == 0)) { + std::cerr << command_name << ": Option --buffersize cannot be 0" + << std::endl; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + if ((strcmp(command_option, "encode") == 0) || + (strcmp(command_option, "delta") == 0)) { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Encode()) { + return 1; + } + // The destructor for VCDiffFileBasedCoder will clean up the open files + // and allocated memory. + } else if ((strcmp(command_option, "decode") == 0) || + (strcmp(command_option, "patch") == 0)) { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Decode()) { + return 1; + } + } else if ((strcmp(command_option, "test") == 0)) { + // "vcdiff test" does not appear in the usage string, but can be + // used for debugging. It encodes, then decodes, then compares the result + // with the original target. It expects the same arguments as + // "vcdiff encode", with the additional requirement that the --target + // and --delta file arguments must be specified, rather than using stdin + // or stdout. It produces a delta file just as for "vcdiff encode". + if (FLAGS_target.empty() || FLAGS_delta.empty()) { + std::cerr << command_name + << " test: Must specify both --target <file-name>" + " and --delta <file-name>" << std::endl; + return 1; + } + const string original_target(FLAGS_target); + // Put coder into a separate scope. + { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.Encode()) { + return 1; + } + } + { + open_vcdiff::VCDiffFileBasedCoder coder; + if (!coder.DecodeAndCompare()) { + return 1; + } + } + } else { + std::cerr << command_name << ": Unrecognized command option " + << command_option << std::endl; + ShowUsageWithFlagsRestrict(command_name, "vcdiff"); + return 1; + } + return 0; +} |