diff options
Diffstat (limited to 'sdch/open-vcdiff/src/addrcache.cc')
-rw-r--r-- | sdch/open-vcdiff/src/addrcache.cc | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/sdch/open-vcdiff/src/addrcache.cc b/sdch/open-vcdiff/src/addrcache.cc new file mode 100644 index 0000000..2948266 --- /dev/null +++ b/sdch/open-vcdiff/src/addrcache.cc @@ -0,0 +1,331 @@ +// Copyright 2007 Google Inc. +// Author: Lincoln Smith +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Implementation of the Address Cache and Address Encoding +// algorithms described in sections 5.1 - 5.4 of RFC 3284 - +// The VCDIFF Generic Differencing and Compression Data Format. +// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html +// +// Assumptions: +// * The VCDAddress type is large enough to hold any offset within +// the source and target windows. The limit (for int32_t) is 2^31-1 bytes. +// The source (dictionary) should not approach this size limit; +// to compress a target file that is larger than +// INT_MAX - (dictionary size) bytes, the encoder must +// break it up into multiple target windows. + +#include <config.h> +#include "addrcache.h" +#include "logging.h" +#include "varint_bigendian.h" +#include "vcdiff_defs.h" // RESULT_ERROR + +namespace open_vcdiff { + +// The constructor does not initialize near_addresses_ and same_addresses_. +// Therefore, Init() must be called before any other method can be used. +// +// Arguments: +// near_cache_size: Size of the NEAR cache (number of 4-byte integers) +// same_cache_size: Size of the SAME cache (number of blocks of +// 256 4-byte integers per block) +// Because the mode is expressed as a byte value, +// near_cache_size + same_cache_size should not exceed 254. +// +VCDiffAddressCache::VCDiffAddressCache(int near_cache_size, + int same_cache_size) + : near_cache_size_(near_cache_size), + same_cache_size_(same_cache_size), + next_slot_(0) { } + +VCDiffAddressCache::VCDiffAddressCache() + : near_cache_size_(kDefaultNearCacheSize), + same_cache_size_(kDefaultSameCacheSize), + next_slot_(0) { } + +// Sets up data structures needed to call other methods. Operations that may +// fail at runtime (for example, validating the provided near_cache_size_ and +// same_cache_size_ parameters against their maximum allowed values) are +// confined to this routine in order to guarantee that the class constructor +// will never fail. Other methods (except the destructor) cannot be invoked +// until this method has been called successfully. After the object has been +// initialized and used, Init() can be called again to reset it to its initial +// state. +// +// Return value: "true" if initialization succeeded, "false" if it failed. +// No other method except the destructor may be invoked if this function +// returns false. The caller is responsible for checking the return value +// and providing an exit path in case of error. +// +bool VCDiffAddressCache::Init() { + // The mode is expressed as a byte value, so there is only room for 256 modes, + // including the two non-cached modes (SELF and HERE). Do not allow a larger + // number of modes to be defined. We do a separate sanity check for + // near_cache_size_ and same_cache_size_ because adding them together can + // cause an integer overflow if each is set to, say, INT_MAX. + if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) { + LOG(ERROR) << "Near cache size " << near_cache_size_ << " is invalid" + << LOG_ENDL; + return false; + } + if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) { + LOG(ERROR) << "Same cache size " << same_cache_size_ << " is invalid" + << LOG_ENDL; + return false; + } + if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) { + LOG(ERROR) << "Using near cache size " << near_cache_size_ + << " and same cache size " << same_cache_size_ + << " would exceed maximum number of COPY modes (" + << VCD_MAX_MODES << ")" << LOG_ENDL; + return false; + } + if (near_cache_size_ > 0) { + near_addresses_.assign(near_cache_size_, 0); + } + if (same_cache_size_ > 0) { + same_addresses_.assign(same_cache_size_ * 256, 0); + } + next_slot_ = 0; // in case Init() is called a second time to reinit + return true; +} + +// This method will be called whenever an address is calculated for an +// encoded or decoded COPY instruction, and will update the contents +// of the SAME and NEAR caches. It is vital that the use of +// UpdateCache (called cache_update in the RFC examples) exactly match +// the RFC standard, and that the same caching logic be used in the +// decoder as in the encoder, in order for the decoded addresses to +// match. +// +// Argument: +// address: This must be a valid address between 0 and +// (source window size + target window size). It is assumed that +// these bounds have been checked before calling UpdateCache. +// +void VCDiffAddressCache::UpdateCache(VCDAddress address) { + if (near_cache_size_ > 0) { + near_addresses_[next_slot_] = address; + next_slot_ = (next_slot_ + 1) % near_cache_size_; + } + if (same_cache_size_ > 0) { + same_addresses_[address % (same_cache_size_ * 256)] = address; + } +} + +// Determines the address mode that yields the most compact encoding +// of the given address value, writes the encoded address into the +// address stream, and returns the mode used. The most compact encoding +// is found by looking for the numerically lowest encoded address. +// The Init() function must already have been called. +// +// Arguments: +// address: The address to be encoded. Must be a non-negative integer +// between 0 and (here_address - 1). +// here_address: The current location in the target data (i.e., the +// position just after the last encoded value.) Must be non-negative. +// encoded_addr: Points to an VCDAddress that will be replaced +// with the encoded representation of address. +// If WriteAddressAsVarintForMode returns true when passed +// the return value, then encoded_addr should be written +// into the delta file as a variable-length integer (Varint); +// otherwise, it should be written as a byte (unsigned char). +// +// Return value: A mode value between 0 and 255. The mode will tell +// how to interpret the next value in the address stream. +// The values 0 and 1 correspond to SELF and HERE addressing. +// +// The function is guaranteed to succeed unless the conditions on the arguments +// have not been met, in which case a LOG(DFATAL) message will be produced, +// 0 will be returned, and *encoded_addr will be replaced with 0. +// +unsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address, + VCDAddress here_address, + VCDAddress* encoded_addr) { + if (address < 0) { + LOG(DFATAL) << "EncodeAddress was passed a negative address: " + << address << LOG_ENDL; + *encoded_addr = 0; + return 0; + } + if (address >= here_address) { + LOG(DFATAL) << "EncodeAddress was called with address (" << address + << ") < here_address (" << here_address << ")" << LOG_ENDL; + *encoded_addr = 0; + return 0; + } + // Try using the SAME cache. This method, if available, always + // results in the smallest encoding and takes priority over other modes. + if (same_cache_size() > 0) { + const VCDAddress same_cache_pos = + address % (same_cache_size() * 256); + if (SameAddress(same_cache_pos) == address) { + // This is the only mode for which an single byte will be written + // to the address stream instead of a variable-length integer. + UpdateCache(address); + *encoded_addr = same_cache_pos % 256; + return FirstSameMode() + (same_cache_pos / 256); // SAME mode + } + } + + // Try SELF mode + unsigned char best_mode = VCD_SELF_MODE; + VCDAddress best_encoded_address = address; + + // Try HERE mode + { + const VCDAddress here_encoded_address = here_address - address; + if (here_encoded_address < best_encoded_address) { + best_mode = VCD_HERE_MODE; + best_encoded_address = here_encoded_address; + } + } + + // Try using the NEAR cache + for (int i = 0; i < near_cache_size(); ++i) { + const VCDAddress near_encoded_address = address - NearAddress(i); + if ((near_encoded_address >= 0) && + (near_encoded_address < best_encoded_address)) { + best_mode = FirstNearMode() + i; + best_encoded_address = near_encoded_address; + } + } + + UpdateCache(address); + *encoded_addr = best_encoded_address; + return best_mode; +} + +// Increments *byte_pointer and returns the byte it pointed to before the +// increment. The caller must check bounds to ensure that *byte_pointer +// points to a valid address in memory. +static unsigned char ParseByte(const char** byte_pointer) { + unsigned char byte_value = static_cast<unsigned char>(**byte_pointer); + ++(*byte_pointer); + return byte_value; +} + +// Checks the given decoded address for validity. Returns true if the +// address is valid; otherwise, prints an error message to the log and +// returns false. +static bool IsDecodedAddressValid(VCDAddress decoded_address, + VCDAddress here_address) { + if (decoded_address < 0) { + LOG(ERROR) << "Decoded address " << decoded_address << " is invalid" + << LOG_ENDL; + return false; + } else if (decoded_address >= here_address) { + LOG(ERROR) << "Decoded address (" << decoded_address + << ") is beyond location in target file (" << here_address + << ")" << LOG_ENDL; + return false; + } + return true; +} + +// Interprets the next value in the address_stream using the provided mode, +// which may need to access the SAME or NEAR address cache. Returns the +// decoded address. +// The Init() function must already have been called. +// +// Arguments: +// here_address: The current location in the source + target data (i.e., the +// location into which the COPY instruction will copy.) By definition, +// all addresses between 0 and (here_address - 1) are valid, and +// any other address is invalid. +// mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1) +// which tells how to interpret the next value in the address stream. +// The values 0 and 1 correspond to SELF and HERE addressing. +// The validity of "mode" should already have been checked before +// calling this function. +// address_stream: Points to a pointer holding the position +// in the "Addresses section for COPYs" part of the input data. +// That section must already have been uncompressed +// using a secondary decompressor (if necessary.) +// This is an IN/OUT argument; the value of *address_stream will be +// incremented by the size of an integer, or (if the SAME cache +// was used) by the size of a byte (1). +// address_stream_end: Points to the position just after the end of +// the address stream buffer. All addresses between *address_stream +// and address_stream_end should contain valid address data. +// +// Return value: If the input conditions were met, and the address section +// of the input data contains properly encoded addresses that match +// the instructions section, then an integer between 0 and here_address - 1 +// will be returned, representing the address from which data should +// be copied from the source or target window into the output stream. +// If an invalid address value is found in address_stream, then +// RESULT_ERROR will be returned. If the limit address_stream_end +// is reached before the address can be decoded, then +// RESULT_END_OF_DATA will be returned. If more streamed data +// is expected, this means that the consumer should block and wait +// for more data before continuing to decode. If no more data is expected, +// this return value signals an error condition. +// +VCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address, + unsigned char mode, + const char** address_stream, + const char* address_stream_end) { + if (here_address < 0) { + LOG(DFATAL) << "DecodeAddress was passed a negative value" + " for here_address: " << here_address << LOG_ENDL; + return RESULT_ERROR; + } + const char* new_address_pos = *address_stream; + if (new_address_pos >= address_stream_end) { + return RESULT_END_OF_DATA; + } + VCDAddress decoded_address; + if (IsSameMode(mode)) { + // SAME mode expects a byte value as the encoded address + unsigned char encoded_address = ParseByte(&new_address_pos); + decoded_address = DecodeSameAddress(mode, encoded_address); + } else { + // All modes except SAME mode expect a VarintBE as the encoded address + int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end, + &new_address_pos); + switch (encoded_address) { + case RESULT_ERROR: + LOG(ERROR) << "Found invalid variable-length integer " + "as encoded address value" << LOG_ENDL; + return RESULT_ERROR; + case RESULT_END_OF_DATA: + return RESULT_END_OF_DATA; + default: + break; + } + if (IsSelfMode(mode)) { + decoded_address = DecodeSelfAddress(encoded_address); + } else if (IsHereMode(mode)) { + decoded_address = DecodeHereAddress(encoded_address, here_address); + } else if (IsNearMode(mode)) { + decoded_address = DecodeNearAddress(mode, encoded_address); + } else { + LOG(DFATAL) << "Invalid mode value (" << static_cast<int>(mode) + << ") passed to DecodeAddress; maximum mode value = " + << static_cast<int>(LastMode()) << LOG_ENDL; + return RESULT_ERROR; + } + } + // Check for an out-of-bounds address (corrupt/malicious data) + if (!IsDecodedAddressValid(decoded_address, here_address)) { + return RESULT_ERROR; + } + *address_stream = new_address_pos; + UpdateCache(decoded_address); + return decoded_address; +} + +} // namespace open_vcdiff |