diff options
Diffstat (limited to 'third_party/courgette/streams.cc')
-rw-r--r-- | third_party/courgette/streams.cc | 383 |
1 files changed, 0 insertions, 383 deletions
diff --git a/third_party/courgette/streams.cc b/third_party/courgette/streams.cc deleted file mode 100644 index 9870cd2..0000000 --- a/third_party/courgette/streams.cc +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Streams classes. -// -// These memory-resident streams are used for serializing data into a sequential -// region of memory. -// -// Streams are divided into SourceStreams for reading and SinkStreams for -// writing. Streams are aggregated into Sets which allows several streams to be -// used at once. Example: we can write A1, B1, A2, B2 but achieve the memory -// layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another. -// -// The aggregated streams are important to Courgette's compression efficiency, -// we use it to cluster similar kinds of data which helps to generate longer -// common subsequences and repeated sequences. - -#include "third_party/courgette/streams.h" - -#include <io.h> -#include <memory.h> - -#include "base/basictypes.h" - -namespace courgette { - -// Update this version number if the serialization format of a StreamSet -// changes. -static const unsigned int kStreamsSerializationFormatVersion = 20090218; - -// -// This is a cut down Varint implementation, implementing only what we use for -// streams. The original implementation is at google3/util/coding/varint.h -// -class Varint { - public: - // Maximum lengths of varint encoding of uint32 - static const int kMax32 = 5; - - // Parses a Varint32 encoded value from |source| and stores it in |output|, - // and returns a pointer to the following byte. Returns NULL if a valid - // varint value was not found before |limit|. - static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit, - uint32* output); - - // Writes the Varint32 encoded representation of |value| to buffer - // |destination|. |destination| must have sufficient length to hold kMax32 - // bytes. Returns a pointer to the byte just past the last encoded byte. - static uint8* Encode32(uint8* destination, uint32 value); -}; - -// Parses a Varint32 encoded unsigned number from |source|. The Varint32 -// encoding is a little-endian sequence of bytes containing base-128 digits, -// with the high order bit set to indicate if there are more digits. -// -// For each byte, we mask out the digit and 'or' it into the right place in the -// result. -// -// The digit loop is unrolled for performance. It usually exits after the first -// one or two digits. -const uint8* Varint::Parse32WithLimit(const uint8* source, - const uint8* limit, - uint32* output) { - uint32 digit, result; - if (source >= limit) - return NULL; - digit = *(source++); - result = digit & 127; - if (digit < 128) { - *output = result; - return source; - } - - if (source >= limit) - return NULL; - digit = *(source++); - result |= (digit & 127) << 7; - if (digit < 128) { - *output = result; - return source; - } - - if (source >= limit) - return NULL; - digit = *(source++); - result |= (digit & 127) << 14; - if (digit < 128) { - *output = result; - return source; - } - - if (source >= limit) - return NULL; - digit = *(source++); - result |= (digit & 127) << 21; - if (digit < 128) { - *output = result; - return source; - } - - if (source >= limit) - return NULL; - digit = *(source++); - result |= (digit & 127) << 28; - if (digit < 128) { - *output = result; - return source; - } - - return NULL; // Value is too long to be a Varint32. -} - -// Write the base-128 digits in little-endian order. All except the last digit -// have the high bit set to indicate more digits. -inline uint8* Varint::Encode32(uint8* destination, uint32 value) { - while (value >= 128) { - *(destination++) = value | 128; - value = value >> 7; - } - *(destination++) = value; - return destination; -} - -void SourceStream::Init(const SinkStream& sink) { - Init(sink.Buffer(), sink.Length()); -} - -bool SourceStream::Read(void* destination, size_t count) { - if (current_ + count > end_) - return false; - memcpy(destination, current_, count); - current_ += count; - return true; -} - -bool SourceStream::ReadVarint32(uint32* output_value) { - const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value); - if (!after) - return false; - current_ = after; - return true; -} - -bool SourceStream::ReadVarint32Signed(int32* output_value) { - // Signed numbers are encoded as unsigned numbers so that numbers nearer zero - // have shorter varint encoding. - // 0000xxxx encoded as 000xxxx0. - // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. - uint32 unsigned_value; - if (!ReadVarint32(&unsigned_value)) - return false; - if (unsigned_value & 1) - *output_value = ~static_cast<int32>(unsigned_value >> 1); - else - *output_value = (unsigned_value >> 1); - return true; -} - -bool SourceStream::ShareSubstream(size_t offset, size_t length, - SourceStream* substream) { - if (offset > Remaining()) - return false; - if (length > Remaining() - offset) - return false; - substream->Init(current_ + offset, length); - return true; -} - -bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) { - if (!ShareSubstream(0, length, substream)) - return false; - current_ += length; - return true; -} - -bool SourceStream::Skip(size_t byte_count) { - if (current_ + byte_count > end_) - return false; - current_ += byte_count; - return true; -} - -void SinkStream::Write(const void* data, size_t byte_count) { - buffer_.append(static_cast<const char*>(data), byte_count); -} - -void SinkStream::WriteVarint32(uint32 value) { - uint8 buffer[Varint::kMax32]; - uint8* end = Varint::Encode32(buffer, value); - Write(buffer, end - buffer); -} - -void SinkStream::WriteVarint32Signed(int32 value) { - // Encode signed numbers so that numbers nearer zero have shorter - // varint encoding. - // 0000xxxx encoded as 000xxxx0. - // 1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx. - if (value < 0) - WriteVarint32(~value * 2 + 1); - else - WriteVarint32(value * 2); -} - -void SinkStream::Append(SinkStream* other) { - Write(other->buffer_.c_str(), other->buffer_.size()); - other->buffer_.clear(); - other->buffer_.reserve(0); // Non-binding request to reduce storage. -} - -//////////////////////////////////////////////////////////////////////////////// - -SourceStreamSet::SourceStreamSet() - : count_(kMaxStreams) { -} - -SourceStreamSet::~SourceStreamSet() { -} - - -// Initializes from |source|. -// The stream set for N streams is serialized as a header -// <version><N><length1><length2>...<lengthN> -// followed by the stream contents -// <bytes1><bytes2>...<bytesN> -// -bool SourceStreamSet::Init(const void* source, size_t byte_count) { - const uint8* start = static_cast<const uint8*>(source); - const uint8* end = start + byte_count; - - unsigned int version; - const uint8* finger = Varint::Parse32WithLimit(start, end, &version); - if (finger == NULL) - return false; - if (version != kStreamsSerializationFormatVersion) - return false; - - unsigned int count; - finger = Varint::Parse32WithLimit(finger, end, &count); - if (finger == NULL) - return false; - if (count > kMaxStreams) - return false; - - count_ = count; - - unsigned int lengths[kMaxStreams]; - size_t accumulated_length = 0; - - for (size_t i = 0; i < count_; ++i) { - finger = Varint::Parse32WithLimit(finger, end, &lengths[i]); - if (finger == NULL) - return false; - accumulated_length += lengths[i]; - } - - // Remaining bytes should add up to sum of lengths. - if (end - finger != accumulated_length) - return false; - - accumulated_length = finger - start; - for (size_t i = 0; i < count_; ++i) { - stream(i)->Init(start + accumulated_length, lengths[i]); - accumulated_length += lengths[i]; - } - - return true; -} - -bool SourceStreamSet::Init(SourceStream* source) { - // TODO(sra): consume the rest of |source|. - return Init(source->Buffer(), source->Remaining()); -} - -bool SourceStreamSet::ReadSet(SourceStreamSet* set) { - uint32 stream_count = 0; - SourceStream* control_stream = this->stream(0); - if (!control_stream->ReadVarint32(&stream_count)) - return false; - - uint32 lengths[kMaxStreams] = {}; // i.e. all zero. - - for (size_t i = 0; i < stream_count; ++i) { - if (!control_stream->ReadVarint32(&lengths[i])) - return false; - } - - for (size_t i = 0; i < stream_count; ++i) { - if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i))) - return false; - } - return true; -} - -bool SourceStreamSet::Empty() const { - for (size_t i = 0; i < count_; ++i) { - if (streams_[i].Remaining() != 0) - return false; - } - return true; -} - -//////////////////////////////////////////////////////////////////////////////// - -SinkStreamSet::SinkStreamSet() - : count_(kMaxStreams) { -} - -SinkStreamSet::~SinkStreamSet() { -} - -void SinkStreamSet::Init(size_t stream_index_limit) { - count_ = stream_index_limit; -} - -// The header for a stream set for N streams is serialized as -// <version><N><length1><length2>...<lengthN> -void SinkStreamSet::CopyHeaderTo(SinkStream* header) { - header->WriteVarint32(kStreamsSerializationFormatVersion); - header->WriteVarint32(count_); - for (size_t i = 0; i < count_; ++i) { - header->WriteVarint32(stream(i)->Length()); - } -} - -// Writes |this| to |combined_stream|. See SourceStreamSet::Init for the layout -// of the stream metadata and contents. -bool SinkStreamSet::CopyTo(SinkStream *combined_stream) { - SinkStream header; - CopyHeaderTo(&header); - combined_stream->Append(&header); - for (size_t i = 0; i < count_; ++i) { - combined_stream->Append(stream(i)); - } - return true; -} - -namespace { -bool Write(int file_descriptor, SinkStream* sink) { - size_t length = sink->Length(); - const void *buffer = sink->Buffer(); - int bytes_written = _write(file_descriptor, buffer, length); - return bytes_written == length; -} -} - -bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) { - SinkStream header; - CopyHeaderTo(&header); - if (!Write(file_descriptor, &header)) - return false; - for (size_t i = 0; i < count_; ++i) { - if (!Write(file_descriptor, stream(i))) - return false; - } - return true; -} - -bool SinkStreamSet::WriteSet(SinkStreamSet* set) { - uint32 lengths[kMaxStreams]; - // 'stream_count' includes all non-empty streams and all empty stream numbered - // lower than a non-empty stream. - size_t stream_count = 0; - for (size_t i = 0; i < kMaxStreams; ++i) { - SinkStream* stream = set->stream(i); - lengths[i] = stream->Length(); - if (lengths[i] > 0) - stream_count = i + 1; - } - - SinkStream* control_stream = this->stream(0); - control_stream->WriteVarint32(stream_count); - for (size_t i = 0; i < stream_count; ++i) { - control_stream->WriteVarint32(lengths[i]); - } - - for (size_t i = 0; i < stream_count; ++i) { - this->stream(i)->Append(set->stream(i)); - } - return true; -} - -} // namespace |