diff options
Diffstat (limited to 'src/google/protobuf/io')
23 files changed, 280 insertions, 2067 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc index 5344975..6a91a13 100644 --- a/src/google/protobuf/io/coded_stream.cc +++ b/src/google/protobuf/io/coded_stream.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -43,7 +43,7 @@ #include <limits.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stl_util.h> +#include <google/protobuf/stubs/stl_util-inl.h> namespace google { @@ -56,36 +56,10 @@ static const int kMaxVarintBytes = 10; static const int kMaxVarint32Bytes = 5; -inline bool NextNonEmpty(ZeroCopyInputStream* input, - const void** data, int* size) { - bool success; - do { - success = input->Next(data, size); - } while (success && *size == 0); - return success; -} - } // namespace // CodedInputStream ================================================== -CodedInputStream::~CodedInputStream() { - if (input_ != NULL) { - BackUpInputToCurrentPosition(); - } - - if (total_bytes_warning_threshold_ == -2) { - GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_; - } -} - -// Static. -int CodedInputStream::default_recursion_limit_ = 100; - - -void CodedOutputStream::EnableAliasing(bool enabled) { - aliasing_enabled_ = enabled && output_->AllowsAliasing(); -} void CodedInputStream::BackUpInputToCurrentPosition() { int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; @@ -115,7 +89,8 @@ inline void CodedInputStream::RecomputeBufferLimits() { CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { // Current position relative to the beginning of the stream. - int current_position = CurrentPosition(); + int current_position = total_bytes_read_ - + (BufferSize() + buffer_size_after_limit_); Limit old_limit = current_limit_; @@ -149,9 +124,10 @@ void CodedInputStream::PopLimit(Limit limit) { legitimate_message_end_ = false; } -int CodedInputStream::BytesUntilLimit() const { +int CodedInputStream::BytesUntilLimit() { if (current_limit_ == INT_MAX) return -1; - int current_position = CurrentPosition(); + int current_position = total_bytes_read_ - + (BufferSize() + buffer_size_after_limit_); return current_limit_ - current_position; } @@ -160,22 +136,13 @@ void CodedInputStream::SetTotalBytesLimit( int total_bytes_limit, int warning_threshold) { // Make sure the limit isn't already past, since this could confuse other // code. - int current_position = CurrentPosition(); + int current_position = total_bytes_read_ - + (BufferSize() + buffer_size_after_limit_); total_bytes_limit_ = max(current_position, total_bytes_limit); - if (warning_threshold >= 0) { - total_bytes_warning_threshold_ = warning_threshold; - } else { - // warning_threshold is negative - total_bytes_warning_threshold_ = -1; - } + total_bytes_warning_threshold_ = warning_threshold; RecomputeBufferLimits(); } -int CodedInputStream::BytesUntilTotalBytesLimit() const { - if (total_bytes_limit_ == INT_MAX) return -1; - return total_bytes_limit_ - CurrentPosition(); -} - void CodedInputStream::PrintTotalBytesLimitError() { GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too " "big (more than " << total_bytes_limit_ @@ -256,14 +223,6 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) { buffer->clear(); } - int closest_limit = min(current_limit_, total_bytes_limit_); - if (closest_limit != INT_MAX) { - int bytes_to_limit = closest_limit - CurrentPosition(); - if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) { - buffer->reserve(size); - } - } - int current_buffer_size; while ((current_buffer_size = BufferSize()) < size) { // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). @@ -330,16 +289,11 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) { uint32 b; uint32 result; - b = *(ptr++); result = b ; if (!(b & 0x80)) goto done; - result -= 0x80; - b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done; - result -= 0x80 << 7; - b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done; - result -= 0x80 << 14; - b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done; - result -= 0x80 << 21; - b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done; - // "result -= 0x80 << 28" is irrevelant. + b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; // If the input is larger than 32 bits, we still need to read it all // and discard the high-order bits. @@ -369,8 +323,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) { bool CodedInputStream::ReadVarint32Fallback(uint32* value) { if (BufferSize() >= kMaxVarintBytes || - // Optimization: We're also safe if the buffer is non-empty and it ends - // with a byte that would terminate a varint. + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { const uint8* end = ReadVarint32FromArray(buffer_, value); if (end == NULL) return false; @@ -405,17 +359,16 @@ uint32 CodedInputStream::ReadTagSlow() { // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags // again, since we have now refreshed the buffer. - uint64 result = 0; + uint64 result; if (!ReadVarint64(&result)) return 0; return static_cast<uint32>(result); } uint32 CodedInputStream::ReadTagFallback() { - const int buf_size = BufferSize(); - if (buf_size >= kMaxVarintBytes || - // Optimization: We're also safe if the buffer is non-empty and it ends - // with a byte that would terminate a varint. - (buf_size > 0 && !(buffer_end_[-1] & 0x80))) { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { uint32 tag; const uint8* end = ReadVarint32FromArray(buffer_, &tag); if (end == NULL) { @@ -426,9 +379,7 @@ uint32 CodedInputStream::ReadTagFallback() { } else { // We are commonly at a limit when attempting to read tags. Try to quickly // detect this case without making another function call. - if ((buf_size == 0) && - ((buffer_size_after_limit_ > 0) || - (total_bytes_read_ == current_limit_)) && + if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 && // Make sure that the limit we hit is not total_bytes_limit_, since // in that case we still need to call Refresh() so that it prints an // error. @@ -466,8 +417,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) { bool CodedInputStream::ReadVarint64Fallback(uint64* value) { if (BufferSize() >= kMaxVarintBytes || - // Optimization: We're also safe if the buffer is non-empty and it ends - // with a byte that would terminate a varint. + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { // Fast path: We have enough bytes left in the buffer to guarantee that // this read won't cross the end, so we can skip the checks. @@ -479,30 +430,20 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) { // processors. uint32 part0 = 0, part1 = 0, part2 = 0; - b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done; - part0 -= 0x80; - b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 7; - b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 14; - b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 21; - b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done; - part1 -= 0x80; - b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 7; - b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 14; - b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 21; - b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done; - part2 -= 0x80; - b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done; - // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0. + b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; // We have overrun the maximum size of a varint (10 bytes). The data // must be corrupt. - return false; + return NULL; done: Advance(ptr - buffer_); @@ -542,13 +483,13 @@ bool CodedInputStream::Refresh() { "CodedInputStream::SetTotalBytesLimit() in " "google/protobuf/io/coded_stream.h."; - // Don't warn again for this stream, and print total size at the end. - total_bytes_warning_threshold_ = -2; + // Don't warn again for this stream. + total_bytes_warning_threshold_ = -1; } const void* void_buffer; int buffer_size; - if (NextNonEmpty(input_, &void_buffer, &buffer_size)) { + if (input_->Next(&void_buffer, &buffer_size)) { buffer_ = reinterpret_cast<const uint8*>(void_buffer); buffer_end_ = buffer_ + buffer_size; GOOGLE_CHECK_GE(buffer_size, 0); @@ -587,8 +528,7 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output) buffer_(NULL), buffer_size_(0), total_bytes_(0), - had_error_(false), - aliasing_enabled_(false) { + had_error_(false) { // Eagerly Refresh() so buffer space is immediately available. Refresh(); // The Refresh() may have failed. If the client doesn't write any data, @@ -642,23 +582,6 @@ uint8* CodedOutputStream::WriteRawToArray( } -void CodedOutputStream::WriteAliasedRaw(const void* data, int size) { - if (size < buffer_size_ - ) { - WriteRaw(data, size); - } else { - if (buffer_size_ > 0) { - output_->BackUp(buffer_size_); - total_bytes_ -= buffer_size_; - buffer_ = NULL; - buffer_size_ = 0; - } - - total_bytes_ += size; - had_error_ |= !output_->WriteAliasedRaw(data, size); - } -} - void CodedOutputStream::WriteLittleEndian32(uint32 value) { uint8 bytes[sizeof(value)]; @@ -902,13 +825,6 @@ int CodedOutputStream::VarintSize64(uint64 value) { } } -uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str, - uint8* target) { - GOOGLE_DCHECK_LE(str.size(), kuint32max); - target = WriteVarint32ToArray(str.size(), target); - return WriteStringToArray(str, target); -} - } // namespace io } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h index 81fabb1..e5f6161 100644 --- a/src/google/protobuf/io/coded_stream.h +++ b/src/google/protobuf/io/coded_stream.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -110,27 +110,14 @@ #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ #include <string> -#ifdef _MSC_VER - #if defined(_M_IX86) && \ - !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) - #define PROTOBUF_LITTLE_ENDIAN 1 - #endif - #if _MSC_VER >= 1300 - // If MSVC has "/RTCc" set, it will complain about truncating casts at - // runtime. This file contains some intentional truncating casts. - #pragma runtime_checks("c", off) - #endif -#else - #include <sys/param.h> // __BYTE_ORDER - #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && \ - !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) - #define PROTOBUF_LITTLE_ENDIAN 1 - #endif -#endif +#ifndef _MSC_VER +#include <sys/param.h> +#endif // !_MSC_VER #include <google/protobuf/stubs/common.h> - +#include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro namespace google { + namespace protobuf { class DescriptorPool; @@ -170,9 +157,6 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // successfully and the stream's byte limit. ~CodedInputStream(); - // Return true if this CodedInputStream reads from a flat array instead of - // a ZeroCopyInputStream. - inline bool IsFlat() const; // Skips a number of bytes. Returns false if an underlying read error // occurs. @@ -233,22 +217,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Read a tag. This calls ReadVarint32() and returns the result, or returns // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates // the last tag value, which can be checked with LastTagWas(). - // Always inline because this is only called in one place per parse loop + // Always inline because this is only called in once place per parse loop // but it is called for every iteration of said loop, so it should be fast. // GCC doesn't want to inline this by default. uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; - // This usually a faster alternative to ReadTag() when cutoff is a manifest - // constant. It does particularly well for cutoff >= 127. The first part - // of the return value is the tag that was read, though it can also be 0 in - // the cases where ReadTag() would return 0. If the second part is true - // then the tag is known to be in [0, cutoff]. If not, the tag either is - // above cutoff or is 0. (There's intentional wiggle room when tag is 0, - // because that can arise in several ways, and for best performance we want - // to avoid an extra "is tag == 0?" check here.) - inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) - GOOGLE_ATTRIBUTE_ALWAYS_INLINE; - // Usually returns true if calling ReadVarint32() now would produce the given // value. Will always return false if ReadVarint32() would not return the // given value. If ExpectTag() returns true, it also advances past @@ -275,8 +248,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // zero, and ConsumedEntireMessage() will return true. bool ExpectAtEnd(); - // If the last call to ReadTag() or ReadTagWithCutoff() returned the - // given value, returns true. Otherwise, returns false; + // If the last call to ReadTag() returned the given value, returns true. + // Otherwise, returns false; // // This is needed because parsers for some types of embedded messages // (with field type TYPE_GROUP) don't actually know that they've reached the @@ -325,10 +298,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Returns the number of bytes left until the nearest limit on the // stack is hit, or -1 if no limits are in place. - int BytesUntilLimit() const; - - // Returns current position relative to the beginning of the input stream. - int CurrentPosition() const; + int BytesUntilLimit(); // Total Bytes Limit ----------------------------------------------- // To prevent malicious users from sending excessively large messages @@ -344,9 +314,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // cause integer overflows is 512MB. The default limit is 64MB. Apps // should set shorter limits if possible. If warning_threshold is not -1, // a warning will be printed to stderr after warning_threshold bytes are - // read. For backwards compatibility all negative values get squashed to -1, - // as other negative values might have special internal meanings. - // An error will always be printed to stderr if the limit is reached. + // read. An error will always be printed to stderr if the limit is + // reached. // // This is unrelated to PushLimit()/PopLimit(). // @@ -367,20 +336,15 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // something unusual. void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); - // The Total Bytes Limit minus the Current Position, or -1 if there - // is no Total Bytes Limit. - int BytesUntilTotalBytesLimit() const; - // Recursion Limit ------------------------------------------------- // To prevent corrupt or malicious messages from causing stack overflows, // we must keep track of the depth of recursion when parsing embedded // messages and groups. CodedInputStream keeps track of this because it // is the only object that is passed down the stack during parsing. - // Sets the maximum recursion depth. The default is 100. + // Sets the maximum recursion depth. The default is 64. void SetRecursionLimit(int limit); - // Increments the current recursion depth. Returns true if the depth is // under the limit, false if it has gone over. bool IncrementRecursionDepth(); @@ -456,8 +420,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // // Note that this feature is ignored when parsing "lite" messages as they do // not have descriptors. - void SetExtensionRegistry(const DescriptorPool* pool, - MessageFactory* factory); + void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory); // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool // has been provided. @@ -481,7 +444,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { int overflow_bytes_; // LastTagWas() stuff. - uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). + uint32 last_tag_; // result of last ReadTag(). // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly // at EOF, or by ExpectAtEnd() when it returns true. This happens when we @@ -506,11 +469,6 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Maximum number of bytes to read, period. This is unrelated to // current_limit_. Set using SetTotalBytesLimit(). int total_bytes_limit_; - - // If positive/0: Limit for bytes read after which a warning due to size - // should be logged. - // If -1: Printing of warning disabled. Can be set by client. - // If -2: Internal: Limit has been reached, print full size when destructing. int total_bytes_warning_threshold_; // Current recursion depth, controlled by IncrementRecursionDepth() and @@ -563,13 +521,12 @@ class LIBPROTOBUF_EXPORT CodedInputStream { bool ReadStringFallback(string* buffer, int size); // Return the size of the buffer. - int BufferSize() const; + uint32 BufferSize() const; static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB - - static int default_recursion_limit_; // 100 by default. + static const int kDefaultRecursionLimit = 64; }; // Class which encodes and writes binary data which is composed of varint- @@ -597,7 +554,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // char text[] = "Hello world!"; // // int coded_size = sizeof(magic_number) + -// CodedOutputStream::VarintSize32(strlen(text)) + +// CodedOutputStream::Varint32Size(strlen(text)) + // strlen(text); // // uint8* buffer = @@ -653,9 +610,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Write raw bytes, copying them from the given buffer. void WriteRaw(const void* buffer, int size); - // Like WriteRaw() but will try to write aliased data if aliasing is - // turned on. - void WriteRawMaybeAliased(const void* data, int size); // Like WriteRaw() but writing directly to the target array. // This is _not_ inlined, as the compiler often optimizes memcpy into inline // copy loops. Since this gets called by every field with string or bytes @@ -667,21 +621,8 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { void WriteString(const string& str); // Like WriteString() but writing directly to the target array. static uint8* WriteStringToArray(const string& str, uint8* target); - // Write the varint-encoded size of str followed by str. - static uint8* WriteStringWithSizeToArray(const string& str, uint8* target); - // Instructs the CodedOutputStream to allow the underlying - // ZeroCopyOutputStream to hold pointers to the original structure instead of - // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the - // underlying stream does not support aliasing, then enabling it has no - // affect. For now, this only affects the behavior of - // WriteRawMaybeAliased(). - // - // NOTE: It is caller's responsibility to ensure that the chunk of memory - // remains live until all of the data has been consumed from the stream. - void EnableAliasing(bool enabled); - // Write a 32-bit little-endian integer. void WriteLittleEndian32(uint32 value); // Like WriteLittleEndian32() but writing directly to the target array. @@ -726,21 +667,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // If negative, 10 bytes. Otheriwse, same as VarintSize32(). static int VarintSize32SignExtended(int32 value); - // Compile-time equivalent of VarintSize32(). - template <uint32 Value> - struct StaticVarintSize32 { - static const int value = - (Value < (1 << 7)) - ? 1 - : (Value < (1 << 14)) - ? 2 - : (Value < (1 << 21)) - ? 3 - : (Value < (1 << 28)) - ? 4 - : 5; - }; - // Returns the total number of bytes written since this object was created. inline int ByteCount() const; @@ -756,7 +682,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { int buffer_size_; int total_bytes_; // Sum of sizes of all buffers seen so far. bool had_error_; // Whether an error occurred during output. - bool aliasing_enabled_; // See EnableAliasing(). // Advance the buffer by a given number of bytes. void Advance(int amount); @@ -765,10 +690,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Advance(buffer_size_). bool Refresh(); - // Like WriteRaw() but may avoid copying if the underlying - // ZeroCopyOutputStream supports it. - void WriteAliasedRaw(const void* buffer, int size); - static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target); // Always-inlined versions of WriteVarint* functions so that code can be @@ -814,7 +735,8 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) { inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( const uint8* buffer, uint32* value) { -#if defined(PROTOBUF_LITTLE_ENDIAN) +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(value, buffer, sizeof(*value)); return buffer + sizeof(*value); #else @@ -829,7 +751,8 @@ inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( const uint8* buffer, uint64* value) { -#if defined(PROTOBUF_LITTLE_ENDIAN) +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(value, buffer, sizeof(*value)); return buffer + sizeof(*value); #else @@ -848,8 +771,9 @@ inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( } inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { -#if defined(PROTOBUF_LITTLE_ENDIAN) - if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { memcpy(value, buffer_, sizeof(*value)); Advance(sizeof(*value)); return true; @@ -862,8 +786,9 @@ inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { } inline bool CodedInputStream::ReadLittleEndian64(uint64* value) { -#if defined(PROTOBUF_LITTLE_ENDIAN) - if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN + if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { memcpy(value, buffer_, sizeof(*value)); Advance(sizeof(*value)); return true; @@ -886,45 +811,6 @@ inline uint32 CodedInputStream::ReadTag() { } } -inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff( - uint32 cutoff) { - // In performance-sensitive code we can expect cutoff to be a compile-time - // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at - // compile time. - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { - // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). - // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields - // is large enough then is it better to check for the two-byte case first? - if (static_cast<int8>(buffer_[0]) > 0) { - const uint32 kMax1ByteVarint = 0x7f; - uint32 tag = last_tag_ = buffer_[0]; - Advance(1); - return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff); - } - // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, - // and tag is two bytes. The latter is tested by bitwise-and-not of the - // first byte and the second byte. - if (cutoff >= 0x80 && - GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && - GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { - const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f; - uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); - Advance(2); - // It might make sense to test for tag == 0 now, but it is so rare that - // that we don't bother. A varint-encoded 0 should be one byte unless - // the encoder lost its mind. The second part of the return value of - // this function is allowed to be either true or false if the tag is 0, - // so we don't have to check for tag == 0. We may need to check whether - // it exceeds cutoff. - bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff; - return make_pair(tag, at_or_below_cutoff); - } - } - // Slow path - last_tag_ = ReadTagFallback(); - return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff); -} - inline bool CodedInputStream::LastTagWas(uint32 expected) { return last_tag_ == expected; } @@ -981,9 +867,7 @@ inline bool CodedInputStream::ExpectAtEnd() { // If we are at a limit we know no more bytes can be read. Otherwise, it's // hard to say without calling Refresh(), and we'd rather not do that. - if (buffer_ == buffer_end_ && - ((buffer_size_after_limit_ != 0) || - (total_bytes_read_ == current_limit_))) { + if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) { last_tag_ = 0; // Pretend we called ReadTag()... legitimate_message_end_ = true; // ... and it hit EOF. return true; @@ -992,10 +876,6 @@ inline bool CodedInputStream::ExpectAtEnd() { } } -inline int CodedInputStream::CurrentPosition() const { - return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_); -} - inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) { if (buffer_size_ < size) { return NULL; @@ -1035,7 +915,8 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray( inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, uint8* target) { -#if defined(PROTOBUF_LITTLE_ENDIAN) +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(target, &value, sizeof(value)); #else target[0] = static_cast<uint8>(value); @@ -1048,7 +929,8 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, uint8* target) { -#if defined(PROTOBUF_LITTLE_ENDIAN) +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ + defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN memcpy(target, &value, sizeof(value)); #else uint32 part0 = static_cast<uint32>(value); @@ -1101,21 +983,12 @@ inline int CodedOutputStream::VarintSize32SignExtended(int32 value) { } inline void CodedOutputStream::WriteString(const string& str) { - WriteRaw(str.data(), static_cast<int>(str.size())); -} - -inline void CodedOutputStream::WriteRawMaybeAliased( - const void* data, int size) { - if (aliasing_enabled_) { - WriteAliasedRaw(data, size); - } else { - WriteRaw(data, size); - } + WriteRaw(str.data(), str.size()); } inline uint8* CodedOutputStream::WriteStringToArray( const string& str, uint8* target) { - return WriteRawToArray(str.data(), static_cast<int>(str.size()), target); + return WriteRawToArray(str.data(), str.size(), target); } inline int CodedOutputStream::ByteCount() const { @@ -1144,7 +1017,7 @@ inline void CodedInputStream::DecrementRecursionDepth() { if (recursion_depth_ > 0) --recursion_depth_; } -inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool, +inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory) { extension_pool_ = pool; extension_factory_ = factory; @@ -1158,7 +1031,7 @@ inline MessageFactory* CodedInputStream::GetExtensionFactory() { return extension_factory_; } -inline int CodedInputStream::BufferSize() const { +inline uint32 CodedInputStream::BufferSize() const { return buffer_end_ - buffer_; } @@ -1171,12 +1044,12 @@ inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) last_tag_(0), legitimate_message_end_(false), aliasing_enabled_(false), - current_limit_(kint32max), + current_limit_(INT_MAX), buffer_size_after_limit_(0), total_bytes_limit_(kDefaultTotalBytesLimit), total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), recursion_depth_(0), - recursion_limit_(default_recursion_limit_), + recursion_limit_(kDefaultRecursionLimit), extension_pool_(NULL), extension_factory_(NULL) { // Eagerly Refresh() so buffer space is immediately available. @@ -1197,24 +1070,21 @@ inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) total_bytes_limit_(kDefaultTotalBytesLimit), total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), recursion_depth_(0), - recursion_limit_(default_recursion_limit_), + recursion_limit_(kDefaultRecursionLimit), extension_pool_(NULL), extension_factory_(NULL) { // Note that setting current_limit_ == size is important to prevent some // code paths from trying to access input_ and segfaulting. } -inline bool CodedInputStream::IsFlat() const { - return input_ == NULL; +inline CodedInputStream::~CodedInputStream() { + if (input_ != NULL) { + BackUpInputToCurrentPosition(); + } } } // namespace io } // namespace protobuf - -#if defined(_MSC_VER) && _MSC_VER >= 1300 - #pragma runtime_checks("c", restore) -#endif // _MSC_VER - } // namespace google #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h index 88c14ca..e9799d4 100644 --- a/src/google/protobuf/io/coded_stream_inl.h +++ b/src/google/protobuf/io/coded_stream_inl.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -37,9 +37,8 @@ #define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__ #include <google/protobuf/io/coded_stream.h> -#include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include <string> -#include <google/protobuf/stubs/stl_util.h> +#include <google/protobuf/stubs/stl_util-inl.h> namespace google { namespace protobuf { @@ -51,12 +50,8 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer, if (BufferSize() >= size) { STLStringResizeUninitialized(buffer, size); - // When buffer is empty, string_as_array(buffer) will return NULL but memcpy - // requires non-NULL pointers even when size is 0. Hench this check. - if (size > 0) { - memcpy(mutable_string_data(buffer), buffer_, size); - Advance(size); - } + memcpy(string_as_array(buffer), buffer_, size); + Advance(size); return true; } diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc index f4cb5ea..7d29833 100644 --- a/src/google/protobuf/io/coded_stream_unittest.cc +++ b/src/google/protobuf/io/coded_stream_unittest.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -44,6 +44,7 @@ #include <google/protobuf/testing/googletest.h> #include <gtest/gtest.h> #include <google/protobuf/io/zero_copy_stream_impl.h> +#include <google/protobuf/stubs/strutil.h> // This declares an unsigned long long integer literal in a portable way. @@ -124,13 +125,6 @@ namespace { class CodedStreamTest : public testing::Test { protected: - // Helper method used by tests for bytes warning. See implementation comment - // for further information. - static void SetupTotalBytesLimitWarningTest( - int total_bytes_limit, int warning_threshold, - vector<string>* out_errors, vector<string>* out_warnings); - - // Buffer used during most of the tests. This assumes tests run sequentially. static const int kBufferSize = 1024 * 64; static uint8 buffer_[kBufferSize]; }; @@ -214,33 +208,6 @@ TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) { EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); } -// This is the regression test that verifies that there is no issues -// with the empty input buffers handling. -TEST_F(CodedStreamTest, EmptyInputBeforeEos) { - class In : public ZeroCopyInputStream { - public: - In() : count_(0) {} - private: - virtual bool Next(const void** data, int* size) { - *data = NULL; - *size = 0; - return count_++ < 2; - } - virtual void BackUp(int count) { - GOOGLE_LOG(FATAL) << "Tests never call this."; - } - virtual bool Skip(int count) { - GOOGLE_LOG(FATAL) << "Tests never call this."; - return false; - } - virtual int64 ByteCount() const { return 0; } - int count_; - } in; - CodedInputStream input(&in); - input.ReadTag(); - EXPECT_TRUE(input.ConsumedEntireMessage()); -} - TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) { // Leave one byte at the beginning of the buffer so we can read it // to force the first buffer to be loaded. @@ -682,197 +649,14 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) { EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); } -TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); - - { - CodedInputStream coded_input(&input); - coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes)); - EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit()); - - string str; - EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); - EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes), - coded_input.BytesUntilTotalBytesLimit()); - EXPECT_EQ(kRawBytes, str); - // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). - EXPECT_GE(str.capacity(), strlen(kRawBytes)); - } - - EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); -} - -TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(sizeof(buffer_)); - - string str; - EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); - EXPECT_EQ(kRawBytes, str); - // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). - EXPECT_GE(str.capacity(), strlen(kRawBytes)); - } - - EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); -} - -TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - - string str; - EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); - EXPECT_EQ(kRawBytes, str); - // Note: this check depends on string class implementation. It - // expects that string will allocate more than strlen(kRawBytes) - // if the content of kRawBytes is appended to string in small - // chunks. - // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). - EXPECT_GE(str.capacity(), strlen(kRawBytes)); - } - - EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); -} - -TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(sizeof(buffer_)); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, -1)); - // Note: this check depends on string class implementation. It - // expects that string will always allocate the same amount of - // memory for an empty string. - EXPECT_EQ(string().capacity(), str.capacity()); - } -} - -TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(sizeof(buffer_)); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); - EXPECT_GT(1 << 30, str.capacity()); - } -} - -TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(16); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); - // Note: this check depends on string class implementation. It - // expects that string will allocate less than strlen(kRawBytes) - // for an empty string. - EXPECT_GT(strlen(kRawBytes), str.capacity()); - } -} - -TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.SetTotalBytesLimit(16, 16); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); - // Note: this check depends on string class implementation. It - // expects that string will allocate less than strlen(kRawBytes) - // for an empty string. - EXPECT_GT(strlen(kRawBytes), str.capacity()); - } -} - -TEST_F(CodedStreamTest, - ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(sizeof(buffer_)); - coded_input.SetTotalBytesLimit(16, 16); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); - // Note: this check depends on string class implementation. It - // expects that string will allocate less than strlen(kRawBytes) - // for an empty string. - EXPECT_GT(strlen(kRawBytes), str.capacity()); - } -} - -TEST_F(CodedStreamTest, - ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) { - memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); - // Buffer size in the input must be smaller than sizeof(kRawBytes), - // otherwise check against capacity will fail as ReadStringInline() - // will handle the reading and will reserve the memory as needed. - ArrayInputStream input(buffer_, sizeof(buffer_), 32); - - { - CodedInputStream coded_input(&input); - coded_input.PushLimit(16); - coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_)); - EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit()); - - string str; - EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); - // Note: this check depends on string class implementation. It - // expects that string will allocate less than strlen(kRawBytes) - // for an empty string. - EXPECT_GT(strlen(kRawBytes), str.capacity()); - } -} - // ------------------------------------------------------------------- // Skip const char kSkipTestBytes[] = "<Before skipping><To be skipped><After skipping>"; +const char kSkipOutputTestBytes[] = + "-----------------<To be skipped>----------------"; TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) { memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes)); @@ -1163,11 +947,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) { ArrayInputStream input(buffer_, sizeof(buffer_)); CodedInputStream coded_input(&input); coded_input.SetTotalBytesLimit(16, -1); - EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); string str; EXPECT_TRUE(coded_input.ReadString(&str, 16)); - EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); vector<string> errors; @@ -1182,9 +964,7 @@ TEST_F(CodedStreamTest, TotalBytesLimit) { "A protocol message was rejected because it was too big", errors[0]); coded_input.SetTotalBytesLimit(32, -1); - EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); EXPECT_TRUE(coded_input.ReadString(&str, 16)); - EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); } TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) { @@ -1215,60 +995,6 @@ TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) { EXPECT_FALSE(coded_input.ConsumedEntireMessage()); } -// This method is used by the tests below. -// It constructs a CodedInputStream with the given limits and tries to read 2KiB -// of data from it. Then it returns the logged errors and warnings in the given -// vectors. -void CodedStreamTest::SetupTotalBytesLimitWarningTest( - int total_bytes_limit, int warning_threshold, - vector<string>* out_errors, vector<string>* out_warnings) { - ArrayInputStream raw_input(buffer_, sizeof(buffer_), 128); - - ScopedMemoryLog scoped_log; - { - CodedInputStream input(&raw_input); - input.SetTotalBytesLimit(total_bytes_limit, warning_threshold); - string str; - EXPECT_TRUE(input.ReadString(&str, 2048)); - } - - *out_errors = scoped_log.GetMessages(ERROR); - *out_warnings = scoped_log.GetMessages(WARNING); -} - -TEST_F(CodedStreamTest, TotalBytesLimitWarning) { - vector<string> errors; - vector<string> warnings; - SetupTotalBytesLimitWarningTest(10240, 1024, &errors, &warnings); - - EXPECT_EQ(0, errors.size()); - - ASSERT_EQ(2, warnings.size()); - EXPECT_PRED_FORMAT2(testing::IsSubstring, - "Reading dangerously large protocol message. If the message turns out to " - "be larger than 10240 bytes, parsing will be halted for security reasons.", - warnings[0]); - EXPECT_PRED_FORMAT2(testing::IsSubstring, - "The total number of bytes read was 2048", - warnings[1]); -} - -TEST_F(CodedStreamTest, TotalBytesLimitWarningDisabled) { - vector<string> errors; - vector<string> warnings; - - // Test with -1 - SetupTotalBytesLimitWarningTest(10240, -1, &errors, &warnings); - EXPECT_EQ(0, errors.size()); - EXPECT_EQ(0, warnings.size()); - - // Test again with -2, expecting the same result - SetupTotalBytesLimitWarningTest(10240, -2, &errors, &warnings); - EXPECT_EQ(0, errors.size()); - EXPECT_EQ(0, warnings.size()); -} - - TEST_F(CodedStreamTest, RecursionLimit) { ArrayInputStream input(buffer_, sizeof(buffer_)); CodedInputStream coded_input(&input); @@ -1306,7 +1032,6 @@ TEST_F(CodedStreamTest, RecursionLimit) { EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7 } - class ReallyBigInputStream : public ZeroCopyInputStream { public: ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {} diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc index ee28696..84d277f 100644 --- a/src/google/protobuf/io/gzip_stream.cc +++ b/src/google/protobuf/io/gzip_stream.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -73,17 +73,6 @@ GzipInputStream::~GzipInputStream() { zerror_ = inflateEnd(&zcontext_); } -static inline int internalInflateInit2( - z_stream* zcontext, GzipInputStream::Format format) { - int windowBitsFormat = 0; - switch (format) { - case GzipInputStream::GZIP: windowBitsFormat = 16; break; - case GzipInputStream::AUTO: windowBitsFormat = 32; break; - case GzipInputStream::ZLIB: windowBitsFormat = 0; break; - } - return inflateInit2(zcontext, /* windowBits */15 | windowBitsFormat); -} - int GzipInputStream::Inflate(int flush) { if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) { // previous inflate filled output buffer. don't change input params yet. @@ -100,7 +89,14 @@ int GzipInputStream::Inflate(int flush) { zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in)); zcontext_.avail_in = in_size; if (first) { - int error = internalInflateInit2(&zcontext_, format_); + int windowBitsFormat = 0; + switch (format_) { + case GZIP: windowBitsFormat = 16; break; + case AUTO: windowBitsFormat = 32; break; + case ZLIB: windowBitsFormat = 0; break; + } + int error = inflateInit2(&zcontext_, + /* windowBits */15 | windowBitsFormat); if (error != Z_OK) { return error; } @@ -131,21 +127,9 @@ bool GzipInputStream::Next(const void** data, int* size) { return true; } if (zerror_ == Z_STREAM_END) { - if (zcontext_.next_out != NULL) { - // sub_stream_ may have concatenated streams to follow - zerror_ = inflateEnd(&zcontext_); - if (zerror_ != Z_OK) { - return false; - } - zerror_ = internalInflateInit2(&zcontext_, format_); - if (zerror_ != Z_OK) { - return false; - } - } else { - *data = NULL; - *size = 0; - return false; - } + *data = NULL; + *size = 0; + return false; } zerror_ = Inflate(Z_NO_FLUSH); if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) { @@ -199,6 +183,16 @@ GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream, Init(sub_stream, options); } +GzipOutputStream::GzipOutputStream( + ZeroCopyOutputStream* sub_stream, Format format, int buffer_size) { + Options options; + options.format = format; + if (buffer_size != -1) { + options.buffer_size = buffer_size; + } + Init(sub_stream, options); +} + void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream, const Options& options) { sub_stream_ = sub_stream; @@ -257,7 +251,8 @@ int GzipOutputStream::Deflate(int flush) { } error = deflate(&zcontext_, flush); } while (error == Z_OK && zcontext_.avail_out == 0); - if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) { + if (((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) + && (zcontext_.avail_out != sub_data_size_)) { // Notify lower layer of data. sub_stream_->BackUp(zcontext_.avail_out); // We don't own the buffer anymore. @@ -299,11 +294,10 @@ int64 GzipOutputStream::ByteCount() const { } bool GzipOutputStream::Flush() { - zerror_ = Deflate(Z_FULL_FLUSH); - // Return true if the flush succeeded or if it was a no-op. - return (zerror_ == Z_OK) || - (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 && - zcontext_.avail_out != 0); + do { + zerror_ = Deflate(Z_FULL_FLUSH); + } while (zerror_ == Z_OK); + return zerror_ == Z_OK; } bool GzipOutputStream::Close() { diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h index c7ccc26..65dbc5b 100644 --- a/src/google/protobuf/io/gzip_stream.h +++ b/src/google/protobuf/io/gzip_stream.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -45,7 +45,6 @@ #include <zlib.h> -#include <google/protobuf/stubs/common.h> #include <google/protobuf/io/zero_copy_stream.h> namespace google { @@ -145,6 +144,12 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream { ZeroCopyOutputStream* sub_stream, const Options& options); + // DEPRECATED: Use one of the above constructors instead. + GzipOutputStream( + ZeroCopyOutputStream* sub_stream, + Format format, + int buffer_size = -1) GOOGLE_ATTRIBUTE_DEPRECATED; + virtual ~GzipOutputStream(); // Return last error message or NULL if no error. @@ -160,13 +165,6 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream { // necessary. // Compression may be less efficient stopping and starting around flushes. // Returns true if no error. - // - // Please ensure that block size is > 6. Here is an excerpt from the zlib - // doc that explains why: - // - // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out - // is greater than six to avoid repeated flush markers due to - // avail_out == 0 on return. bool Flush(); // Writes out all data and closes the gzip stream. diff --git a/src/google/protobuf/io/gzip_stream_unittest.sh b/src/google/protobuf/io/gzip_stream_unittest.sh index 16251a9..6e8a094 100755 --- a/src/google/protobuf/io/gzip_stream_unittest.sh +++ b/src/google/protobuf/io/gzip_stream_unittest.sh @@ -2,7 +2,7 @@ # # Protocol Buffers - Google's data interchange format # Copyright 2009 Google Inc. All rights reserved. -# https://developers.google.com/protocol-buffers/ +# http://code.google.com/p/protobuf/ # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/src/google/protobuf/io/package_info.h b/src/google/protobuf/io/package_info.h index dc1fc91..7a7a4e7 100644 --- a/src/google/protobuf/io/package_info.h +++ b/src/google/protobuf/io/package_info.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc index c8df417..c7d3074 100644 --- a/src/google/protobuf/io/printer.cc +++ b/src/google/protobuf/io/printer.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -35,6 +35,7 @@ #include <google/protobuf/io/printer.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/strutil.h> namespace google { namespace protobuf { @@ -50,8 +51,8 @@ Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter) } Printer::~Printer() { - // Only BackUp() if we have called Next() at least once and never failed. - if (buffer_size_ > 0 && !failed_) { + // Only BackUp() if we're sure we've successfully called Next() at least once. + if (buffer_size_ > 0) { output_->BackUp(buffer_size_); } } @@ -131,17 +132,6 @@ void Printer::Print(const char* text, Print(vars, text); } -void Printer::Print(const char* text, - const char* variable1, const string& value1, - const char* variable2, const string& value2, - const char* variable3, const string& value3) { - map<string, string> vars; - vars[variable1] = value1; - vars[variable2] = value2; - vars[variable3] = value3; - Print(vars, text); -} - void Printer::Indent() { indent_ += " "; } @@ -168,7 +158,7 @@ void Printer::WriteRaw(const char* data, int size) { if (failed_) return; if (size == 0) return; - if (at_start_of_line_ && (size > 0) && (data[0] != '\n')) { + if (at_start_of_line_) { // Insert an indent. at_start_of_line_ = false; WriteRaw(indent_.data(), indent_.size()); diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h index f06cbf2..de08538 100644 --- a/src/google/protobuf/io/printer.h +++ b/src/google/protobuf/io/printer.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -82,11 +82,7 @@ class LIBPROTOBUF_EXPORT Printer { // Like the first Print(), except the substitutions are given as parameters. void Print(const char* text, const char* variable1, const string& value1, const char* variable2, const string& value2); - // Like the first Print(), except the substitutions are given as parameters. - void Print(const char* text, const char* variable1, const string& value1, - const char* variable2, const string& value2, - const char* variable3, const string& value3); - // TODO(kenton): Overloaded versions with more variables? Three seems + // TODO(kenton): Overloaded versions with more variables? Two seems // to be enough. // Indent text by two spaces. After calling Indent(), two spaces will be diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc index 1331a8d..580a53d 100644 --- a/src/google/protobuf/io/printer_unittest.cc +++ b/src/google/protobuf/io/printer_unittest.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -220,7 +220,7 @@ TEST(Printer, Indenting) { } // Death tests do not work on Windows as of yet. -#ifdef PROTOBUF_HAS_DEATH_TEST +#ifdef GTEST_HAS_DEATH_TEST TEST(Printer, Death) { char buffer[8192]; @@ -231,33 +231,9 @@ TEST(Printer, Death) { EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name"); EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent"); } -#endif // PROTOBUF_HAS_DEATH_TEST +#endif // GTEST_HAS_DEATH_TEST -TEST(Printer, WriteFailurePartial) { - char buffer[17]; - - ArrayOutputStream output(buffer, sizeof(buffer)); - Printer printer(&output, '$'); - - // Print 16 bytes to almost fill the buffer (should not fail). - printer.Print("0123456789abcdef"); - EXPECT_FALSE(printer.failed()); - - // Try to print 2 chars. Only one fits. - printer.Print("<>"); - EXPECT_TRUE(printer.failed()); - - // Anything else should fail too. - printer.Print(" "); - EXPECT_TRUE(printer.failed()); - printer.Print("blah"); - EXPECT_TRUE(printer.failed()); - - // Buffer should contain the first 17 bytes written. - EXPECT_EQ("0123456789abcdef<", string(buffer, sizeof(buffer))); -} - -TEST(Printer, WriteFailureExact) { +TEST(Printer, WriteFailure) { char buffer[16]; ArrayOutputStream output(buffer, sizeof(buffer)); diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc deleted file mode 100644 index 5697343..0000000 --- a/src/google/protobuf/io/strtod.cc +++ /dev/null @@ -1,113 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include <google/protobuf/io/strtod.h> - -#include <cstdio> -#include <cstring> -#include <string> - -#include <google/protobuf/stubs/common.h> - -namespace google { -namespace protobuf { -namespace io { - -// ---------------------------------------------------------------------- -// NoLocaleStrtod() -// This code will make you cry. -// ---------------------------------------------------------------------- - -namespace { - -// Returns a string identical to *input except that the character pointed to -// by radix_pos (which should be '.') is replaced with the locale-specific -// radix character. -string LocalizeRadix(const char* input, const char* radix_pos) { - // Determine the locale-specific radix character by calling sprintf() to - // print the number 1.5, then stripping off the digits. As far as I can - // tell, this is the only portable, thread-safe way to get the C library - // to divuldge the locale's radix character. No, localeconv() is NOT - // thread-safe. - char temp[16]; - int size = sprintf(temp, "%.1f", 1.5); - GOOGLE_CHECK_EQ(temp[0], '1'); - GOOGLE_CHECK_EQ(temp[size-1], '5'); - GOOGLE_CHECK_LE(size, 6); - - // Now replace the '.' in the input with it. - string result; - result.reserve(strlen(input) + size - 3); - result.append(input, radix_pos); - result.append(temp + 1, size - 2); - result.append(radix_pos + 1); - return result; -} - -} // namespace - -double NoLocaleStrtod(const char* text, char** original_endptr) { - // We cannot simply set the locale to "C" temporarily with setlocale() - // as this is not thread-safe. Instead, we try to parse in the current - // locale first. If parsing stops at a '.' character, then this is a - // pretty good hint that we're actually in some other locale in which - // '.' is not the radix character. - - char* temp_endptr; - double result = strtod(text, &temp_endptr); - if (original_endptr != NULL) *original_endptr = temp_endptr; - if (*temp_endptr != '.') return result; - - // Parsing halted on a '.'. Perhaps we're in a different locale? Let's - // try to replace the '.' with a locale-specific radix character and - // try again. - string localized = LocalizeRadix(text, temp_endptr); - const char* localized_cstr = localized.c_str(); - char* localized_endptr; - result = strtod(localized_cstr, &localized_endptr); - if ((localized_endptr - localized_cstr) > - (temp_endptr - text)) { - // This attempt got further, so replacing the decimal must have helped. - // Update original_endptr to point at the right location. - if (original_endptr != NULL) { - // size_diff is non-zero if the localized radix has multiple bytes. - int size_diff = localized.size() - strlen(text); - // const_cast is necessary to match the strtod() interface. - *original_endptr = const_cast<char*>( - text + (localized_endptr - localized_cstr - size_diff)); - } - } - - return result; -} - -} // namespace io -} // namespace protobuf -} // namespace google diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h deleted file mode 100644 index c2efc8d..0000000 --- a/src/google/protobuf/io/strtod.h +++ /dev/null @@ -1,50 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// A locale-independent version of strtod(), used to parse floating -// point default values in .proto files, where the decimal separator -// is always a dot. - -#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__ -#define GOOGLE_PROTOBUF_IO_STRTOD_H__ - -namespace google { -namespace protobuf { -namespace io { - -// A locale-independent version of the standard strtod(), which always -// uses a dot as the decimal separator. -double NoLocaleStrtod(const char* str, char** endptr); - -} // namespace io -} // namespace protobuf - -} // namespace google -#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__ diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index ef2de30..38fa351 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -89,12 +89,8 @@ // exactly pretty. #include <google/protobuf/io/tokenizer.h> -#include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stringprintf.h> -#include <google/protobuf/io/strtod.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/strutil.h> -#include <google/protobuf/stubs/stl_util.h> namespace google { namespace protobuf { @@ -122,8 +118,6 @@ namespace { CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'); -CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' || - c == '\r' || c == '\v' || c == '\f'); CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0'); @@ -193,16 +187,12 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input, read_error_(false), line_(0), column_(0), - record_target_(NULL), - record_start_(-1), + token_start_(-1), allow_f_after_float_(false), - comment_style_(CPP_COMMENT_STYLE), - require_space_after_number_(true), - allow_multiline_strings_(false) { + comment_style_(CPP_COMMENT_STYLE) { current_.line = 0; current_.column = 0; - current_.end_column = 0; current_.type = TYPE_START; Refresh(); @@ -247,9 +237,9 @@ void Tokenizer::Refresh() { } // If we're in a token, append the rest of the buffer to it. - if (record_target_ != NULL && record_start_ < buffer_size_) { - record_target_->append(buffer_ + record_start_, buffer_size_ - record_start_); - record_start_ = 0; + if (token_start_ >= 0 && token_start_ < buffer_size_) { + current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_); + token_start_ = 0; } const void* data = NULL; @@ -270,34 +260,23 @@ void Tokenizer::Refresh() { current_char_ = buffer_[0]; } -inline void Tokenizer::RecordTo(string* target) { - record_target_ = target; - record_start_ = buffer_pos_; -} - -inline void Tokenizer::StopRecording() { - // Note: The if() is necessary because some STL implementations crash when - // you call string::append(NULL, 0), presumably because they are trying to - // be helpful by detecting the NULL pointer, even though there's nothing - // wrong with reading zero bytes from NULL. - if (buffer_pos_ != record_start_) { - record_target_->append(buffer_ + record_start_, buffer_pos_ - record_start_); - } - record_target_ = NULL; - record_start_ = -1; -} - inline void Tokenizer::StartToken() { + token_start_ = buffer_pos_; current_.type = TYPE_START; // Just for the sake of initializing it. current_.text.clear(); current_.line = line_; current_.column = column_; - RecordTo(¤t_.text); } inline void Tokenizer::EndToken() { - StopRecording(); - current_.end_column = column_; + // Note: The if() is necessary because some STL implementations crash when + // you call string::append(NULL, 0), presumably because they are trying to + // be helpful by detecting the NULL pointer, even though there's nothing + // wrong with reading zero bytes from NULL. + if (buffer_pos_ != token_start_) { + current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_); + } + token_start_ = -1; } // ------------------------------------------------------------------- @@ -353,16 +332,9 @@ void Tokenizer::ConsumeString(char delimiter) { while (true) { switch (current_char_) { case '\0': - AddError("Unexpected end of string."); - return; - case '\n': { - if (!allow_multiline_strings_) { - AddError("String literals cannot cross line boundaries."); - return; - } - NextChar(); - break; + AddError("String literals cannot cross line boundaries."); + return; } case '\\': { @@ -379,27 +351,6 @@ void Tokenizer::ConsumeString(char delimiter) { AddError("Expected hex digits for escape sequence."); } // Possibly followed by another hex digit, but again we don't care. - } else if (TryConsume('u')) { - if (!TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>()) { - AddError("Expected four hex digits for \\u escape sequence."); - } - } else if (TryConsume('U')) { - // We expect 8 hex digits; but only the range up to 0x10ffff is - // legal. - if (!TryConsume('0') || - !TryConsume('0') || - !(TryConsume('0') || TryConsume('1')) || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>() || - !TryConsumeOne<HexDigit>()) { - AddError("Expected eight hex digits up to 10ffff for \\U escape " - "sequence"); - } } else { AddError("Invalid escape sequence in string literal."); } @@ -459,7 +410,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, } } - if (LookingAt<Letter>() && require_space_after_number_) { + if (LookingAt<Letter>()) { AddError("Need space between number and identifier."); } else if (current_char_ == '.') { if (is_float) { @@ -473,51 +424,26 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, return is_float ? TYPE_FLOAT : TYPE_INTEGER; } -void Tokenizer::ConsumeLineComment(string* content) { - if (content != NULL) RecordTo(content); - +void Tokenizer::ConsumeLineComment() { while (current_char_ != '\0' && current_char_ != '\n') { NextChar(); } TryConsume('\n'); - - if (content != NULL) StopRecording(); } -void Tokenizer::ConsumeBlockComment(string* content) { +void Tokenizer::ConsumeBlockComment() { int start_line = line_; int start_column = column_ - 2; - if (content != NULL) RecordTo(content); - while (true) { while (current_char_ != '\0' && current_char_ != '*' && - current_char_ != '/' && - current_char_ != '\n') { + current_char_ != '/') { NextChar(); } - if (TryConsume('\n')) { - if (content != NULL) StopRecording(); - - // Consume leading whitespace and asterisk; - ConsumeZeroOrMore<WhitespaceNoNewline>(); - if (TryConsume('*')) { - if (TryConsume('/')) { - // End of comment. - break; - } - } - - if (content != NULL) RecordTo(content); - } else if (TryConsume('*') && TryConsume('/')) { + if (TryConsume('*') && TryConsume('/')) { // End of comment. - if (content != NULL) { - StopRecording(); - // Strip trailing "*/". - content->erase(content->size() - 2); - } break; } else if (TryConsume('/') && current_char_ == '*') { // Note: We didn't consume the '*' because if there is a '/' after it @@ -528,59 +454,42 @@ void Tokenizer::ConsumeBlockComment(string* content) { AddError("End-of-file inside block comment."); error_collector_->AddError( start_line, start_column, " Comment started here."); - if (content != NULL) StopRecording(); break; } } } -Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() { - if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) { - if (TryConsume('/')) { - return LINE_COMMENT; - } else if (TryConsume('*')) { - return BLOCK_COMMENT; - } else { - // Oops, it was just a slash. Return it. - current_.type = TYPE_SYMBOL; - current_.text = "/"; - current_.line = line_; - current_.column = column_ - 1; - current_.end_column = column_; - return SLASH_NOT_COMMENT; - } - } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) { - return LINE_COMMENT; - } else { - return NO_COMMENT; - } -} - // ------------------------------------------------------------------- bool Tokenizer::Next() { - previous_ = current_; + TokenType last_token_type = current_.type; + + // Did we skip any characters after the last token? + bool skipped_stuff = false; while (!read_error_) { - ConsumeZeroOrMore<Whitespace>(); - - switch (TryConsumeCommentStart()) { - case LINE_COMMENT: - ConsumeLineComment(NULL); - continue; - case BLOCK_COMMENT: - ConsumeBlockComment(NULL); - continue; - case SLASH_NOT_COMMENT: + if (TryConsumeOne<Whitespace>()) { + ConsumeZeroOrMore<Whitespace>(); + + } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) { + // Starting a comment? + if (TryConsume('/')) { + ConsumeLineComment(); + } else if (TryConsume('*')) { + ConsumeBlockComment(); + } else { + // Oops, it was just a slash. Return it. + current_.type = TYPE_SYMBOL; + current_.text = "/"; + current_.line = line_; + current_.column = column_ - 1; return true; - case NO_COMMENT: - break; - } + } - // Check for EOF before continuing. - if (read_error_) break; + } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) { + ConsumeLineComment(); - if (LookingAt<Unprintable>() || current_char_ == '\0') { + } else if (LookingAt<Unprintable>() || current_char_ == '\0') { AddError("Invalid control characters encountered in text."); NextChar(); // Skip more unprintable characters, too. But, remember that '\0' is @@ -608,9 +517,7 @@ bool Tokenizer::Next() { if (TryConsumeOne<Digit>()) { // It's a floating-point number. - if (previous_.type == TYPE_IDENTIFIER && - current_.line == previous_.line && - current_.column == previous_.end_column) { + if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) { // We don't accept syntax like "blah.123". error_collector_->AddError(line_, column_ - 2, "Need space between identifier and decimal point."); @@ -628,12 +535,6 @@ bool Tokenizer::Next() { ConsumeString('\''); current_.type = TYPE_STRING; } else { - // Check if the high order bit is set. - if (current_char_ & 0x80) { - error_collector_->AddError(line_, column_, - StringPrintf("Interpreting non ascii codepoint %d.", - static_cast<unsigned char>(current_char_))); - } NextChar(); current_.type = TYPE_SYMBOL; } @@ -641,6 +542,8 @@ bool Tokenizer::Next() { EndToken(); return true; } + + skipped_stuff = true; } // EOF @@ -648,199 +551,9 @@ bool Tokenizer::Next() { current_.text.clear(); current_.line = line_; current_.column = column_; - current_.end_column = column_; return false; } -namespace { - -// Helper class for collecting comments and putting them in the right places. -// -// This basically just buffers the most recent comment until it can be decided -// exactly where that comment should be placed. When Flush() is called, the -// current comment goes into either prev_trailing_comments or detached_comments. -// When the CommentCollector is destroyed, the last buffered comment goes into -// next_leading_comments. -class CommentCollector { - public: - CommentCollector(string* prev_trailing_comments, - vector<string>* detached_comments, - string* next_leading_comments) - : prev_trailing_comments_(prev_trailing_comments), - detached_comments_(detached_comments), - next_leading_comments_(next_leading_comments), - has_comment_(false), - is_line_comment_(false), - can_attach_to_prev_(true) { - if (prev_trailing_comments != NULL) prev_trailing_comments->clear(); - if (detached_comments != NULL) detached_comments->clear(); - if (next_leading_comments != NULL) next_leading_comments->clear(); - } - - ~CommentCollector() { - // Whatever is in the buffer is a leading comment. - if (next_leading_comments_ != NULL && has_comment_) { - comment_buffer_.swap(*next_leading_comments_); - } - } - - // About to read a line comment. Get the comment buffer pointer in order to - // read into it. - string* GetBufferForLineComment() { - // We want to combine with previous line comments, but not block comments. - if (has_comment_ && !is_line_comment_) { - Flush(); - } - has_comment_ = true; - is_line_comment_ = true; - return &comment_buffer_; - } - - // About to read a block comment. Get the comment buffer pointer in order to - // read into it. - string* GetBufferForBlockComment() { - if (has_comment_) { - Flush(); - } - has_comment_ = true; - is_line_comment_ = false; - return &comment_buffer_; - } - - void ClearBuffer() { - comment_buffer_.clear(); - has_comment_ = false; - } - - // Called once we know that the comment buffer is complete and is *not* - // connected to the next token. - void Flush() { - if (has_comment_) { - if (can_attach_to_prev_) { - if (prev_trailing_comments_ != NULL) { - prev_trailing_comments_->append(comment_buffer_); - } - can_attach_to_prev_ = false; - } else { - if (detached_comments_ != NULL) { - detached_comments_->push_back(comment_buffer_); - } - } - ClearBuffer(); - } - } - - void DetachFromPrev() { - can_attach_to_prev_ = false; - } - - private: - string* prev_trailing_comments_; - vector<string>* detached_comments_; - string* next_leading_comments_; - - string comment_buffer_; - - // True if any comments were read into comment_buffer_. This can be true even - // if comment_buffer_ is empty, namely if the comment was "/**/". - bool has_comment_; - - // Is the comment in the comment buffer a line comment? - bool is_line_comment_; - - // Is it still possible that we could be reading a comment attached to the - // previous token? - bool can_attach_to_prev_; -}; - -} // namespace - -bool Tokenizer::NextWithComments(string* prev_trailing_comments, - vector<string>* detached_comments, - string* next_leading_comments) { - CommentCollector collector(prev_trailing_comments, detached_comments, - next_leading_comments); - - if (current_.type == TYPE_START) { - collector.DetachFromPrev(); - } else { - // A comment appearing on the same line must be attached to the previous - // declaration. - ConsumeZeroOrMore<WhitespaceNoNewline>(); - switch (TryConsumeCommentStart()) { - case LINE_COMMENT: - ConsumeLineComment(collector.GetBufferForLineComment()); - - // Don't allow comments on subsequent lines to be attached to a trailing - // comment. - collector.Flush(); - break; - case BLOCK_COMMENT: - ConsumeBlockComment(collector.GetBufferForBlockComment()); - - ConsumeZeroOrMore<WhitespaceNoNewline>(); - if (!TryConsume('\n')) { - // Oops, the next token is on the same line. If we recorded a comment - // we really have no idea which token it should be attached to. - collector.ClearBuffer(); - return Next(); - } - - // Don't allow comments on subsequent lines to be attached to a trailing - // comment. - collector.Flush(); - break; - case SLASH_NOT_COMMENT: - return true; - case NO_COMMENT: - if (!TryConsume('\n')) { - // The next token is on the same line. There are no comments. - return Next(); - } - break; - } - } - - // OK, we are now on the line *after* the previous token. - while (true) { - ConsumeZeroOrMore<WhitespaceNoNewline>(); - - switch (TryConsumeCommentStart()) { - case LINE_COMMENT: - ConsumeLineComment(collector.GetBufferForLineComment()); - break; - case BLOCK_COMMENT: - ConsumeBlockComment(collector.GetBufferForBlockComment()); - - // Consume the rest of the line so that we don't interpret it as a - // blank line the next time around the loop. - ConsumeZeroOrMore<WhitespaceNoNewline>(); - TryConsume('\n'); - break; - case SLASH_NOT_COMMENT: - return true; - case NO_COMMENT: - if (TryConsume('\n')) { - // Completely blank line. - collector.Flush(); - collector.DetachFromPrev(); - } else { - bool result = Next(); - if (!result || - current_.text == "}" || - current_.text == "]" || - current_.text == ")") { - // It looks like we're at the end of a scope. In this case it - // makes no sense to attach a comment to the following token. - collector.Flush(); - } - return result; - } - break; - } - } -} - // ------------------------------------------------------------------- // Token-parsing helpers. Remember that these don't need to report // errors since any errors should already have been reported while @@ -910,138 +623,17 @@ double Tokenizer::ParseFloat(const string& text) { return result; } -// Helper to append a Unicode code point to a string as UTF8, without bringing -// in any external dependencies. -static void AppendUTF8(uint32 code_point, string* output) { - uint32 tmp = 0; - int len = 0; - if (code_point <= 0x7f) { - tmp = code_point; - len = 1; - } else if (code_point <= 0x07ff) { - tmp = 0x0000c080 | - ((code_point & 0x07c0) << 2) | - (code_point & 0x003f); - len = 2; - } else if (code_point <= 0xffff) { - tmp = 0x00e08080 | - ((code_point & 0xf000) << 4) | - ((code_point & 0x0fc0) << 2) | - (code_point & 0x003f); - len = 3; - } else if (code_point <= 0x1fffff) { - tmp = 0xf0808080 | - ((code_point & 0x1c0000) << 6) | - ((code_point & 0x03f000) << 4) | - ((code_point & 0x000fc0) << 2) | - (code_point & 0x003f); - len = 4; - } else { - // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is - // normally only defined up to there as well. - StringAppendF(output, "\\U%08x", code_point); - return; - } - tmp = ghtonl(tmp); - output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len); -} - -// Try to read <len> hex digits from ptr, and stuff the numeric result into -// *result. Returns true if that many digits were successfully consumed. -static bool ReadHexDigits(const char* ptr, int len, uint32* result) { - *result = 0; - if (len == 0) return false; - for (const char* end = ptr + len; ptr < end; ++ptr) { - if (*ptr == '\0') return false; - *result = (*result << 4) + DigitValue(*ptr); - } - return true; -} - -// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range -// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail -// surrogate. These numbers are in a reserved range of Unicode code points, so -// if we encounter such a pair we know how to parse it and convert it into a -// single code point. -static const uint32 kMinHeadSurrogate = 0xd800; -static const uint32 kMaxHeadSurrogate = 0xdc00; -static const uint32 kMinTrailSurrogate = 0xdc00; -static const uint32 kMaxTrailSurrogate = 0xe000; - -static inline bool IsHeadSurrogate(uint32 code_point) { - return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate); -} - -static inline bool IsTrailSurrogate(uint32 code_point) { - return (code_point >= kMinTrailSurrogate) && - (code_point < kMaxTrailSurrogate); -} - -// Combine a head and trail surrogate into a single Unicode code point. -static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) { - GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate)); - GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate)); - return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) | - (trail_surrogate - kMinTrailSurrogate)); -} - -// Convert the escape sequence parameter to a number of expected hex digits. -static inline int UnicodeLength(char key) { - if (key == 'u') return 4; - if (key == 'U') return 8; - return 0; -} - -// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt -// to parse that sequence. On success, returns a pointer to the first char -// beyond that sequence, and fills in *code_point. On failure, returns ptr -// itself. -static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) { - const char* p = ptr; - // Fetch the code point. - const int len = UnicodeLength(*p++); - if (!ReadHexDigits(p, len, code_point)) - return ptr; - p += len; - - // Check if the code point we read is a "head surrogate." If so, then we - // expect it to be immediately followed by another code point which is a valid - // "trail surrogate," and together they form a UTF-16 pair which decodes into - // a single Unicode point. Trail surrogates may only use \u, not \U. - if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') { - uint32 trail_surrogate; - if (ReadHexDigits(p + 2, 4, &trail_surrogate) && - IsTrailSurrogate(trail_surrogate)) { - *code_point = AssembleUTF16(*code_point, trail_surrogate); - p += 6; - } - // If this failed, then we just emit the head surrogate as a code point. - // It's bogus, but so is the string. - } - - return p; -} - -// The text string must begin and end with single or double quote -// characters. void Tokenizer::ParseStringAppend(const string& text, string* output) { - // Reminder: text[0] is always a quote character. (If text is - // empty, it's invalid, so we'll just return). - const size_t text_size = text.size(); - if (text_size == 0) { + // Reminder: text[0] is always the quote character. (If text is + // empty, it's invalid, so we'll just return.) + if (text.empty()) { GOOGLE_LOG(DFATAL) << " Tokenizer::ParseStringAppend() passed text that could not" " have been tokenized as a string: " << CEscape(text); return; } - // Reserve room for new string. The branch is necessary because if - // there is already space available the reserve() call might - // downsize the output. - const size_t new_len = text_size + output->size(); - if (new_len > output->capacity()) { - output->reserve(new_len); - } + output->reserve(output->size() + text.size()); // Loop through the string copying characters to "output" and // interpreting escape sequences. Note that any invalid escape @@ -1079,47 +671,19 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) { } output->push_back(static_cast<char>(code)); - } else if (*ptr == 'u' || *ptr == 'U') { - uint32 unicode; - const char* end = FetchUnicodePoint(ptr, &unicode); - if (end == ptr) { - // Failure: Just dump out what we saw, don't try to parse it. - output->push_back(*ptr); - } else { - AppendUTF8(unicode, output); - ptr = end - 1; // Because we're about to ++ptr. - } } else { // Some other escape code. output->push_back(TranslateEscape(*ptr)); } - } else if (*ptr == text[0] && ptr[1] == '\0') { - // Ignore final quote matching the starting quote. + } else if (*ptr == text[0]) { + // Ignore quote matching the starting quote. } else { output->push_back(*ptr); } } -} -template<typename CharacterClass> -static bool AllInClass(const string& s) { - for (int i = 0; i < s.size(); ++i) { - if (!CharacterClass::InClass(s[i])) - return false; - } - return true; -} - -bool Tokenizer::IsIdentifier(const string& text) { - // Mirrors IDENTIFIER definition in Tokenizer::Next() above. - if (text.size() == 0) - return false; - if (!Letter::InClass(text.at(0))) - return false; - if (!AllInClass<Alphanumeric>(text.substr(1))) - return false; - return true; + return; } } // namespace io diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h index 8c6220a..d115161 100644 --- a/src/google/protobuf/io/tokenizer.h +++ b/src/google/protobuf/io/tokenizer.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -38,7 +38,6 @@ #define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ #include <string> -#include <vector> #include <google/protobuf/stubs/common.h> namespace google { @@ -67,8 +66,7 @@ class LIBPROTOBUF_EXPORT ErrorCollector { // Indicates that there was a warning in the input at the given line and // column numbers. The numbers are zero-based, so you may want to add // 1 to each before printing them. - virtual void AddWarning(int /* line */, int /* column */, - const string& /* message */) { } + virtual void AddWarning(int line, int column, const string& message) { } private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); @@ -124,68 +122,16 @@ class LIBPROTOBUF_EXPORT Tokenizer { // the token within the input stream. They are zero-based. int line; int column; - int end_column; }; // Get the current token. This is updated when Next() is called. Before // the first call to Next(), current() has type TYPE_START and no contents. const Token& current(); - // Return the previous token -- i.e. what current() returned before the - // previous call to Next(). - const Token& previous(); - // Advance to the next token. Returns false if the end of the input is // reached. bool Next(); - // Like Next(), but also collects comments which appear between the previous - // and next tokens. - // - // Comments which appear to be attached to the previous token are stored - // in *prev_tailing_comments. Comments which appear to be attached to the - // next token are stored in *next_leading_comments. Comments appearing in - // between which do not appear to be attached to either will be added to - // detached_comments. Any of these parameters can be NULL to simply discard - // the comments. - // - // A series of line comments appearing on consecutive lines, with no other - // tokens appearing on those lines, will be treated as a single comment. - // - // Only the comment content is returned; comment markers (e.g. //) are - // stripped out. For block comments, leading whitespace and an asterisk will - // be stripped from the beginning of each line other than the first. Newlines - // are included in the output. - // - // Examples: - // - // optional int32 foo = 1; // Comment attached to foo. - // // Comment attached to bar. - // optional int32 bar = 2; - // - // optional string baz = 3; - // // Comment attached to baz. - // // Another line attached to baz. - // - // // Comment attached to qux. - // // - // // Another line attached to qux. - // optional double qux = 4; - // - // // Detached comment. This is not attached to qux or corge - // // because there are blank lines separating it from both. - // - // optional string corge = 5; - // /* Block comment attached - // * to corge. Leading asterisks - // * will be removed. */ - // /* Block comment attached to - // * grault. */ - // optional int32 grault = 6; - bool NextWithComments(string* prev_trailing_comments, - vector<string>* detached_comments, - string* next_leading_comments); - // Parse helpers --------------------------------------------------- // Parses a TYPE_FLOAT token. This never fails, so long as the text actually @@ -229,27 +175,11 @@ class LIBPROTOBUF_EXPORT Tokenizer { // Sets the comment style. void set_comment_style(CommentStyle style) { comment_style_ = style; } - // Whether to require whitespace between a number and a field name. - // Default is true. Do not use this; for Google-internal cleanup only. - void set_require_space_after_number(bool require) { - require_space_after_number_ = require; - } - - // Whether to allow string literals to span multiple lines. Default is false. - // Do not use this; for Google-internal cleanup only. - void set_allow_multiline_strings(bool allow) { - allow_multiline_strings_ = allow; - } - - // External helper: validate an identifier. - static bool IsIdentifier(const string& text); - // ----------------------------------------------------------------- private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); Token current_; // Returned by current(). - Token previous_; // Returned by previous(). ZeroCopyInputStream* input_; ErrorCollector* error_collector_; @@ -264,18 +194,15 @@ class LIBPROTOBUF_EXPORT Tokenizer { int line_; int column_; - // String to which text should be appended as we advance through it. - // Call RecordTo(&str) to start recording and StopRecording() to stop. - // E.g. StartToken() calls RecordTo(¤t_.text). record_start_ is the - // position within the current buffer where recording started. - string* record_target_; - int record_start_; + // Position in buffer_ where StartToken() was called. If the token + // started in the previous buffer, this is zero, and current_.text already + // contains the part of the token from the previous buffer. If not + // currently parsing a token, this is -1. + int token_start_; // Options. bool allow_f_after_float_; CommentStyle comment_style_; - bool require_space_after_number_; - bool allow_multiline_strings_; // Since we count columns we need to interpret tabs somehow. We'll take // the standard 8-character definition for lack of any way to do better. @@ -290,9 +217,6 @@ class LIBPROTOBUF_EXPORT Tokenizer { // Read a new buffer from the input. void Refresh(); - inline void RecordTo(string* target); - inline void StopRecording(); - // Called when the current character is the first character of a new // token (not including whitespace or comments). inline void StartToken(); @@ -325,28 +249,9 @@ class LIBPROTOBUF_EXPORT Tokenizer { TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); // Consume the rest of a line. - void ConsumeLineComment(string* content); + void ConsumeLineComment(); // Consume until "*/". - void ConsumeBlockComment(string* content); - - enum NextCommentStatus { - // Started a line comment. - LINE_COMMENT, - - // Started a block comment. - BLOCK_COMMENT, - - // Consumed a slash, then realized it wasn't a comment. current_ has - // been filled in with a slash token. The caller should return it. - SLASH_NOT_COMMENT, - - // We do not appear to be starting a comment here. - NO_COMMENT - }; - - // If we're at the start of a new comment, consume it and return what kind - // of comment it is. - NextCommentStatus TryConsumeCommentStart(); + void ConsumeBlockComment(); // ----------------------------------------------------------------- // These helper methods make the parsing code more readable. The @@ -386,10 +291,6 @@ inline const Tokenizer::Token& Tokenizer::current() { return current_; } -inline const Tokenizer::Token& Tokenizer::previous() { - return previous_; -} - inline void Tokenizer::ParseString(const string& text, string* output) { output->clear(); ParseStringAppend(text, output); diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index de096fb..358ec56 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -32,10 +32,9 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. -#include <limits.h> -#include <math.h> - #include <vector> +#include <math.h> +#include <limits.h> #include <google/protobuf/io/tokenizer.h> #include <google/protobuf/io/zero_copy_stream_impl.h> @@ -258,7 +257,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) { EXPECT_EQ("", tokenizer.current().text); EXPECT_EQ(0, tokenizer.current().line); EXPECT_EQ(0, tokenizer.current().column); - EXPECT_EQ(0, tokenizer.current().end_column); // Parse the token. ASSERT_TRUE(tokenizer.Next()); @@ -270,8 +268,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) { // Check that it is located at the beginning of the input EXPECT_EQ(0, tokenizer.current().line); EXPECT_EQ(0, tokenizer.current().column); - EXPECT_EQ(kSimpleTokenCases_case.input.size(), - tokenizer.current().end_column); // There should be no more input. EXPECT_FALSE(tokenizer.Next()); @@ -281,8 +277,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) { EXPECT_EQ("", tokenizer.current().text); EXPECT_EQ(0, tokenizer.current().line); EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column); - EXPECT_EQ(kSimpleTokenCases_case.input.size(), - tokenizer.current().end_column); // There should be no errors. EXPECT_TRUE(error_collector.text_.empty()); @@ -345,77 +339,76 @@ MultiTokenCase kMultiTokenCases[] = { // Test all token types at the same time. { "foo 1 1.2 + 'bar'", { - { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0, 3 }, - { Tokenizer::TYPE_INTEGER , "1" , 0, 4, 5 }, - { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6, 9 }, - { Tokenizer::TYPE_SYMBOL , "+" , 0, 10, 11 }, - { Tokenizer::TYPE_STRING , "'bar'", 0, 12, 17 }, - { Tokenizer::TYPE_END , "" , 0, 17, 17 }, + { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0 }, + { Tokenizer::TYPE_INTEGER , "1" , 0, 4 }, + { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6 }, + { Tokenizer::TYPE_SYMBOL , "+" , 0, 10 }, + { Tokenizer::TYPE_STRING , "'bar'", 0, 12 }, + { Tokenizer::TYPE_END , "" , 0, 17 }, }}, // Test that consecutive symbols are parsed as separate tokens. { "!@+%", { - { Tokenizer::TYPE_SYMBOL , "!" , 0, 0, 1 }, - { Tokenizer::TYPE_SYMBOL , "@" , 0, 1, 2 }, - { Tokenizer::TYPE_SYMBOL , "+" , 0, 2, 3 }, - { Tokenizer::TYPE_SYMBOL , "%" , 0, 3, 4 }, - { Tokenizer::TYPE_END , "" , 0, 4, 4 }, + { Tokenizer::TYPE_SYMBOL , "!" , 0, 0 }, + { Tokenizer::TYPE_SYMBOL , "@" , 0, 1 }, + { Tokenizer::TYPE_SYMBOL , "+" , 0, 2 }, + { Tokenizer::TYPE_SYMBOL , "%" , 0, 3 }, + { Tokenizer::TYPE_END , "" , 0, 4 }, }}, // Test that newlines affect line numbers correctly. { "foo bar\nrab oof", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4, 7 }, - { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4, 7 }, - { Tokenizer::TYPE_END , "" , 1, 7, 7 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4 }, + { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4 }, + { Tokenizer::TYPE_END , "" , 1, 7 }, }}, // Test that tabs affect column numbers correctly. { "foo\tbar \tbaz", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8, 11 }, - { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19 }, - { Tokenizer::TYPE_END , "" , 0, 19, 19 }, - }}, - - // Test that tabs in string literals affect column numbers correctly. - { "\"foo\tbar\" baz", { - { Tokenizer::TYPE_STRING , "\"foo\tbar\"", 0, 0, 12 }, - { Tokenizer::TYPE_IDENTIFIER, "baz" , 0, 13, 16 }, - { Tokenizer::TYPE_END , "" , 0, 16, 16 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8 }, + { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 }, + { Tokenizer::TYPE_END , "" , 0, 19 }, }}, // Test that line comments are ignored. { "foo // This is a comment\n" "bar // This is another comment", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0, 3 }, - { Tokenizer::TYPE_END , "" , 1, 30, 30 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0 }, + { Tokenizer::TYPE_END , "" , 1, 30 }, }}, // Test that block comments are ignored. { "foo /* This is a block comment */ bar", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37 }, - { Tokenizer::TYPE_END , "" , 0, 37, 37 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 }, + { Tokenizer::TYPE_END , "" , 0, 37 }, }}, // Test that sh-style comments are not ignored by default. { "foo # bar\n" "baz", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_SYMBOL , "#" , 0, 4, 5 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9 }, - { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3 }, - { Tokenizer::TYPE_END , "" , 1, 3, 3 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_SYMBOL , "#" , 0, 4 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6 }, + { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 }, + { Tokenizer::TYPE_END , "" , 1, 3 }, + }}, + + // Bytes with the high-order bit set should not be seen as control characters. + { "\300", { + { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 }, + { Tokenizer::TYPE_END , "" , 0, 1 }, }}, // Test all whitespace chars { "foo\n\t\r\v\fbar", { - { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 }, - { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14 }, - { Tokenizer::TYPE_END , "" , 1, 14, 14 }, + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11 }, + { Tokenizer::TYPE_END , "" , 1, 14 }, }}, }; @@ -432,7 +425,6 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { EXPECT_EQ("", tokenizer.current().text); EXPECT_EQ(0, tokenizer.current().line); EXPECT_EQ(0, tokenizer.current().column); - EXPECT_EQ(0, tokenizer.current().end_column); // Loop through all expected tokens. int i = 0; @@ -442,8 +434,6 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text); - Tokenizer::Token previous = tokenizer.current(); - // Next() should only return false when it hits the end token. if (token.type != Tokenizer::TYPE_END) { ASSERT_TRUE(tokenizer.Next()); @@ -451,19 +441,11 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { ASSERT_FALSE(tokenizer.Next()); } - // Check that the previous token is set correctly. - EXPECT_EQ(previous.type, tokenizer.previous().type); - EXPECT_EQ(previous.text, tokenizer.previous().text); - EXPECT_EQ(previous.line, tokenizer.previous().line); - EXPECT_EQ(previous.column, tokenizer.previous().column); - EXPECT_EQ(previous.end_column, tokenizer.previous().end_column); - // Check that the token matches the expected one. EXPECT_EQ(token.type, tokenizer.current().type); EXPECT_EQ(token.text, tokenizer.current().text); EXPECT_EQ(token.line, tokenizer.current().line); EXPECT_EQ(token.column, tokenizer.current().column); - EXPECT_EQ(token.end_column, tokenizer.current().end_column); } while (token.type != Tokenizer::TYPE_END); @@ -509,217 +491,6 @@ TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) { // ------------------------------------------------------------------- -// In each case, the input is expected to have two tokens named "prev" and -// "next" with comments in between. -struct DocCommentCase { - string input; - - const char* prev_trailing_comments; - const char* detached_comments[10]; - const char* next_leading_comments; -}; - -inline ostream& operator<<(ostream& out, - const DocCommentCase& test_case) { - return out << CEscape(test_case.input); -} - -DocCommentCase kDocCommentCases[] = { - { - "prev next", - - "", - {}, - "" - }, - - { - "prev /* ignored */ next", - - "", - {}, - "" - }, - - { - "prev // trailing comment\n" - "next", - - " trailing comment\n", - {}, - "" - }, - - { - "prev\n" - "// leading comment\n" - "// line 2\n" - "next", - - "", - {}, - " leading comment\n" - " line 2\n" - }, - - { - "prev\n" - "// trailing comment\n" - "// line 2\n" - "\n" - "next", - - " trailing comment\n" - " line 2\n", - {}, - "" - }, - - { - "prev // trailing comment\n" - "// leading comment\n" - "// line 2\n" - "next", - - " trailing comment\n", - {}, - " leading comment\n" - " line 2\n" - }, - - { - "prev /* trailing block comment */\n" - "/* leading block comment\n" - " * line 2\n" - " * line 3 */" - "next", - - " trailing block comment ", - {}, - " leading block comment\n" - " line 2\n" - " line 3 " - }, - - { - "prev\n" - "/* trailing block comment\n" - " * line 2\n" - " * line 3\n" - " */\n" - "/* leading block comment\n" - " * line 2\n" - " * line 3 */" - "next", - - " trailing block comment\n" - " line 2\n" - " line 3\n", - {}, - " leading block comment\n" - " line 2\n" - " line 3 " - }, - - { - "prev\n" - "// trailing comment\n" - "\n" - "// detached comment\n" - "// line 2\n" - "\n" - "// second detached comment\n" - "/* third detached comment\n" - " * line 2 */\n" - "// leading comment\n" - "next", - - " trailing comment\n", - { - " detached comment\n" - " line 2\n", - " second detached comment\n", - " third detached comment\n" - " line 2 " - }, - " leading comment\n" - }, - - { - "prev /**/\n" - "\n" - "// detached comment\n" - "\n" - "// leading comment\n" - "next", - - "", - { - " detached comment\n" - }, - " leading comment\n" - }, - - { - "prev /**/\n" - "// leading comment\n" - "next", - - "", - {}, - " leading comment\n" - }, - }; - -TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) { - // Set up the tokenizer. - TestInputStream input(kDocCommentCases_case.input.data(), - kDocCommentCases_case.input.size(), - kBlockSizes_case); - TestErrorCollector error_collector; - Tokenizer tokenizer(&input, &error_collector); - - // Set up a second tokenizer where we'll pass all NULLs to NextWithComments(). - TestInputStream input2(kDocCommentCases_case.input.data(), - kDocCommentCases_case.input.size(), - kBlockSizes_case); - Tokenizer tokenizer2(&input2, &error_collector); - - tokenizer.Next(); - tokenizer2.Next(); - - EXPECT_EQ("prev", tokenizer.current().text); - EXPECT_EQ("prev", tokenizer2.current().text); - - string prev_trailing_comments; - vector<string> detached_comments; - string next_leading_comments; - tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments, - &next_leading_comments); - tokenizer2.NextWithComments(NULL, NULL, NULL); - EXPECT_EQ("next", tokenizer.current().text); - EXPECT_EQ("next", tokenizer2.current().text); - - EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments, - prev_trailing_comments); - - for (int i = 0; i < detached_comments.size(); i++) { - ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases)); - ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL); - EXPECT_EQ(kDocCommentCases_case.detached_comments[i], - detached_comments[i]); - } - - // Verify that we matched all the detached comments. - EXPECT_EQ(NULL, - kDocCommentCases_case.detached_comments[detached_comments.size()]); - - EXPECT_EQ(kDocCommentCases_case.next_leading_comments, - next_leading_comments); -} - -// ------------------------------------------------------------------- - // Test parse helpers. It's not really worth setting up a full data-driven // test here. TEST_F(TokenizerTest, ParseInteger) { @@ -735,7 +506,7 @@ TEST_F(TokenizerTest, ParseInteger) { EXPECT_EQ(0, ParseInteger("0x")); uint64 i; -#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet // Test invalid integers that will never be tokenized as integers. EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i), "passed text that could not have been tokenized as an integer"); @@ -747,7 +518,7 @@ TEST_F(TokenizerTest, ParseInteger) { "passed text that could not have been tokenized as an integer"); EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i), "passed text that could not have been tokenized as an integer"); -#endif // PROTOBUF_HAS_DEATH_TEST +#endif // GTEST_HAS_DEATH_TEST // Test overflows. EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i)); @@ -790,7 +561,7 @@ TEST_F(TokenizerTest, ParseFloat) { EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999")); EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999")); -#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet // Test invalid integers that will never be tokenized as integers. EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"), "passed text that could not have been tokenized as a float"); @@ -798,7 +569,7 @@ TEST_F(TokenizerTest, ParseFloat) { "passed text that could not have been tokenized as a float"); EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"), "passed text that could not have been tokenized as a float"); -#endif // PROTOBUF_HAS_DEATH_TEST +#endif // GTEST_HAS_DEATH_TEST } TEST_F(TokenizerTest, ParseString) { @@ -820,27 +591,11 @@ TEST_F(TokenizerTest, ParseString) { Tokenizer::ParseString("'\\", &output); EXPECT_EQ("\\", output); - // Experiment with Unicode escapes. Here are one-, two- and three-byte Unicode - // characters. - Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output); - EXPECT_EQ("$¢€𤭢XX", output); - // Same thing encoded using UTF16. - Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output); - EXPECT_EQ("$¢€𤭢XX", output); - // Here's some broken UTF16; there's a head surrogate with no tail surrogate. - // We just output this as if it were UTF8; it's not a defined code point, but - // it has a defined encoding. - Tokenizer::ParseString("'\\ud852XX'", &output); - EXPECT_EQ("\xed\xa1\x92XX", output); - // Malformed escape: Demons may fly out of the nose. - Tokenizer::ParseString("\\u0", &output); - EXPECT_EQ("u0", output); - // Test invalid strings that will never be tokenized as strings. -#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output), "passed text that could not have been tokenized as a string"); -#endif // PROTOBUF_HAS_DEATH_TEST +#endif // GTEST_HAS_DEATH_TEST } TEST_F(TokenizerTest, ParseStringAppend) { @@ -877,15 +632,9 @@ ErrorCase kErrorCases[] = { { "'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n" }, { "'foo", false, - "0:4: Unexpected end of string.\n" }, + "0:4: String literals cannot cross line boundaries.\n" }, { "'bar\nfoo", true, "0:4: String literals cannot cross line boundaries.\n" }, - { "'\\u01' foo", true, - "0:5: Expected four hex digits for \\u escape sequence.\n" }, - { "'\\u01' foo", true, - "0:5: Expected four hex digits for \\u escape sequence.\n" }, - { "'\\uXYZ' foo", true, - "0:3: Expected four hex digits for \\u escape sequence.\n" }, // Integer errors. { "123foo", true, @@ -945,10 +694,6 @@ ErrorCase kErrorCases[] = { "0:0: Invalid control characters encountered in text.\n" }, { string("\0\0foo", 5), true, "0:0: Invalid control characters encountered in text.\n" }, - - // Check error from high order bits set - { "\300foo", true, - "0:0: Interpreting non ascii codepoint 192.\n" }, }; TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) { @@ -966,7 +711,7 @@ TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) { } // Check that the errors match what was expected. - EXPECT_EQ(kErrorCases_case.errors, error_collector.text_); + EXPECT_EQ(error_collector.text_, kErrorCases_case.errors); // If the error was recoverable, make sure we saw "foo" after it. if (kErrorCases_case.recoverable) { @@ -992,7 +737,6 @@ TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) { EXPECT_EQ(strlen("foo"), input.ByteCount()); } - } // namespace } // namespace io } // namespace protobuf diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc index f77c768..dad6ff1 100644 --- a/src/google/protobuf/io/zero_copy_stream.cc +++ b/src/google/protobuf/io/zero_copy_stream.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -34,7 +34,6 @@ #include <google/protobuf/io/zero_copy_stream.h> -#include <google/protobuf/stubs/common.h> namespace google { namespace protobuf { @@ -44,14 +43,6 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {} ZeroCopyOutputStream::~ZeroCopyOutputStream() {} -bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */, - int /* size */) { - GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. " - "Reaching here usually means a ZeroCopyOutputStream " - "implementation bug."; - return false; -} - } // namespace io } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h index 52650fc..db5326f 100644 --- a/src/google/protobuf/io/zero_copy_stream.h +++ b/src/google/protobuf/io/zero_copy_stream.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -226,16 +226,6 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { // Returns the total number of bytes written since this object was created. virtual int64 ByteCount() const = 0; - // Write a given chunk of data to the output. Some output streams may - // implement this in a way that avoids copying. Check AllowsAliasing() before - // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is - // called on a stream that does not allow aliasing. - // - // NOTE: It is caller's responsibility to ensure that the chunk of memory - // remains live until all of the data has been consumed from the stream. - virtual bool WriteAliasedRaw(const void* data, int size); - virtual bool AllowsAliasing() const { return false; } - private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream); diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc index f7901b2..1384c74 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.cc +++ b/src/google/protobuf/io/zero_copy_stream_impl.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -46,8 +46,7 @@ #include <google/protobuf/io/zero_copy_stream_impl.h> #include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stl_util.h> - +#include <google/protobuf/stubs/stl_util-inl.h> namespace google { namespace protobuf { @@ -413,9 +412,7 @@ int64 ConcatenatingInputStream::ByteCount() const { LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input, int64 limit) - : input_(input), limit_(limit) { - prior_bytes_read_ = input_->ByteCount(); -} + : input_(input), limit_(limit) {} LimitingInputStream::~LimitingInputStream() { // If we overshot the limit, back up. @@ -459,9 +456,9 @@ bool LimitingInputStream::Skip(int count) { int64 LimitingInputStream::ByteCount() const { if (limit_ < 0) { - return input_->ByteCount() + limit_ - prior_bytes_read_; + return input_->ByteCount() + limit_; } else { - return input_->ByteCount() - prior_bytes_read_; + return input_->ByteCount(); } } diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h index 0746fa6..9fedb00 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl.h +++ b/src/google/protobuf/io/zero_copy_stream_impl.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -344,7 +344,6 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream { private: ZeroCopyInputStream* input_; int64 limit_; // Decreases as we go, becomes negative if we overshoot. - int64 prior_bytes_read_; // Bytes read on underlying stream at construction GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); }; diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc index 58aff0e..e801251 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc +++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -32,13 +32,9 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. -#include <google/protobuf/io/zero_copy_stream_impl_lite.h> - -#include <algorithm> -#include <limits> - +#include <google/protobuf/io/zero_copy_stream_impl.h> #include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stl_util.h> +#include <google/protobuf/stubs/stl_util-inl.h> namespace google { namespace protobuf { @@ -163,23 +159,15 @@ bool StringOutputStream::Next(void** data, int* size) { // without a memory allocation this way. STLStringResizeUninitialized(target_, target_->capacity()); } else { - // Size has reached capacity, try to double the size. - if (old_size > std::numeric_limits<int>::max() / 2) { - // Can not double the size otherwise it is going to cause integer - // overflow in the expression below: old_size * 2 "; - GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for " - << "StringOutputStream."; - return false; - } - // Double the size, also make sure that the new size is at least - // kMinimumSize. + // Size has reached capacity, so double the size. Also make sure + // that the new size is at least kMinimumSize. STLStringResizeUninitialized( target_, max(old_size * 2, kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. } - *data = mutable_string_data(target_) + old_size; + *data = string_as_array(target_) + old_size; *size = target_->size() - old_size; return true; } diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h index e18da72..153f543 100644 --- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h +++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -48,7 +48,6 @@ #include <iosfwd> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> -#include <google/protobuf/stubs/stl_util.h> namespace google { @@ -334,19 +333,6 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea // =================================================================== -// Return a pointer to mutable characters underlying the given string. The -// return value is valid until the next time the string is resized. We -// trust the caller to treat the return value as an array of length s->size(). -inline char* mutable_string_data(string* s) { -#ifdef LANG_CXX11 - // This should be simpler & faster than string_as_array() because the latter - // is guaranteed to return NULL when *s is empty, so it has to check for that. - return &(*s)[0]; -#else - return string_as_array(s); -#endif -} - } // namespace io } // namespace protobuf diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc index bf978cc..8229ee6 100644 --- a/src/google/protobuf/io/zero_copy_stream_unittest.cc +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -1,6 +1,6 @@ // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ +// http://code.google.com/p/protobuf/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -61,7 +61,6 @@ #include <sstream> #include <google/protobuf/io/zero_copy_stream_impl.h> -#include <google/protobuf/io/coded_stream.h> #if HAVE_ZLIB #include <google/protobuf/io/gzip_stream.h> @@ -286,57 +285,6 @@ TEST_F(IoTest, ArrayIo) { } } -TEST_F(IoTest, TwoSessionWrite) { - // Test that two concatenated write sessions read correctly - - static const char* strA = "0123456789"; - static const char* strB = "WhirledPeas"; - const int kBufferSize = 2*1024; - uint8* buffer = new uint8[kBufferSize]; - char* temp_buffer = new char[40]; - - for (int i = 0; i < kBlockSizeCount; i++) { - for (int j = 0; j < kBlockSizeCount; j++) { - ArrayOutputStream* output = - new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]); - CodedOutputStream* coded_output = new CodedOutputStream(output); - coded_output->WriteVarint32(strlen(strA)); - coded_output->WriteRaw(strA, strlen(strA)); - delete coded_output; // flush - int64 pos = output->ByteCount(); - delete output; - output = new ArrayOutputStream( - buffer + pos, kBufferSize - pos, kBlockSizes[i]); - coded_output = new CodedOutputStream(output); - coded_output->WriteVarint32(strlen(strB)); - coded_output->WriteRaw(strB, strlen(strB)); - delete coded_output; // flush - int64 size = pos + output->ByteCount(); - delete output; - - ArrayInputStream* input = - new ArrayInputStream(buffer, size, kBlockSizes[j]); - CodedInputStream* coded_input = new CodedInputStream(input); - uint32 insize; - EXPECT_TRUE(coded_input->ReadVarint32(&insize)); - EXPECT_EQ(strlen(strA), insize); - EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); - EXPECT_EQ(0, memcmp(temp_buffer, strA, insize)); - - EXPECT_TRUE(coded_input->ReadVarint32(&insize)); - EXPECT_EQ(strlen(strB), insize); - EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); - EXPECT_EQ(0, memcmp(temp_buffer, strB, insize)); - - delete coded_input; - delete input; - } - } - - delete [] temp_buffer; - delete [] buffer; -} - #if HAVE_ZLIB TEST_F(IoTest, GzipIo) { const int kBufferSize = 2*1024; @@ -348,49 +296,9 @@ TEST_F(IoTest, GzipIo) { int size; { ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); - GzipOutputStream::Options options; - options.format = GzipOutputStream::GZIP; - if (gzip_buffer_size != -1) { - options.buffer_size = gzip_buffer_size; - } - GzipOutputStream gzout(&output, options); - WriteStuff(&gzout); - gzout.Close(); - size = output.ByteCount(); - } - { - ArrayInputStream input(buffer, size, kBlockSizes[j]); - GzipInputStream gzin( - &input, GzipInputStream::GZIP, gzip_buffer_size); - ReadStuff(&gzin); - } - } - } - } - delete [] buffer; -} - -TEST_F(IoTest, GzipIoWithFlush) { - const int kBufferSize = 2*1024; - uint8* buffer = new uint8[kBufferSize]; - // We start with i = 4 as we want a block size > 6. With block size <= 6 - // Flush() fills up the entire 2K buffer with flush markers and the test - // fails. See documentation for Flush() for more detail. - for (int i = 4; i < kBlockSizeCount; i++) { - for (int j = 0; j < kBlockSizeCount; j++) { - for (int z = 0; z < kBlockSizeCount; z++) { - int gzip_buffer_size = kBlockSizes[z]; - int size; - { - ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); - GzipOutputStream::Options options; - options.format = GzipOutputStream::GZIP; - if (gzip_buffer_size != -1) { - options.buffer_size = gzip_buffer_size; - } - GzipOutputStream gzout(&output, options); + GzipOutputStream gzout( + &output, GzipOutputStream::GZIP, gzip_buffer_size); WriteStuff(&gzout); - EXPECT_TRUE(gzout.Flush()); gzout.Close(); size = output.ByteCount(); } @@ -406,64 +314,6 @@ TEST_F(IoTest, GzipIoWithFlush) { delete [] buffer; } -TEST_F(IoTest, GzipIoContiguousFlushes) { - const int kBufferSize = 2*1024; - uint8* buffer = new uint8[kBufferSize]; - - int block_size = kBlockSizes[4]; - int gzip_buffer_size = block_size; - int size; - - ArrayOutputStream output(buffer, kBufferSize, block_size); - GzipOutputStream::Options options; - options.format = GzipOutputStream::GZIP; - if (gzip_buffer_size != -1) { - options.buffer_size = gzip_buffer_size; - } - GzipOutputStream gzout(&output, options); - WriteStuff(&gzout); - EXPECT_TRUE(gzout.Flush()); - EXPECT_TRUE(gzout.Flush()); - gzout.Close(); - size = output.ByteCount(); - - ArrayInputStream input(buffer, size, block_size); - GzipInputStream gzin( - &input, GzipInputStream::GZIP, gzip_buffer_size); - ReadStuff(&gzin); - - delete [] buffer; -} - -TEST_F(IoTest, GzipIoReadAfterFlush) { - const int kBufferSize = 2*1024; - uint8* buffer = new uint8[kBufferSize]; - - int block_size = kBlockSizes[4]; - int gzip_buffer_size = block_size; - int size; - ArrayOutputStream output(buffer, kBufferSize, block_size); - GzipOutputStream::Options options; - options.format = GzipOutputStream::GZIP; - if (gzip_buffer_size != -1) { - options.buffer_size = gzip_buffer_size; - } - - GzipOutputStream gzout(&output, options); - WriteStuff(&gzout); - EXPECT_TRUE(gzout.Flush()); - size = output.ByteCount(); - - ArrayInputStream input(buffer, size, block_size); - GzipInputStream gzin( - &input, GzipInputStream::GZIP, gzip_buffer_size); - ReadStuff(&gzin); - - gzout.Close(); - - delete [] buffer; -} - TEST_F(IoTest, ZlibIo) { const int kBufferSize = 2*1024; uint8* buffer = new uint8[kBufferSize]; @@ -474,12 +324,8 @@ TEST_F(IoTest, ZlibIo) { int size; { ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); - GzipOutputStream::Options options; - options.format = GzipOutputStream::ZLIB; - if (gzip_buffer_size != -1) { - options.buffer_size = gzip_buffer_size; - } - GzipOutputStream gzout(&output, options); + GzipOutputStream gzout( + &output, GzipOutputStream::ZLIB, gzip_buffer_size); WriteStuff(&gzout); gzout.Close(); size = output.ByteCount(); @@ -502,9 +348,7 @@ TEST_F(IoTest, ZlibIoInputAutodetect) { int size; { ArrayOutputStream output(buffer, kBufferSize); - GzipOutputStream::Options options; - options.format = GzipOutputStream::ZLIB; - GzipOutputStream gzout(&output, options); + GzipOutputStream gzout(&output, GzipOutputStream::ZLIB); WriteStuff(&gzout); gzout.Close(); size = output.ByteCount(); @@ -516,9 +360,7 @@ TEST_F(IoTest, ZlibIoInputAutodetect) { } { ArrayOutputStream output(buffer, kBufferSize); - GzipOutputStream::Options options; - options.format = GzipOutputStream::GZIP; - GzipOutputStream gzout(&output, options); + GzipOutputStream gzout(&output, GzipOutputStream::GZIP); WriteStuff(&gzout); gzout.Close(); size = output.ByteCount(); @@ -560,10 +402,9 @@ TEST_F(IoTest, CompressionOptions) { // Some ad-hoc testing of compression options. string golden; - GOOGLE_CHECK_OK(File::GetContents( - TestSourceDir() + - "/google/protobuf/testdata/golden_message", - &golden, true)); + File::ReadFileToStringOrDie( + TestSourceDir() + "/google/protobuf/testdata/golden_message", + &golden); GzipOutputStream::Options options; string gzip_compressed = Compress(golden, options); @@ -591,71 +432,6 @@ TEST_F(IoTest, CompressionOptions) { EXPECT_TRUE(Uncompress(gzip_compressed) == golden); EXPECT_TRUE(Uncompress(zlib_compressed) == golden); } - -TEST_F(IoTest, TwoSessionWriteGzip) { - // Test that two concatenated gzip streams can be read correctly - - static const char* strA = "0123456789"; - static const char* strB = "QuickBrownFox"; - const int kBufferSize = 2*1024; - uint8* buffer = new uint8[kBufferSize]; - char* temp_buffer = new char[40]; - - for (int i = 0; i < kBlockSizeCount; i++) { - for (int j = 0; j < kBlockSizeCount; j++) { - ArrayOutputStream* output = - new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]); - GzipOutputStream* gzout = new GzipOutputStream(output); - CodedOutputStream* coded_output = new CodedOutputStream(gzout); - int32 outlen = strlen(strA) + 1; - coded_output->WriteVarint32(outlen); - coded_output->WriteRaw(strA, outlen); - delete coded_output; // flush - delete gzout; // flush - int64 pos = output->ByteCount(); - delete output; - output = new ArrayOutputStream( - buffer + pos, kBufferSize - pos, kBlockSizes[i]); - gzout = new GzipOutputStream(output); - coded_output = new CodedOutputStream(gzout); - outlen = strlen(strB) + 1; - coded_output->WriteVarint32(outlen); - coded_output->WriteRaw(strB, outlen); - delete coded_output; // flush - delete gzout; // flush - int64 size = pos + output->ByteCount(); - delete output; - - ArrayInputStream* input = - new ArrayInputStream(buffer, size, kBlockSizes[j]); - GzipInputStream* gzin = new GzipInputStream(input); - CodedInputStream* coded_input = new CodedInputStream(gzin); - uint32 insize; - EXPECT_TRUE(coded_input->ReadVarint32(&insize)); - EXPECT_EQ(strlen(strA) + 1, insize); - EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); - EXPECT_EQ(0, memcmp(temp_buffer, strA, insize)) - << "strA=" << strA << " in=" << temp_buffer; - - EXPECT_TRUE(coded_input->ReadVarint32(&insize)); - EXPECT_EQ(strlen(strB) + 1, insize); - EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); - EXPECT_EQ(0, memcmp(temp_buffer, strB, insize)) - << " out_block_size=" << kBlockSizes[i] - << " in_block_size=" << kBlockSizes[j] - << " pos=" << pos - << " size=" << size - << " strB=" << strB << " in=" << temp_buffer; - - delete coded_input; - delete gzin; - delete input; - } - } - - delete [] temp_buffer; - delete [] buffer; -} #endif // There is no string input, only string output. Also, it doesn't support @@ -924,26 +700,6 @@ TEST_F(IoTest, LimitingInputStream) { ReadStuff(&input); } -// Checks that ByteCount works correctly for LimitingInputStreams where the -// underlying stream has already been read. -TEST_F(IoTest, LimitingInputStreamByteCount) { - const int kHalfBufferSize = 128; - const int kBufferSize = kHalfBufferSize * 2; - uint8 buffer[kBufferSize]; - - // Set up input. Only allow half to be read at once. - ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize); - const void* data; - int size; - EXPECT_TRUE(array_input.Next(&data, &size)); - EXPECT_EQ(kHalfBufferSize, array_input.ByteCount()); - // kHalfBufferSize - 1 to test limiting logic as well. - LimitingInputStream input(&array_input, kHalfBufferSize - 1); - EXPECT_EQ(0, input.ByteCount()); - EXPECT_TRUE(input.Next(&data, &size)); - EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount()); -} - // Check that a zero-size array doesn't confuse the code. TEST(ZeroSizeArray, Input) { ArrayInputStream input(NULL, 0); |