summary | refs | log | tree | commit | diff | stats
path: root/src/google/protobuf/io
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r-- src/google/protobuf/io/coded_stream.cc 164
-rw-r--r-- src/google/protobuf/io/coded_stream.h 218
-rw-r--r-- src/google/protobuf/io/coded_stream_inl.h 13
-rw-r--r-- src/google/protobuf/io/coded_stream_unittest.cc 283
-rw-r--r-- src/google/protobuf/io/gzip_stream.cc 62
-rw-r--r-- src/google/protobuf/io/gzip_stream.h 16
-rwxr-xr-x src/google/protobuf/io/gzip_stream_unittest.sh 2
-rw-r--r-- src/google/protobuf/io/package_info.h 2
-rw-r--r-- src/google/protobuf/io/printer.cc 20
-rw-r--r-- src/google/protobuf/io/printer.h 8
-rw-r--r-- src/google/protobuf/io/printer_unittest.cc 32
-rw-r--r-- src/google/protobuf/io/strtod.cc 113
-rw-r--r-- src/google/protobuf/io/strtod.h 50
-rw-r--r-- src/google/protobuf/io/tokenizer.cc 546
-rw-r--r-- src/google/protobuf/io/tokenizer.h 117
-rw-r--r-- src/google/protobuf/io/tokenizer_unittest.cc 358
-rw-r--r-- src/google/protobuf/io/zero_copy_stream.cc 11
-rw-r--r-- src/google/protobuf/io/zero_copy_stream.h 12
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_impl.cc 13
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_impl.h 3
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_impl_lite.cc 24
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_impl_lite.h 16
-rw-r--r-- src/google/protobuf/io/zero_copy_stream_unittest.cc 264
23 files changed, 280 insertions, 2067 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index 5344975..6a91a13 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -43,7 +43,7 @@
#include <limits.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
+#include <google/protobuf/stubs/stl_util-inl.h>
namespace google {
@@ -56,36 +56,10 @@ static const int kMaxVarintBytes = 10;
static const int kMaxVarint32Bytes = 5;
-inline bool NextNonEmpty(ZeroCopyInputStream* input,
- const void** data, int* size) {
- bool success;
- do {
- success = input->Next(data, size);
- } while (success && *size == 0);
- return success;
-}
-
} // namespace
// CodedInputStream ==================================================
-CodedInputStream::~CodedInputStream() {
- if (input_ != NULL) {
- BackUpInputToCurrentPosition();
- }
-
- if (total_bytes_warning_threshold_ == -2) {
- GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_;
- }
-}
-
-// Static.
-int CodedInputStream::default_recursion_limit_ = 100;
-
-
-void CodedOutputStream::EnableAliasing(bool enabled) {
- aliasing_enabled_ = enabled && output_->AllowsAliasing();
-}
void CodedInputStream::BackUpInputToCurrentPosition() {
int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
@@ -115,7 +89,8 @@ inline void CodedInputStream::RecomputeBufferLimits() {
CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
// Current position relative to the beginning of the stream.
- int current_position = CurrentPosition();
+ int current_position = total_bytes_read_ -
+ (BufferSize() + buffer_size_after_limit_);
Limit old_limit = current_limit_;
@@ -149,9 +124,10 @@ void CodedInputStream::PopLimit(Limit limit) {
legitimate_message_end_ = false;
}
-int CodedInputStream::BytesUntilLimit() const {
+int CodedInputStream::BytesUntilLimit() {
if (current_limit_ == INT_MAX) return -1;
- int current_position = CurrentPosition();
+ int current_position = total_bytes_read_ -
+ (BufferSize() + buffer_size_after_limit_);
return current_limit_ - current_position;
}
@@ -160,22 +136,13 @@ void CodedInputStream::SetTotalBytesLimit(
int total_bytes_limit, int warning_threshold) {
// Make sure the limit isn't already past, since this could confuse other
// code.
- int current_position = CurrentPosition();
+ int current_position = total_bytes_read_ -
+ (BufferSize() + buffer_size_after_limit_);
total_bytes_limit_ = max(current_position, total_bytes_limit);
- if (warning_threshold >= 0) {
- total_bytes_warning_threshold_ = warning_threshold;
- } else {
- // warning_threshold is negative
- total_bytes_warning_threshold_ = -1;
- }
+ total_bytes_warning_threshold_ = warning_threshold;
RecomputeBufferLimits();
}
-int CodedInputStream::BytesUntilTotalBytesLimit() const {
- if (total_bytes_limit_ == INT_MAX) return -1;
- return total_bytes_limit_ - CurrentPosition();
-}
-
void CodedInputStream::PrintTotalBytesLimitError() {
GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
"big (more than " << total_bytes_limit_
@@ -256,14 +223,6 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
buffer->clear();
}
- int closest_limit = min(current_limit_, total_bytes_limit_);
- if (closest_limit != INT_MAX) {
- int bytes_to_limit = closest_limit - CurrentPosition();
- if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
- buffer->reserve(size);
- }
- }
-
int current_buffer_size;
while ((current_buffer_size = BufferSize()) < size) {
// Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
@@ -330,16 +289,11 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) {
uint32 b;
uint32 result;
- b = *(ptr++); result = b ; if (!(b & 0x80)) goto done;
- result -= 0x80;
- b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done;
- result -= 0x80 << 7;
- b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
- result -= 0x80 << 14;
- b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done;
- result -= 0x80 << 21;
- b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done;
- // "result -= 0x80 << 28" is irrevelant.
+ b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;
// If the input is larger than 32 bits, we still need to read it all
// and discard the high-order bits.
@@ -369,8 +323,8 @@ bool CodedInputStream::ReadVarint32Slow(uint32* value) {
bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
if (BufferSize() >= kMaxVarintBytes ||
- // Optimization: We're also safe if the buffer is non-empty and it ends
- // with a byte that would terminate a varint.
+ // Optimization: If the varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
(buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
const uint8* end = ReadVarint32FromArray(buffer_, value);
if (end == NULL) return false;
@@ -405,17 +359,16 @@ uint32 CodedInputStream::ReadTagSlow() {
// For the slow path, just do a 64-bit read. Try to optimize for one-byte tags
// again, since we have now refreshed the buffer.
- uint64 result = 0;
+ uint64 result;
if (!ReadVarint64(&result)) return 0;
return static_cast<uint32>(result);
}
uint32 CodedInputStream::ReadTagFallback() {
- const int buf_size = BufferSize();
- if (buf_size >= kMaxVarintBytes ||
- // Optimization: We're also safe if the buffer is non-empty and it ends
- // with a byte that would terminate a varint.
- (buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
+ if (BufferSize() >= kMaxVarintBytes ||
+ // Optimization: If the varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
uint32 tag;
const uint8* end = ReadVarint32FromArray(buffer_, &tag);
if (end == NULL) {
@@ -426,9 +379,7 @@ uint32 CodedInputStream::ReadTagFallback() {
} else {
// We are commonly at a limit when attempting to read tags. Try to quickly
// detect this case without making another function call.
- if ((buf_size == 0) &&
- ((buffer_size_after_limit_ > 0) ||
- (total_bytes_read_ == current_limit_)) &&
+ if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 &&
// Make sure that the limit we hit is not total_bytes_limit_, since
// in that case we still need to call Refresh() so that it prints an
// error.
@@ -466,8 +417,8 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
if (BufferSize() >= kMaxVarintBytes ||
- // Optimization: We're also safe if the buffer is non-empty and it ends
- // with a byte that would terminate a varint.
+ // Optimization: If the varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
(buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
// Fast path: We have enough bytes left in the buffer to guarantee that
// this read won't cross the end, so we can skip the checks.
@@ -479,30 +430,20 @@ bool CodedInputStream::ReadVarint64Fallback(uint64* value) {
// processors.
uint32 part0 = 0, part1 = 0, part2 = 0;
- b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done;
- part0 -= 0x80;
- b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done;
- part0 -= 0x80 << 7;
- b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
- part0 -= 0x80 << 14;
- b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
- part0 -= 0x80 << 21;
- b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done;
- part1 -= 0x80;
- b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done;
- part1 -= 0x80 << 7;
- b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
- part1 -= 0x80 << 14;
- b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
- part1 -= 0x80 << 21;
- b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done;
- part2 -= 0x80;
- b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done;
- // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
+ b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
// We have overrun the maximum size of a varint (10 bytes). The data
// must be corrupt.
- return false;
+ return NULL;
done:
Advance(ptr - buffer_);
@@ -542,13 +483,13 @@ bool CodedInputStream::Refresh() {
"CodedInputStream::SetTotalBytesLimit() in "
"google/protobuf/io/coded_stream.h.";
- // Don't warn again for this stream, and print total size at the end.
- total_bytes_warning_threshold_ = -2;
+ // Don't warn again for this stream.
+ total_bytes_warning_threshold_ = -1;
}
const void* void_buffer;
int buffer_size;
- if (NextNonEmpty(input_, &void_buffer, &buffer_size)) {
+ if (input_->Next(&void_buffer, &buffer_size)) {
buffer_ = reinterpret_cast<const uint8*>(void_buffer);
buffer_end_ = buffer_ + buffer_size;
GOOGLE_CHECK_GE(buffer_size, 0);
@@ -587,8 +528,7 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
buffer_(NULL),
buffer_size_(0),
total_bytes_(0),
- had_error_(false),
- aliasing_enabled_(false) {
+ had_error_(false) {
// Eagerly Refresh() so buffer space is immediately available.
Refresh();
// The Refresh() may have failed. If the client doesn't write any data,
@@ -642,23 +582,6 @@ uint8* CodedOutputStream::WriteRawToArray(
}
-void CodedOutputStream::WriteAliasedRaw(const void* data, int size) {
- if (size < buffer_size_
- ) {
- WriteRaw(data, size);
- } else {
- if (buffer_size_ > 0) {
- output_->BackUp(buffer_size_);
- total_bytes_ -= buffer_size_;
- buffer_ = NULL;
- buffer_size_ = 0;
- }
-
- total_bytes_ += size;
- had_error_ |= !output_->WriteAliasedRaw(data, size);
- }
-}
-
void CodedOutputStream::WriteLittleEndian32(uint32 value) {
uint8 bytes[sizeof(value)];
@@ -902,13 +825,6 @@ int CodedOutputStream::VarintSize64(uint64 value) {
}
}
-uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
- uint8* target) {
- GOOGLE_DCHECK_LE(str.size(), kuint32max);
- target = WriteVarint32ToArray(str.size(), target);
- return WriteStringToArray(str, target);
-}
-
} // namespace io
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
index 81fabb1..e5f6161 100644
--- a/src/google/protobuf/io/coded_stream.h
+++ b/src/google/protobuf/io/coded_stream.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -110,27 +110,14 @@
#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
#include <string>
-#ifdef _MSC_VER
- #if defined(_M_IX86) && \
- !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
- #define PROTOBUF_LITTLE_ENDIAN 1
- #endif
- #if _MSC_VER >= 1300
- // If MSVC has "/RTCc" set, it will complain about truncating casts at
- // runtime. This file contains some intentional truncating casts.
- #pragma runtime_checks("c", off)
- #endif
-#else
- #include <sys/param.h> // __BYTE_ORDER
- #if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && \
- !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
- #define PROTOBUF_LITTLE_ENDIAN 1
- #endif
-#endif
+#ifndef _MSC_VER
+#include <sys/param.h>
+#endif // !_MSC_VER
#include <google/protobuf/stubs/common.h>
-
+#include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro
namespace google {
+
namespace protobuf {
class DescriptorPool;
@@ -170,9 +157,6 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// successfully and the stream's byte limit.
~CodedInputStream();
- // Return true if this CodedInputStream reads from a flat array instead of
- // a ZeroCopyInputStream.
- inline bool IsFlat() const;
// Skips a number of bytes. Returns false if an underlying read error
// occurs.
@@ -233,22 +217,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Read a tag. This calls ReadVarint32() and returns the result, or returns
// zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
// the last tag value, which can be checked with LastTagWas().
- // Always inline because this is only called in one place per parse loop
+ // Always inline because this is only called in once place per parse loop
// but it is called for every iteration of said loop, so it should be fast.
// GCC doesn't want to inline this by default.
uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
- // This usually a faster alternative to ReadTag() when cutoff is a manifest
- // constant. It does particularly well for cutoff >= 127. The first part
- // of the return value is the tag that was read, though it can also be 0 in
- // the cases where ReadTag() would return 0. If the second part is true
- // then the tag is known to be in [0, cutoff]. If not, the tag either is
- // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
- // because that can arise in several ways, and for best performance we want
- // to avoid an extra "is tag == 0?" check here.)
- inline std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff)
- GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
-
// Usually returns true if calling ReadVarint32() now would produce the given
// value. Will always return false if ReadVarint32() would not return the
// given value. If ExpectTag() returns true, it also advances past
@@ -275,8 +248,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// zero, and ConsumedEntireMessage() will return true.
bool ExpectAtEnd();
- // If the last call to ReadTag() or ReadTagWithCutoff() returned the
- // given value, returns true. Otherwise, returns false;
+ // If the last call to ReadTag() returned the given value, returns true.
+ // Otherwise, returns false;
//
// This is needed because parsers for some types of embedded messages
// (with field type TYPE_GROUP) don't actually know that they've reached the
@@ -325,10 +298,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Returns the number of bytes left until the nearest limit on the
// stack is hit, or -1 if no limits are in place.
- int BytesUntilLimit() const;
-
- // Returns current position relative to the beginning of the input stream.
- int CurrentPosition() const;
+ int BytesUntilLimit();
// Total Bytes Limit -----------------------------------------------
// To prevent malicious users from sending excessively large messages
@@ -344,9 +314,8 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// cause integer overflows is 512MB. The default limit is 64MB. Apps
// should set shorter limits if possible. If warning_threshold is not -1,
// a warning will be printed to stderr after warning_threshold bytes are
- // read. For backwards compatibility all negative values get squashed to -1,
- // as other negative values might have special internal meanings.
- // An error will always be printed to stderr if the limit is reached.
+ // read. An error will always be printed to stderr if the limit is
+ // reached.
//
// This is unrelated to PushLimit()/PopLimit().
//
@@ -367,20 +336,15 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// something unusual.
void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
- // The Total Bytes Limit minus the Current Position, or -1 if there
- // is no Total Bytes Limit.
- int BytesUntilTotalBytesLimit() const;
-
// Recursion Limit -------------------------------------------------
// To prevent corrupt or malicious messages from causing stack overflows,
// we must keep track of the depth of recursion when parsing embedded
// messages and groups. CodedInputStream keeps track of this because it
// is the only object that is passed down the stack during parsing.
- // Sets the maximum recursion depth. The default is 100.
+ // Sets the maximum recursion depth. The default is 64.
void SetRecursionLimit(int limit);
-
// Increments the current recursion depth. Returns true if the depth is
// under the limit, false if it has gone over.
bool IncrementRecursionDepth();
@@ -456,8 +420,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
//
// Note that this feature is ignored when parsing "lite" messages as they do
// not have descriptors.
- void SetExtensionRegistry(const DescriptorPool* pool,
- MessageFactory* factory);
+ void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);
// Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
// has been provided.
@@ -481,7 +444,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
int overflow_bytes_;
// LastTagWas() stuff.
- uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
+ uint32 last_tag_; // result of last ReadTag().
// This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
// at EOF, or by ExpectAtEnd() when it returns true. This happens when we
@@ -506,11 +469,6 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// Maximum number of bytes to read, period. This is unrelated to
// current_limit_. Set using SetTotalBytesLimit().
int total_bytes_limit_;
-
- // If positive/0: Limit for bytes read after which a warning due to size
- // should be logged.
- // If -1: Printing of warning disabled. Can be set by client.
- // If -2: Internal: Limit has been reached, print full size when destructing.
int total_bytes_warning_threshold_;
// Current recursion depth, controlled by IncrementRecursionDepth() and
@@ -563,13 +521,12 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
bool ReadStringFallback(string* buffer, int size);
// Return the size of the buffer.
- int BufferSize() const;
+ uint32 BufferSize() const;
static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
-
- static int default_recursion_limit_; // 100 by default.
+ static const int kDefaultRecursionLimit = 64;
};
// Class which encodes and writes binary data which is composed of varint-
@@ -597,7 +554,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream {
// char text[] = "Hello world!";
//
// int coded_size = sizeof(magic_number) +
-// CodedOutputStream::VarintSize32(strlen(text)) +
+// CodedOutputStream::Varint32Size(strlen(text)) +
// strlen(text);
//
// uint8* buffer =
@@ -653,9 +610,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// Write raw bytes, copying them from the given buffer.
void WriteRaw(const void* buffer, int size);
- // Like WriteRaw() but will try to write aliased data if aliasing is
- // turned on.
- void WriteRawMaybeAliased(const void* data, int size);
// Like WriteRaw() but writing directly to the target array.
// This is _not_ inlined, as the compiler often optimizes memcpy into inline
// copy loops. Since this gets called by every field with string or bytes
@@ -667,21 +621,8 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
void WriteString(const string& str);
// Like WriteString() but writing directly to the target array.
static uint8* WriteStringToArray(const string& str, uint8* target);
- // Write the varint-encoded size of str followed by str.
- static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
- // Instructs the CodedOutputStream to allow the underlying
- // ZeroCopyOutputStream to hold pointers to the original structure instead of
- // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
- // underlying stream does not support aliasing, then enabling it has no
- // affect. For now, this only affects the behavior of
- // WriteRawMaybeAliased().
- //
- // NOTE: It is caller's responsibility to ensure that the chunk of memory
- // remains live until all of the data has been consumed from the stream.
- void EnableAliasing(bool enabled);
-
// Write a 32-bit little-endian integer.
void WriteLittleEndian32(uint32 value);
// Like WriteLittleEndian32() but writing directly to the target array.
@@ -726,21 +667,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// If negative, 10 bytes. Otheriwse, same as VarintSize32().
static int VarintSize32SignExtended(int32 value);
- // Compile-time equivalent of VarintSize32().
- template <uint32 Value>
- struct StaticVarintSize32 {
- static const int value =
- (Value < (1 << 7))
- ? 1
- : (Value < (1 << 14))
- ? 2
- : (Value < (1 << 21))
- ? 3
- : (Value < (1 << 28))
- ? 4
- : 5;
- };
-
// Returns the total number of bytes written since this object was created.
inline int ByteCount() const;
@@ -756,7 +682,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
int buffer_size_;
int total_bytes_; // Sum of sizes of all buffers seen so far.
bool had_error_; // Whether an error occurred during output.
- bool aliasing_enabled_; // See EnableAliasing().
// Advance the buffer by a given number of bytes.
void Advance(int amount);
@@ -765,10 +690,6 @@ class LIBPROTOBUF_EXPORT CodedOutputStream {
// Advance(buffer_size_).
bool Refresh();
- // Like WriteRaw() but may avoid copying if the underlying
- // ZeroCopyOutputStream supports it.
- void WriteAliasedRaw(const void* buffer, int size);
-
static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);
// Always-inlined versions of WriteVarint* functions so that code can be
@@ -814,7 +735,8 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) {
inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
const uint8* buffer,
uint32* value) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(value, buffer, sizeof(*value));
return buffer + sizeof(*value);
#else
@@ -829,7 +751,8 @@ inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
const uint8* buffer,
uint64* value) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(value, buffer, sizeof(*value));
return buffer + sizeof(*value);
#else
@@ -848,8 +771,9 @@ inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
}
inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
- if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
memcpy(value, buffer_, sizeof(*value));
Advance(sizeof(*value));
return true;
@@ -862,8 +786,9 @@ inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
}
inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
- if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) {
memcpy(value, buffer_, sizeof(*value));
Advance(sizeof(*value));
return true;
@@ -886,45 +811,6 @@ inline uint32 CodedInputStream::ReadTag() {
}
}
-inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
- uint32 cutoff) {
- // In performance-sensitive code we can expect cutoff to be a compile-time
- // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
- // compile time.
- if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
- // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
- // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
- // is large enough then is it better to check for the two-byte case first?
- if (static_cast<int8>(buffer_[0]) > 0) {
- const uint32 kMax1ByteVarint = 0x7f;
- uint32 tag = last_tag_ = buffer_[0];
- Advance(1);
- return make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
- }
- // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
- // and tag is two bytes. The latter is tested by bitwise-and-not of the
- // first byte and the second byte.
- if (cutoff >= 0x80 &&
- GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
- GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
- const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
- uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
- Advance(2);
- // It might make sense to test for tag == 0 now, but it is so rare that
- // that we don't bother. A varint-encoded 0 should be one byte unless
- // the encoder lost its mind. The second part of the return value of
- // this function is allowed to be either true or false if the tag is 0,
- // so we don't have to check for tag == 0. We may need to check whether
- // it exceeds cutoff.
- bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
- return make_pair(tag, at_or_below_cutoff);
- }
- }
- // Slow path
- last_tag_ = ReadTagFallback();
- return make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
-}
-
inline bool CodedInputStream::LastTagWas(uint32 expected) {
return last_tag_ == expected;
}
@@ -981,9 +867,7 @@ inline bool CodedInputStream::ExpectAtEnd() {
// If we are at a limit we know no more bytes can be read. Otherwise, it's
// hard to say without calling Refresh(), and we'd rather not do that.
- if (buffer_ == buffer_end_ &&
- ((buffer_size_after_limit_ != 0) ||
- (total_bytes_read_ == current_limit_))) {
+ if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) {
last_tag_ = 0; // Pretend we called ReadTag()...
legitimate_message_end_ = true; // ... and it hit EOF.
return true;
@@ -992,10 +876,6 @@ inline bool CodedInputStream::ExpectAtEnd() {
}
}
-inline int CodedInputStream::CurrentPosition() const {
- return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
-}
-
inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
if (buffer_size_ < size) {
return NULL;
@@ -1035,7 +915,8 @@ inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
uint8* target) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(target, &value, sizeof(value));
#else
target[0] = static_cast<uint8>(value);
@@ -1048,7 +929,8 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
uint8* target) {
-#if defined(PROTOBUF_LITTLE_ENDIAN)
+#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \
+ defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN
memcpy(target, &value, sizeof(value));
#else
uint32 part0 = static_cast<uint32>(value);
@@ -1101,21 +983,12 @@ inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
}
inline void CodedOutputStream::WriteString(const string& str) {
- WriteRaw(str.data(), static_cast<int>(str.size()));
-}
-
-inline void CodedOutputStream::WriteRawMaybeAliased(
- const void* data, int size) {
- if (aliasing_enabled_) {
- WriteAliasedRaw(data, size);
- } else {
- WriteRaw(data, size);
- }
+ WriteRaw(str.data(), str.size());
}
inline uint8* CodedOutputStream::WriteStringToArray(
const string& str, uint8* target) {
- return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
+ return WriteRawToArray(str.data(), str.size(), target);
}
inline int CodedOutputStream::ByteCount() const {
@@ -1144,7 +1017,7 @@ inline void CodedInputStream::DecrementRecursionDepth() {
if (recursion_depth_ > 0) --recursion_depth_;
}
-inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
+inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool,
MessageFactory* factory) {
extension_pool_ = pool;
extension_factory_ = factory;
@@ -1158,7 +1031,7 @@ inline MessageFactory* CodedInputStream::GetExtensionFactory() {
return extension_factory_;
}
-inline int CodedInputStream::BufferSize() const {
+inline uint32 CodedInputStream::BufferSize() const {
return buffer_end_ - buffer_;
}
@@ -1171,12 +1044,12 @@ inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
last_tag_(0),
legitimate_message_end_(false),
aliasing_enabled_(false),
- current_limit_(kint32max),
+ current_limit_(INT_MAX),
buffer_size_after_limit_(0),
total_bytes_limit_(kDefaultTotalBytesLimit),
total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
recursion_depth_(0),
- recursion_limit_(default_recursion_limit_),
+ recursion_limit_(kDefaultRecursionLimit),
extension_pool_(NULL),
extension_factory_(NULL) {
// Eagerly Refresh() so buffer space is immediately available.
@@ -1197,24 +1070,21 @@ inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
total_bytes_limit_(kDefaultTotalBytesLimit),
total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
recursion_depth_(0),
- recursion_limit_(default_recursion_limit_),
+ recursion_limit_(kDefaultRecursionLimit),
extension_pool_(NULL),
extension_factory_(NULL) {
// Note that setting current_limit_ == size is important to prevent some
// code paths from trying to access input_ and segfaulting.
}
-inline bool CodedInputStream::IsFlat() const {
- return input_ == NULL;
+inline CodedInputStream::~CodedInputStream() {
+ if (input_ != NULL) {
+ BackUpInputToCurrentPosition();
+ }
}
} // namespace io
} // namespace protobuf
-
-#if defined(_MSC_VER) && _MSC_VER >= 1300
- #pragma runtime_checks("c", restore)
-#endif // _MSC_VER
-
} // namespace google
#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
index 88c14ca..e9799d4 100644
--- a/src/google/protobuf/io/coded_stream_inl.h
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -37,9 +37,8 @@
#define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
#include <google/protobuf/io/coded_stream.h>
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include <string>
-#include <google/protobuf/stubs/stl_util.h>
+#include <google/protobuf/stubs/stl_util-inl.h>
namespace google {
namespace protobuf {
@@ -51,12 +50,8 @@ inline bool CodedInputStream::InternalReadStringInline(string* buffer,
if (BufferSize() >= size) {
STLStringResizeUninitialized(buffer, size);
- // When buffer is empty, string_as_array(buffer) will return NULL but memcpy
- // requires non-NULL pointers even when size is 0. Hench this check.
- if (size > 0) {
- memcpy(mutable_string_data(buffer), buffer_, size);
- Advance(size);
- }
+ memcpy(string_as_array(buffer), buffer_, size);
+ Advance(size);
return true;
}
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
index f4cb5ea..7d29833 100644
--- a/src/google/protobuf/io/coded_stream_unittest.cc
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -44,6 +44,7 @@
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/stubs/strutil.h>
// This declares an unsigned long long integer literal in a portable way.
@@ -124,13 +125,6 @@ namespace {
class CodedStreamTest : public testing::Test {
protected:
- // Helper method used by tests for bytes warning. See implementation comment
- // for further information.
- static void SetupTotalBytesLimitWarningTest(
- int total_bytes_limit, int warning_threshold,
- vector<string>* out_errors, vector<string>* out_warnings);
-
- // Buffer used during most of the tests. This assumes tests run sequentially.
static const int kBufferSize = 1024 * 64;
static uint8 buffer_[kBufferSize];
};
@@ -214,33 +208,6 @@ TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) {
EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
}
-// This is the regression test that verifies that there is no issues
-// with the empty input buffers handling.
-TEST_F(CodedStreamTest, EmptyInputBeforeEos) {
- class In : public ZeroCopyInputStream {
- public:
- In() : count_(0) {}
- private:
- virtual bool Next(const void** data, int* size) {
- *data = NULL;
- *size = 0;
- return count_++ < 2;
- }
- virtual void BackUp(int count) {
- GOOGLE_LOG(FATAL) << "Tests never call this.";
- }
- virtual bool Skip(int count) {
- GOOGLE_LOG(FATAL) << "Tests never call this.";
- return false;
- }
- virtual int64 ByteCount() const { return 0; }
- int count_;
- } in;
- CodedInputStream input(&in);
- input.ReadTag();
- EXPECT_TRUE(input.ConsumedEntireMessage());
-}
-
TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
// Leave one byte at the beginning of the buffer so we can read it
// to force the first buffer to be loaded.
@@ -682,197 +649,14 @@ TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
}
-TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes));
- EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit());
-
- string str;
- EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
- EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes),
- coded_input.BytesUntilTotalBytesLimit());
- EXPECT_EQ(kRawBytes, str);
- // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
- EXPECT_GE(str.capacity(), strlen(kRawBytes));
- }
-
- EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
-}
-
-TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(sizeof(buffer_));
-
- string str;
- EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
- EXPECT_EQ(kRawBytes, str);
- // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
- EXPECT_GE(str.capacity(), strlen(kRawBytes));
- }
-
- EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
-}
-
-TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
-
- string str;
- EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
- EXPECT_EQ(kRawBytes, str);
- // Note: this check depends on string class implementation. It
- // expects that string will allocate more than strlen(kRawBytes)
- // if the content of kRawBytes is appended to string in small
- // chunks.
- // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
- EXPECT_GE(str.capacity(), strlen(kRawBytes));
- }
-
- EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
-}
-
-TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(sizeof(buffer_));
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, -1));
- // Note: this check depends on string class implementation. It
- // expects that string will always allocate the same amount of
- // memory for an empty string.
- EXPECT_EQ(string().capacity(), str.capacity());
- }
-}
-
-TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(sizeof(buffer_));
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
- EXPECT_GT(1 << 30, str.capacity());
- }
-}
-
-TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(16);
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
- // Note: this check depends on string class implementation. It
- // expects that string will allocate less than strlen(kRawBytes)
- // for an empty string.
- EXPECT_GT(strlen(kRawBytes), str.capacity());
- }
-}
-
-TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.SetTotalBytesLimit(16, 16);
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
- // Note: this check depends on string class implementation. It
- // expects that string will allocate less than strlen(kRawBytes)
- // for an empty string.
- EXPECT_GT(strlen(kRawBytes), str.capacity());
- }
-}
-
-TEST_F(CodedStreamTest,
- ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(sizeof(buffer_));
- coded_input.SetTotalBytesLimit(16, 16);
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
- // Note: this check depends on string class implementation. It
- // expects that string will allocate less than strlen(kRawBytes)
- // for an empty string.
- EXPECT_GT(strlen(kRawBytes), str.capacity());
- }
-}
-
-TEST_F(CodedStreamTest,
- ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) {
- memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
- // Buffer size in the input must be smaller than sizeof(kRawBytes),
- // otherwise check against capacity will fail as ReadStringInline()
- // will handle the reading and will reserve the memory as needed.
- ArrayInputStream input(buffer_, sizeof(buffer_), 32);
-
- {
- CodedInputStream coded_input(&input);
- coded_input.PushLimit(16);
- coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_));
- EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit());
-
- string str;
- EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
- // Note: this check depends on string class implementation. It
- // expects that string will allocate less than strlen(kRawBytes)
- // for an empty string.
- EXPECT_GT(strlen(kRawBytes), str.capacity());
- }
-}
-
// -------------------------------------------------------------------
// Skip
const char kSkipTestBytes[] =
"<Before skipping><To be skipped><After skipping>";
+const char kSkipOutputTestBytes[] =
+ "-----------------<To be skipped>----------------";
TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) {
memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes));
@@ -1163,11 +947,9 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
ArrayInputStream input(buffer_, sizeof(buffer_));
CodedInputStream coded_input(&input);
coded_input.SetTotalBytesLimit(16, -1);
- EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
string str;
EXPECT_TRUE(coded_input.ReadString(&str, 16));
- EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
vector<string> errors;
@@ -1182,9 +964,7 @@ TEST_F(CodedStreamTest, TotalBytesLimit) {
"A protocol message was rejected because it was too big", errors[0]);
coded_input.SetTotalBytesLimit(32, -1);
- EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
EXPECT_TRUE(coded_input.ReadString(&str, 16));
- EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
}
TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
@@ -1215,60 +995,6 @@ TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
EXPECT_FALSE(coded_input.ConsumedEntireMessage());
}
-// This method is used by the tests below.
-// It constructs a CodedInputStream with the given limits and tries to read 2KiB
-// of data from it. Then it returns the logged errors and warnings in the given
-// vectors.
-void CodedStreamTest::SetupTotalBytesLimitWarningTest(
- int total_bytes_limit, int warning_threshold,
- vector<string>* out_errors, vector<string>* out_warnings) {
- ArrayInputStream raw_input(buffer_, sizeof(buffer_), 128);
-
- ScopedMemoryLog scoped_log;
- {
- CodedInputStream input(&raw_input);
- input.SetTotalBytesLimit(total_bytes_limit, warning_threshold);
- string str;
- EXPECT_TRUE(input.ReadString(&str, 2048));
- }
-
- *out_errors = scoped_log.GetMessages(ERROR);
- *out_warnings = scoped_log.GetMessages(WARNING);
-}
-
-TEST_F(CodedStreamTest, TotalBytesLimitWarning) {
- vector<string> errors;
- vector<string> warnings;
- SetupTotalBytesLimitWarningTest(10240, 1024, &errors, &warnings);
-
- EXPECT_EQ(0, errors.size());
-
- ASSERT_EQ(2, warnings.size());
- EXPECT_PRED_FORMAT2(testing::IsSubstring,
- "Reading dangerously large protocol message. If the message turns out to "
- "be larger than 10240 bytes, parsing will be halted for security reasons.",
- warnings[0]);
- EXPECT_PRED_FORMAT2(testing::IsSubstring,
- "The total number of bytes read was 2048",
- warnings[1]);
-}
-
-TEST_F(CodedStreamTest, TotalBytesLimitWarningDisabled) {
- vector<string> errors;
- vector<string> warnings;
-
- // Test with -1
- SetupTotalBytesLimitWarningTest(10240, -1, &errors, &warnings);
- EXPECT_EQ(0, errors.size());
- EXPECT_EQ(0, warnings.size());
-
- // Test again with -2, expecting the same result
- SetupTotalBytesLimitWarningTest(10240, -2, &errors, &warnings);
- EXPECT_EQ(0, errors.size());
- EXPECT_EQ(0, warnings.size());
-}
-
-
TEST_F(CodedStreamTest, RecursionLimit) {
ArrayInputStream input(buffer_, sizeof(buffer_));
CodedInputStream coded_input(&input);
@@ -1306,7 +1032,6 @@ TEST_F(CodedStreamTest, RecursionLimit) {
EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7
}
-
class ReallyBigInputStream : public ZeroCopyInputStream {
public:
ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {}
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
index ee28696..84d277f 100644
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -73,17 +73,6 @@ GzipInputStream::~GzipInputStream() {
zerror_ = inflateEnd(&zcontext_);
}
-static inline int internalInflateInit2(
- z_stream* zcontext, GzipInputStream::Format format) {
- int windowBitsFormat = 0;
- switch (format) {
- case GzipInputStream::GZIP: windowBitsFormat = 16; break;
- case GzipInputStream::AUTO: windowBitsFormat = 32; break;
- case GzipInputStream::ZLIB: windowBitsFormat = 0; break;
- }
- return inflateInit2(zcontext, /* windowBits */15 | windowBitsFormat);
-}
-
int GzipInputStream::Inflate(int flush) {
if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
// previous inflate filled output buffer. don't change input params yet.
@@ -100,7 +89,14 @@ int GzipInputStream::Inflate(int flush) {
zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));
zcontext_.avail_in = in_size;
if (first) {
- int error = internalInflateInit2(&zcontext_, format_);
+ int windowBitsFormat = 0;
+ switch (format_) {
+ case GZIP: windowBitsFormat = 16; break;
+ case AUTO: windowBitsFormat = 32; break;
+ case ZLIB: windowBitsFormat = 0; break;
+ }
+ int error = inflateInit2(&zcontext_,
+ /* windowBits */15 | windowBitsFormat);
if (error != Z_OK) {
return error;
}
@@ -131,21 +127,9 @@ bool GzipInputStream::Next(const void** data, int* size) {
return true;
}
if (zerror_ == Z_STREAM_END) {
- if (zcontext_.next_out != NULL) {
- // sub_stream_ may have concatenated streams to follow
- zerror_ = inflateEnd(&zcontext_);
- if (zerror_ != Z_OK) {
- return false;
- }
- zerror_ = internalInflateInit2(&zcontext_, format_);
- if (zerror_ != Z_OK) {
- return false;
- }
- } else {
- *data = NULL;
- *size = 0;
- return false;
- }
+ *data = NULL;
+ *size = 0;
+ return false;
}
zerror_ = Inflate(Z_NO_FLUSH);
if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
@@ -199,6 +183,16 @@ GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream,
Init(sub_stream, options);
}
+GzipOutputStream::GzipOutputStream(
+ ZeroCopyOutputStream* sub_stream, Format format, int buffer_size) {
+ Options options;
+ options.format = format;
+ if (buffer_size != -1) {
+ options.buffer_size = buffer_size;
+ }
+ Init(sub_stream, options);
+}
+
void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream,
const Options& options) {
sub_stream_ = sub_stream;
@@ -257,7 +251,8 @@ int GzipOutputStream::Deflate(int flush) {
}
error = deflate(&zcontext_, flush);
} while (error == Z_OK && zcontext_.avail_out == 0);
- if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) {
+ if (((flush == Z_FULL_FLUSH) || (flush == Z_FINISH))
+ && (zcontext_.avail_out != sub_data_size_)) {
// Notify lower layer of data.
sub_stream_->BackUp(zcontext_.avail_out);
// We don't own the buffer anymore.
@@ -299,11 +294,10 @@ int64 GzipOutputStream::ByteCount() const {
}
bool GzipOutputStream::Flush() {
- zerror_ = Deflate(Z_FULL_FLUSH);
- // Return true if the flush succeeded or if it was a no-op.
- return (zerror_ == Z_OK) ||
- (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
- zcontext_.avail_out != 0);
+ do {
+ zerror_ = Deflate(Z_FULL_FLUSH);
+ } while (zerror_ == Z_OK);
+ return zerror_ == Z_OK;
}
bool GzipOutputStream::Close() {
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
index c7ccc26..65dbc5b 100644
--- a/src/google/protobuf/io/gzip_stream.h
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -45,7 +45,6 @@
#include <zlib.h>
-#include <google/protobuf/stubs/common.h>
#include <google/protobuf/io/zero_copy_stream.h>
namespace google {
@@ -145,6 +144,12 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
ZeroCopyOutputStream* sub_stream,
const Options& options);
+ // DEPRECATED: Use one of the above constructors instead.
+ GzipOutputStream(
+ ZeroCopyOutputStream* sub_stream,
+ Format format,
+ int buffer_size = -1) GOOGLE_ATTRIBUTE_DEPRECATED;
+
virtual ~GzipOutputStream();
// Return last error message or NULL if no error.
@@ -160,13 +165,6 @@ class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
// necessary.
// Compression may be less efficient stopping and starting around flushes.
// Returns true if no error.
- //
- // Please ensure that block size is > 6. Here is an excerpt from the zlib
- // doc that explains why:
- //
- // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out
- // is greater than six to avoid repeated flush markers due to
- // avail_out == 0 on return.
bool Flush();
// Writes out all data and closes the gzip stream.
diff --git a/src/google/protobuf/io/gzip_stream_unittest.sh b/src/google/protobuf/io/gzip_stream_unittest.sh
index 16251a9..6e8a094 100755
--- a/src/google/protobuf/io/gzip_stream_unittest.sh
+++ b/src/google/protobuf/io/gzip_stream_unittest.sh
@@ -2,7 +2,7 @@
#
# Protocol Buffers - Google's data interchange format
# Copyright 2009 Google Inc. All rights reserved.
-# https://developers.google.com/protocol-buffers/
+# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
diff --git a/src/google/protobuf/io/package_info.h b/src/google/protobuf/io/package_info.h
index dc1fc91..7a7a4e7 100644
--- a/src/google/protobuf/io/package_info.h
+++ b/src/google/protobuf/io/package_info.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
index c8df417..c7d3074 100644
--- a/src/google/protobuf/io/printer.cc
+++ b/src/google/protobuf/io/printer.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -35,6 +35,7 @@
#include <google/protobuf/io/printer.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/strutil.h>
namespace google {
namespace protobuf {
@@ -50,8 +51,8 @@ Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter)
}
Printer::~Printer() {
- // Only BackUp() if we have called Next() at least once and never failed.
- if (buffer_size_ > 0 && !failed_) {
+ // Only BackUp() if we're sure we've successfully called Next() at least once.
+ if (buffer_size_ > 0) {
output_->BackUp(buffer_size_);
}
}
@@ -131,17 +132,6 @@ void Printer::Print(const char* text,
Print(vars, text);
}
-void Printer::Print(const char* text,
- const char* variable1, const string& value1,
- const char* variable2, const string& value2,
- const char* variable3, const string& value3) {
- map<string, string> vars;
- vars[variable1] = value1;
- vars[variable2] = value2;
- vars[variable3] = value3;
- Print(vars, text);
-}
-
void Printer::Indent() {
indent_ += " ";
}
@@ -168,7 +158,7 @@ void Printer::WriteRaw(const char* data, int size) {
if (failed_) return;
if (size == 0) return;
- if (at_start_of_line_ && (size > 0) && (data[0] != '\n')) {
+ if (at_start_of_line_) {
// Insert an indent.
at_start_of_line_ = false;
WriteRaw(indent_.data(), indent_.size());
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
index f06cbf2..de08538 100644
--- a/src/google/protobuf/io/printer.h
+++ b/src/google/protobuf/io/printer.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -82,11 +82,7 @@ class LIBPROTOBUF_EXPORT Printer {
// Like the first Print(), except the substitutions are given as parameters.
void Print(const char* text, const char* variable1, const string& value1,
const char* variable2, const string& value2);
- // Like the first Print(), except the substitutions are given as parameters.
- void Print(const char* text, const char* variable1, const string& value1,
- const char* variable2, const string& value2,
- const char* variable3, const string& value3);
- // TODO(kenton): Overloaded versions with more variables? Three seems
+ // TODO(kenton): Overloaded versions with more variables? Two seems
// to be enough.
// Indent text by two spaces. After calling Indent(), two spaces will be
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
index 1331a8d..580a53d 100644
--- a/src/google/protobuf/io/printer_unittest.cc
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -220,7 +220,7 @@ TEST(Printer, Indenting) {
}
// Death tests do not work on Windows as of yet.
-#ifdef PROTOBUF_HAS_DEATH_TEST
+#ifdef GTEST_HAS_DEATH_TEST
TEST(Printer, Death) {
char buffer[8192];
@@ -231,33 +231,9 @@ TEST(Printer, Death) {
EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
}
-#endif // PROTOBUF_HAS_DEATH_TEST
+#endif // GTEST_HAS_DEATH_TEST
-TEST(Printer, WriteFailurePartial) {
- char buffer[17];
-
- ArrayOutputStream output(buffer, sizeof(buffer));
- Printer printer(&output, '$');
-
- // Print 16 bytes to almost fill the buffer (should not fail).
- printer.Print("0123456789abcdef");
- EXPECT_FALSE(printer.failed());
-
- // Try to print 2 chars. Only one fits.
- printer.Print("<>");
- EXPECT_TRUE(printer.failed());
-
- // Anything else should fail too.
- printer.Print(" ");
- EXPECT_TRUE(printer.failed());
- printer.Print("blah");
- EXPECT_TRUE(printer.failed());
-
- // Buffer should contain the first 17 bytes written.
- EXPECT_EQ("0123456789abcdef<", string(buffer, sizeof(buffer)));
-}
-
-TEST(Printer, WriteFailureExact) {
+TEST(Printer, WriteFailure) {
char buffer[16];
ArrayOutputStream output(buffer, sizeof(buffer));
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
deleted file mode 100644
index 5697343..0000000
--- a/src/google/protobuf/io/strtod.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <google/protobuf/io/strtod.h>
-
-#include <cstdio>
-#include <cstring>
-#include <string>
-
-#include <google/protobuf/stubs/common.h>
-
-namespace google {
-namespace protobuf {
-namespace io {
-
-// ----------------------------------------------------------------------
-// NoLocaleStrtod()
-// This code will make you cry.
-// ----------------------------------------------------------------------
-
-namespace {
-
-// Returns a string identical to *input except that the character pointed to
-// by radix_pos (which should be '.') is replaced with the locale-specific
-// radix character.
-string LocalizeRadix(const char* input, const char* radix_pos) {
- // Determine the locale-specific radix character by calling sprintf() to
- // print the number 1.5, then stripping off the digits. As far as I can
- // tell, this is the only portable, thread-safe way to get the C library
- // to divuldge the locale's radix character. No, localeconv() is NOT
- // thread-safe.
- char temp[16];
- int size = sprintf(temp, "%.1f", 1.5);
- GOOGLE_CHECK_EQ(temp[0], '1');
- GOOGLE_CHECK_EQ(temp[size-1], '5');
- GOOGLE_CHECK_LE(size, 6);
-
- // Now replace the '.' in the input with it.
- string result;
- result.reserve(strlen(input) + size - 3);
- result.append(input, radix_pos);
- result.append(temp + 1, size - 2);
- result.append(radix_pos + 1);
- return result;
-}
-
-} // namespace
-
-double NoLocaleStrtod(const char* text, char** original_endptr) {
- // We cannot simply set the locale to "C" temporarily with setlocale()
- // as this is not thread-safe. Instead, we try to parse in the current
- // locale first. If parsing stops at a '.' character, then this is a
- // pretty good hint that we're actually in some other locale in which
- // '.' is not the radix character.
-
- char* temp_endptr;
- double result = strtod(text, &temp_endptr);
- if (original_endptr != NULL) *original_endptr = temp_endptr;
- if (*temp_endptr != '.') return result;
-
- // Parsing halted on a '.'. Perhaps we're in a different locale? Let's
- // try to replace the '.' with a locale-specific radix character and
- // try again.
- string localized = LocalizeRadix(text, temp_endptr);
- const char* localized_cstr = localized.c_str();
- char* localized_endptr;
- result = strtod(localized_cstr, &localized_endptr);
- if ((localized_endptr - localized_cstr) >
- (temp_endptr - text)) {
- // This attempt got further, so replacing the decimal must have helped.
- // Update original_endptr to point at the right location.
- if (original_endptr != NULL) {
- // size_diff is non-zero if the localized radix has multiple bytes.
- int size_diff = localized.size() - strlen(text);
- // const_cast is necessary to match the strtod() interface.
- *original_endptr = const_cast<char*>(
- text + (localized_endptr - localized_cstr - size_diff));
- }
- }
-
- return result;
-}
-
-} // namespace io
-} // namespace protobuf
-} // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
deleted file mode 100644
index c2efc8d..0000000
--- a/src/google/protobuf/io/strtod.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// A locale-independent version of strtod(), used to parse floating
-// point default values in .proto files, where the decimal separator
-// is always a dot.
-
-#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__
-#define GOOGLE_PROTOBUF_IO_STRTOD_H__
-
-namespace google {
-namespace protobuf {
-namespace io {
-
-// A locale-independent version of the standard strtod(), which always
-// uses a dot as the decimal separator.
-double NoLocaleStrtod(const char* str, char** endptr);
-
-} // namespace io
-} // namespace protobuf
-
-} // namespace google
-#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index ef2de30..38fa351 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -89,12 +89,8 @@
// exactly pretty.
#include <google/protobuf/io/tokenizer.h>
-#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stringprintf.h>
-#include <google/protobuf/io/strtod.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/strutil.h>
-#include <google/protobuf/stubs/stl_util.h>
namespace google {
namespace protobuf {
@@ -122,8 +118,6 @@ namespace {
CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
c == '\r' || c == '\v' || c == '\f');
-CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' ||
- c == '\r' || c == '\v' || c == '\f');
CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
@@ -193,16 +187,12 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
read_error_(false),
line_(0),
column_(0),
- record_target_(NULL),
- record_start_(-1),
+ token_start_(-1),
allow_f_after_float_(false),
- comment_style_(CPP_COMMENT_STYLE),
- require_space_after_number_(true),
- allow_multiline_strings_(false) {
+ comment_style_(CPP_COMMENT_STYLE) {
current_.line = 0;
current_.column = 0;
- current_.end_column = 0;
current_.type = TYPE_START;
Refresh();
@@ -247,9 +237,9 @@ void Tokenizer::Refresh() {
}
// If we're in a token, append the rest of the buffer to it.
- if (record_target_ != NULL && record_start_ < buffer_size_) {
- record_target_->append(buffer_ + record_start_, buffer_size_ - record_start_);
- record_start_ = 0;
+ if (token_start_ >= 0 && token_start_ < buffer_size_) {
+ current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_);
+ token_start_ = 0;
}
const void* data = NULL;
@@ -270,34 +260,23 @@ void Tokenizer::Refresh() {
current_char_ = buffer_[0];
}
-inline void Tokenizer::RecordTo(string* target) {
- record_target_ = target;
- record_start_ = buffer_pos_;
-}
-
-inline void Tokenizer::StopRecording() {
- // Note: The if() is necessary because some STL implementations crash when
- // you call string::append(NULL, 0), presumably because they are trying to
- // be helpful by detecting the NULL pointer, even though there's nothing
- // wrong with reading zero bytes from NULL.
- if (buffer_pos_ != record_start_) {
- record_target_->append(buffer_ + record_start_, buffer_pos_ - record_start_);
- }
- record_target_ = NULL;
- record_start_ = -1;
-}
-
inline void Tokenizer::StartToken() {
+ token_start_ = buffer_pos_;
current_.type = TYPE_START; // Just for the sake of initializing it.
current_.text.clear();
current_.line = line_;
current_.column = column_;
- RecordTo(&current_.text);
}
inline void Tokenizer::EndToken() {
- StopRecording();
- current_.end_column = column_;
+ // Note: The if() is necessary because some STL implementations crash when
+ // you call string::append(NULL, 0), presumably because they are trying to
+ // be helpful by detecting the NULL pointer, even though there's nothing
+ // wrong with reading zero bytes from NULL.
+ if (buffer_pos_ != token_start_) {
+ current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_);
+ }
+ token_start_ = -1;
}
// -------------------------------------------------------------------
@@ -353,16 +332,9 @@ void Tokenizer::ConsumeString(char delimiter) {
while (true) {
switch (current_char_) {
case '\0':
- AddError("Unexpected end of string.");
- return;
-
case '\n': {
- if (!allow_multiline_strings_) {
- AddError("String literals cannot cross line boundaries.");
- return;
- }
- NextChar();
- break;
+ AddError("String literals cannot cross line boundaries.");
+ return;
}
case '\\': {
@@ -379,27 +351,6 @@ void Tokenizer::ConsumeString(char delimiter) {
AddError("Expected hex digits for escape sequence.");
}
// Possibly followed by another hex digit, but again we don't care.
- } else if (TryConsume('u')) {
- if (!TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>()) {
- AddError("Expected four hex digits for \\u escape sequence.");
- }
- } else if (TryConsume('U')) {
- // We expect 8 hex digits; but only the range up to 0x10ffff is
- // legal.
- if (!TryConsume('0') ||
- !TryConsume('0') ||
- !(TryConsume('0') || TryConsume('1')) ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>() ||
- !TryConsumeOne<HexDigit>()) {
- AddError("Expected eight hex digits up to 10ffff for \\U escape "
- "sequence");
- }
} else {
AddError("Invalid escape sequence in string literal.");
}
@@ -459,7 +410,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
}
}
- if (LookingAt<Letter>() && require_space_after_number_) {
+ if (LookingAt<Letter>()) {
AddError("Need space between number and identifier.");
} else if (current_char_ == '.') {
if (is_float) {
@@ -473,51 +424,26 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
return is_float ? TYPE_FLOAT : TYPE_INTEGER;
}
-void Tokenizer::ConsumeLineComment(string* content) {
- if (content != NULL) RecordTo(content);
-
+void Tokenizer::ConsumeLineComment() {
while (current_char_ != '\0' && current_char_ != '\n') {
NextChar();
}
TryConsume('\n');
-
- if (content != NULL) StopRecording();
}
-void Tokenizer::ConsumeBlockComment(string* content) {
+void Tokenizer::ConsumeBlockComment() {
int start_line = line_;
int start_column = column_ - 2;
- if (content != NULL) RecordTo(content);
-
while (true) {
while (current_char_ != '\0' &&
current_char_ != '*' &&
- current_char_ != '/' &&
- current_char_ != '\n') {
+ current_char_ != '/') {
NextChar();
}
- if (TryConsume('\n')) {
- if (content != NULL) StopRecording();
-
- // Consume leading whitespace and asterisk;
- ConsumeZeroOrMore<WhitespaceNoNewline>();
- if (TryConsume('*')) {
- if (TryConsume('/')) {
- // End of comment.
- break;
- }
- }
-
- if (content != NULL) RecordTo(content);
- } else if (TryConsume('*') && TryConsume('/')) {
+ if (TryConsume('*') && TryConsume('/')) {
// End of comment.
- if (content != NULL) {
- StopRecording();
- // Strip trailing "*/".
- content->erase(content->size() - 2);
- }
break;
} else if (TryConsume('/') && current_char_ == '*') {
// Note: We didn't consume the '*' because if there is a '/' after it
@@ -528,59 +454,42 @@ void Tokenizer::ConsumeBlockComment(string* content) {
AddError("End-of-file inside block comment.");
error_collector_->AddError(
start_line, start_column, " Comment started here.");
- if (content != NULL) StopRecording();
break;
}
}
}
-Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() {
- if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
- if (TryConsume('/')) {
- return LINE_COMMENT;
- } else if (TryConsume('*')) {
- return BLOCK_COMMENT;
- } else {
- // Oops, it was just a slash. Return it.
- current_.type = TYPE_SYMBOL;
- current_.text = "/";
- current_.line = line_;
- current_.column = column_ - 1;
- current_.end_column = column_;
- return SLASH_NOT_COMMENT;
- }
- } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
- return LINE_COMMENT;
- } else {
- return NO_COMMENT;
- }
-}
-
// -------------------------------------------------------------------
bool Tokenizer::Next() {
- previous_ = current_;
+ TokenType last_token_type = current_.type;
+
+ // Did we skip any characters after the last token?
+ bool skipped_stuff = false;
while (!read_error_) {
- ConsumeZeroOrMore<Whitespace>();
-
- switch (TryConsumeCommentStart()) {
- case LINE_COMMENT:
- ConsumeLineComment(NULL);
- continue;
- case BLOCK_COMMENT:
- ConsumeBlockComment(NULL);
- continue;
- case SLASH_NOT_COMMENT:
+ if (TryConsumeOne<Whitespace>()) {
+ ConsumeZeroOrMore<Whitespace>();
+
+ } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
+ // Starting a comment?
+ if (TryConsume('/')) {
+ ConsumeLineComment();
+ } else if (TryConsume('*')) {
+ ConsumeBlockComment();
+ } else {
+ // Oops, it was just a slash. Return it.
+ current_.type = TYPE_SYMBOL;
+ current_.text = "/";
+ current_.line = line_;
+ current_.column = column_ - 1;
return true;
- case NO_COMMENT:
- break;
- }
+ }
- // Check for EOF before continuing.
- if (read_error_) break;
+ } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
+ ConsumeLineComment();
- if (LookingAt<Unprintable>() || current_char_ == '\0') {
+ } else if (LookingAt<Unprintable>() || current_char_ == '\0') {
AddError("Invalid control characters encountered in text.");
NextChar();
// Skip more unprintable characters, too. But, remember that '\0' is
@@ -608,9 +517,7 @@ bool Tokenizer::Next() {
if (TryConsumeOne<Digit>()) {
// It's a floating-point number.
- if (previous_.type == TYPE_IDENTIFIER &&
- current_.line == previous_.line &&
- current_.column == previous_.end_column) {
+ if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) {
// We don't accept syntax like "blah.123".
error_collector_->AddError(line_, column_ - 2,
"Need space between identifier and decimal point.");
@@ -628,12 +535,6 @@ bool Tokenizer::Next() {
ConsumeString('\'');
current_.type = TYPE_STRING;
} else {
- // Check if the high order bit is set.
- if (current_char_ & 0x80) {
- error_collector_->AddError(line_, column_,
- StringPrintf("Interpreting non ascii codepoint %d.",
- static_cast<unsigned char>(current_char_)));
- }
NextChar();
current_.type = TYPE_SYMBOL;
}
@@ -641,6 +542,8 @@ bool Tokenizer::Next() {
EndToken();
return true;
}
+
+ skipped_stuff = true;
}
// EOF
@@ -648,199 +551,9 @@ bool Tokenizer::Next() {
current_.text.clear();
current_.line = line_;
current_.column = column_;
- current_.end_column = column_;
return false;
}
-namespace {
-
-// Helper class for collecting comments and putting them in the right places.
-//
-// This basically just buffers the most recent comment until it can be decided
-// exactly where that comment should be placed. When Flush() is called, the
-// current comment goes into either prev_trailing_comments or detached_comments.
-// When the CommentCollector is destroyed, the last buffered comment goes into
-// next_leading_comments.
-class CommentCollector {
- public:
- CommentCollector(string* prev_trailing_comments,
- vector<string>* detached_comments,
- string* next_leading_comments)
- : prev_trailing_comments_(prev_trailing_comments),
- detached_comments_(detached_comments),
- next_leading_comments_(next_leading_comments),
- has_comment_(false),
- is_line_comment_(false),
- can_attach_to_prev_(true) {
- if (prev_trailing_comments != NULL) prev_trailing_comments->clear();
- if (detached_comments != NULL) detached_comments->clear();
- if (next_leading_comments != NULL) next_leading_comments->clear();
- }
-
- ~CommentCollector() {
- // Whatever is in the buffer is a leading comment.
- if (next_leading_comments_ != NULL && has_comment_) {
- comment_buffer_.swap(*next_leading_comments_);
- }
- }
-
- // About to read a line comment. Get the comment buffer pointer in order to
- // read into it.
- string* GetBufferForLineComment() {
- // We want to combine with previous line comments, but not block comments.
- if (has_comment_ && !is_line_comment_) {
- Flush();
- }
- has_comment_ = true;
- is_line_comment_ = true;
- return &comment_buffer_;
- }
-
- // About to read a block comment. Get the comment buffer pointer in order to
- // read into it.
- string* GetBufferForBlockComment() {
- if (has_comment_) {
- Flush();
- }
- has_comment_ = true;
- is_line_comment_ = false;
- return &comment_buffer_;
- }
-
- void ClearBuffer() {
- comment_buffer_.clear();
- has_comment_ = false;
- }
-
- // Called once we know that the comment buffer is complete and is *not*
- // connected to the next token.
- void Flush() {
- if (has_comment_) {
- if (can_attach_to_prev_) {
- if (prev_trailing_comments_ != NULL) {
- prev_trailing_comments_->append(comment_buffer_);
- }
- can_attach_to_prev_ = false;
- } else {
- if (detached_comments_ != NULL) {
- detached_comments_->push_back(comment_buffer_);
- }
- }
- ClearBuffer();
- }
- }
-
- void DetachFromPrev() {
- can_attach_to_prev_ = false;
- }
-
- private:
- string* prev_trailing_comments_;
- vector<string>* detached_comments_;
- string* next_leading_comments_;
-
- string comment_buffer_;
-
- // True if any comments were read into comment_buffer_. This can be true even
- // if comment_buffer_ is empty, namely if the comment was "/**/".
- bool has_comment_;
-
- // Is the comment in the comment buffer a line comment?
- bool is_line_comment_;
-
- // Is it still possible that we could be reading a comment attached to the
- // previous token?
- bool can_attach_to_prev_;
-};
-
-} // namespace
-
-bool Tokenizer::NextWithComments(string* prev_trailing_comments,
- vector<string>* detached_comments,
- string* next_leading_comments) {
- CommentCollector collector(prev_trailing_comments, detached_comments,
- next_leading_comments);
-
- if (current_.type == TYPE_START) {
- collector.DetachFromPrev();
- } else {
- // A comment appearing on the same line must be attached to the previous
- // declaration.
- ConsumeZeroOrMore<WhitespaceNoNewline>();
- switch (TryConsumeCommentStart()) {
- case LINE_COMMENT:
- ConsumeLineComment(collector.GetBufferForLineComment());
-
- // Don't allow comments on subsequent lines to be attached to a trailing
- // comment.
- collector.Flush();
- break;
- case BLOCK_COMMENT:
- ConsumeBlockComment(collector.GetBufferForBlockComment());
-
- ConsumeZeroOrMore<WhitespaceNoNewline>();
- if (!TryConsume('\n')) {
- // Oops, the next token is on the same line. If we recorded a comment
- // we really have no idea which token it should be attached to.
- collector.ClearBuffer();
- return Next();
- }
-
- // Don't allow comments on subsequent lines to be attached to a trailing
- // comment.
- collector.Flush();
- break;
- case SLASH_NOT_COMMENT:
- return true;
- case NO_COMMENT:
- if (!TryConsume('\n')) {
- // The next token is on the same line. There are no comments.
- return Next();
- }
- break;
- }
- }
-
- // OK, we are now on the line *after* the previous token.
- while (true) {
- ConsumeZeroOrMore<WhitespaceNoNewline>();
-
- switch (TryConsumeCommentStart()) {
- case LINE_COMMENT:
- ConsumeLineComment(collector.GetBufferForLineComment());
- break;
- case BLOCK_COMMENT:
- ConsumeBlockComment(collector.GetBufferForBlockComment());
-
- // Consume the rest of the line so that we don't interpret it as a
- // blank line the next time around the loop.
- ConsumeZeroOrMore<WhitespaceNoNewline>();
- TryConsume('\n');
- break;
- case SLASH_NOT_COMMENT:
- return true;
- case NO_COMMENT:
- if (TryConsume('\n')) {
- // Completely blank line.
- collector.Flush();
- collector.DetachFromPrev();
- } else {
- bool result = Next();
- if (!result ||
- current_.text == "}" ||
- current_.text == "]" ||
- current_.text == ")") {
- // It looks like we're at the end of a scope. In this case it
- // makes no sense to attach a comment to the following token.
- collector.Flush();
- }
- return result;
- }
- break;
- }
- }
-}
-
// -------------------------------------------------------------------
// Token-parsing helpers. Remember that these don't need to report
// errors since any errors should already have been reported while
@@ -910,138 +623,17 @@ double Tokenizer::ParseFloat(const string& text) {
return result;
}
-// Helper to append a Unicode code point to a string as UTF8, without bringing
-// in any external dependencies.
-static void AppendUTF8(uint32 code_point, string* output) {
- uint32 tmp = 0;
- int len = 0;
- if (code_point <= 0x7f) {
- tmp = code_point;
- len = 1;
- } else if (code_point <= 0x07ff) {
- tmp = 0x0000c080 |
- ((code_point & 0x07c0) << 2) |
- (code_point & 0x003f);
- len = 2;
- } else if (code_point <= 0xffff) {
- tmp = 0x00e08080 |
- ((code_point & 0xf000) << 4) |
- ((code_point & 0x0fc0) << 2) |
- (code_point & 0x003f);
- len = 3;
- } else if (code_point <= 0x1fffff) {
- tmp = 0xf0808080 |
- ((code_point & 0x1c0000) << 6) |
- ((code_point & 0x03f000) << 4) |
- ((code_point & 0x000fc0) << 2) |
- (code_point & 0x003f);
- len = 4;
- } else {
- // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
- // normally only defined up to there as well.
- StringAppendF(output, "\\U%08x", code_point);
- return;
- }
- tmp = ghtonl(tmp);
- output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
-}
-
-// Try to read <len> hex digits from ptr, and stuff the numeric result into
-// *result. Returns true if that many digits were successfully consumed.
-static bool ReadHexDigits(const char* ptr, int len, uint32* result) {
- *result = 0;
- if (len == 0) return false;
- for (const char* end = ptr + len; ptr < end; ++ptr) {
- if (*ptr == '\0') return false;
- *result = (*result << 4) + DigitValue(*ptr);
- }
- return true;
-}
-
-// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range
-// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail
-// surrogate. These numbers are in a reserved range of Unicode code points, so
-// if we encounter such a pair we know how to parse it and convert it into a
-// single code point.
-static const uint32 kMinHeadSurrogate = 0xd800;
-static const uint32 kMaxHeadSurrogate = 0xdc00;
-static const uint32 kMinTrailSurrogate = 0xdc00;
-static const uint32 kMaxTrailSurrogate = 0xe000;
-
-static inline bool IsHeadSurrogate(uint32 code_point) {
- return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate);
-}
-
-static inline bool IsTrailSurrogate(uint32 code_point) {
- return (code_point >= kMinTrailSurrogate) &&
- (code_point < kMaxTrailSurrogate);
-}
-
-// Combine a head and trail surrogate into a single Unicode code point.
-static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) {
- GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate));
- GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate));
- return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) |
- (trail_surrogate - kMinTrailSurrogate));
-}
-
-// Convert the escape sequence parameter to a number of expected hex digits.
-static inline int UnicodeLength(char key) {
- if (key == 'u') return 4;
- if (key == 'U') return 8;
- return 0;
-}
-
-// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
-// to parse that sequence. On success, returns a pointer to the first char
-// beyond that sequence, and fills in *code_point. On failure, returns ptr
-// itself.
-static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) {
- const char* p = ptr;
- // Fetch the code point.
- const int len = UnicodeLength(*p++);
- if (!ReadHexDigits(p, len, code_point))
- return ptr;
- p += len;
-
- // Check if the code point we read is a "head surrogate." If so, then we
- // expect it to be immediately followed by another code point which is a valid
- // "trail surrogate," and together they form a UTF-16 pair which decodes into
- // a single Unicode point. Trail surrogates may only use \u, not \U.
- if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') {
- uint32 trail_surrogate;
- if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
- IsTrailSurrogate(trail_surrogate)) {
- *code_point = AssembleUTF16(*code_point, trail_surrogate);
- p += 6;
- }
- // If this failed, then we just emit the head surrogate as a code point.
- // It's bogus, but so is the string.
- }
-
- return p;
-}
-
-// The text string must begin and end with single or double quote
-// characters.
void Tokenizer::ParseStringAppend(const string& text, string* output) {
- // Reminder: text[0] is always a quote character. (If text is
- // empty, it's invalid, so we'll just return).
- const size_t text_size = text.size();
- if (text_size == 0) {
+ // Reminder: text[0] is always the quote character. (If text is
+ // empty, it's invalid, so we'll just return.)
+ if (text.empty()) {
GOOGLE_LOG(DFATAL)
<< " Tokenizer::ParseStringAppend() passed text that could not"
" have been tokenized as a string: " << CEscape(text);
return;
}
- // Reserve room for new string. The branch is necessary because if
- // there is already space available the reserve() call might
- // downsize the output.
- const size_t new_len = text_size + output->size();
- if (new_len > output->capacity()) {
- output->reserve(new_len);
- }
+ output->reserve(output->size() + text.size());
// Loop through the string copying characters to "output" and
// interpreting escape sequences. Note that any invalid escape
@@ -1079,47 +671,19 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
}
output->push_back(static_cast<char>(code));
- } else if (*ptr == 'u' || *ptr == 'U') {
- uint32 unicode;
- const char* end = FetchUnicodePoint(ptr, &unicode);
- if (end == ptr) {
- // Failure: Just dump out what we saw, don't try to parse it.
- output->push_back(*ptr);
- } else {
- AppendUTF8(unicode, output);
- ptr = end - 1; // Because we're about to ++ptr.
- }
} else {
// Some other escape code.
output->push_back(TranslateEscape(*ptr));
}
- } else if (*ptr == text[0] && ptr[1] == '\0') {
- // Ignore final quote matching the starting quote.
+ } else if (*ptr == text[0]) {
+ // Ignore quote matching the starting quote.
} else {
output->push_back(*ptr);
}
}
-}
-template<typename CharacterClass>
-static bool AllInClass(const string& s) {
- for (int i = 0; i < s.size(); ++i) {
- if (!CharacterClass::InClass(s[i]))
- return false;
- }
- return true;
-}
-
-bool Tokenizer::IsIdentifier(const string& text) {
- // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
- if (text.size() == 0)
- return false;
- if (!Letter::InClass(text.at(0)))
- return false;
- if (!AllInClass<Alphanumeric>(text.substr(1)))
- return false;
- return true;
+ return;
}
} // namespace io
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
index 8c6220a..d115161 100644
--- a/src/google/protobuf/io/tokenizer.h
+++ b/src/google/protobuf/io/tokenizer.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -38,7 +38,6 @@
#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__
#include <string>
-#include <vector>
#include <google/protobuf/stubs/common.h>
namespace google {
@@ -67,8 +66,7 @@ class LIBPROTOBUF_EXPORT ErrorCollector {
// Indicates that there was a warning in the input at the given line and
// column numbers. The numbers are zero-based, so you may want to add
// 1 to each before printing them.
- virtual void AddWarning(int /* line */, int /* column */,
- const string& /* message */) { }
+ virtual void AddWarning(int line, int column, const string& message) { }
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
@@ -124,68 +122,16 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// the token within the input stream. They are zero-based.
int line;
int column;
- int end_column;
};
// Get the current token. This is updated when Next() is called. Before
// the first call to Next(), current() has type TYPE_START and no contents.
const Token& current();
- // Return the previous token -- i.e. what current() returned before the
- // previous call to Next().
- const Token& previous();
-
// Advance to the next token. Returns false if the end of the input is
// reached.
bool Next();
- // Like Next(), but also collects comments which appear between the previous
- // and next tokens.
- //
- // Comments which appear to be attached to the previous token are stored
- // in *prev_tailing_comments. Comments which appear to be attached to the
- // next token are stored in *next_leading_comments. Comments appearing in
- // between which do not appear to be attached to either will be added to
- // detached_comments. Any of these parameters can be NULL to simply discard
- // the comments.
- //
- // A series of line comments appearing on consecutive lines, with no other
- // tokens appearing on those lines, will be treated as a single comment.
- //
- // Only the comment content is returned; comment markers (e.g. //) are
- // stripped out. For block comments, leading whitespace and an asterisk will
- // be stripped from the beginning of each line other than the first. Newlines
- // are included in the output.
- //
- // Examples:
- //
- // optional int32 foo = 1; // Comment attached to foo.
- // // Comment attached to bar.
- // optional int32 bar = 2;
- //
- // optional string baz = 3;
- // // Comment attached to baz.
- // // Another line attached to baz.
- //
- // // Comment attached to qux.
- // //
- // // Another line attached to qux.
- // optional double qux = 4;
- //
- // // Detached comment. This is not attached to qux or corge
- // // because there are blank lines separating it from both.
- //
- // optional string corge = 5;
- // /* Block comment attached
- // * to corge. Leading asterisks
- // * will be removed. */
- // /* Block comment attached to
- // * grault. */
- // optional int32 grault = 6;
- bool NextWithComments(string* prev_trailing_comments,
- vector<string>* detached_comments,
- string* next_leading_comments);
-
// Parse helpers ---------------------------------------------------
// Parses a TYPE_FLOAT token. This never fails, so long as the text actually
@@ -229,27 +175,11 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Sets the comment style.
void set_comment_style(CommentStyle style) { comment_style_ = style; }
- // Whether to require whitespace between a number and a field name.
- // Default is true. Do not use this; for Google-internal cleanup only.
- void set_require_space_after_number(bool require) {
- require_space_after_number_ = require;
- }
-
- // Whether to allow string literals to span multiple lines. Default is false.
- // Do not use this; for Google-internal cleanup only.
- void set_allow_multiline_strings(bool allow) {
- allow_multiline_strings_ = allow;
- }
-
- // External helper: validate an identifier.
- static bool IsIdentifier(const string& text);
-
// -----------------------------------------------------------------
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
Token current_; // Returned by current().
- Token previous_; // Returned by previous().
ZeroCopyInputStream* input_;
ErrorCollector* error_collector_;
@@ -264,18 +194,15 @@ class LIBPROTOBUF_EXPORT Tokenizer {
int line_;
int column_;
- // String to which text should be appended as we advance through it.
- // Call RecordTo(&str) to start recording and StopRecording() to stop.
- // E.g. StartToken() calls RecordTo(&current_.text). record_start_ is the
- // position within the current buffer where recording started.
- string* record_target_;
- int record_start_;
+ // Position in buffer_ where StartToken() was called. If the token
+ // started in the previous buffer, this is zero, and current_.text already
+ // contains the part of the token from the previous buffer. If not
+ // currently parsing a token, this is -1.
+ int token_start_;
// Options.
bool allow_f_after_float_;
CommentStyle comment_style_;
- bool require_space_after_number_;
- bool allow_multiline_strings_;
// Since we count columns we need to interpret tabs somehow. We'll take
// the standard 8-character definition for lack of any way to do better.
@@ -290,9 +217,6 @@ class LIBPROTOBUF_EXPORT Tokenizer {
// Read a new buffer from the input.
void Refresh();
- inline void RecordTo(string* target);
- inline void StopRecording();
-
// Called when the current character is the first character of a new
// token (not including whitespace or comments).
inline void StartToken();
@@ -325,28 +249,9 @@ class LIBPROTOBUF_EXPORT Tokenizer {
TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot);
// Consume the rest of a line.
- void ConsumeLineComment(string* content);
+ void ConsumeLineComment();
// Consume until "*/".
- void ConsumeBlockComment(string* content);
-
- enum NextCommentStatus {
- // Started a line comment.
- LINE_COMMENT,
-
- // Started a block comment.
- BLOCK_COMMENT,
-
- // Consumed a slash, then realized it wasn't a comment. current_ has
- // been filled in with a slash token. The caller should return it.
- SLASH_NOT_COMMENT,
-
- // We do not appear to be starting a comment here.
- NO_COMMENT
- };
-
- // If we're at the start of a new comment, consume it and return what kind
- // of comment it is.
- NextCommentStatus TryConsumeCommentStart();
+ void ConsumeBlockComment();
// -----------------------------------------------------------------
// These helper methods make the parsing code more readable. The
@@ -386,10 +291,6 @@ inline const Tokenizer::Token& Tokenizer::current() {
return current_;
}
-inline const Tokenizer::Token& Tokenizer::previous() {
- return previous_;
-}
-
inline void Tokenizer::ParseString(const string& text, string* output) {
output->clear();
ParseStringAppend(text, output);
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index de096fb..358ec56 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -32,10 +32,9 @@
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
-#include <limits.h>
-#include <math.h>
-
#include <vector>
+#include <math.h>
+#include <limits.h>
#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -258,7 +257,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
EXPECT_EQ("", tokenizer.current().text);
EXPECT_EQ(0, tokenizer.current().line);
EXPECT_EQ(0, tokenizer.current().column);
- EXPECT_EQ(0, tokenizer.current().end_column);
// Parse the token.
ASSERT_TRUE(tokenizer.Next());
@@ -270,8 +268,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
// Check that it is located at the beginning of the input
EXPECT_EQ(0, tokenizer.current().line);
EXPECT_EQ(0, tokenizer.current().column);
- EXPECT_EQ(kSimpleTokenCases_case.input.size(),
- tokenizer.current().end_column);
// There should be no more input.
EXPECT_FALSE(tokenizer.Next());
@@ -281,8 +277,6 @@ TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
EXPECT_EQ("", tokenizer.current().text);
EXPECT_EQ(0, tokenizer.current().line);
EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
- EXPECT_EQ(kSimpleTokenCases_case.input.size(),
- tokenizer.current().end_column);
// There should be no errors.
EXPECT_TRUE(error_collector.text_.empty());
@@ -345,77 +339,76 @@ MultiTokenCase kMultiTokenCases[] = {
// Test all token types at the same time.
{ "foo 1 1.2 + 'bar'", {
- { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0, 3 },
- { Tokenizer::TYPE_INTEGER , "1" , 0, 4, 5 },
- { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6, 9 },
- { Tokenizer::TYPE_SYMBOL , "+" , 0, 10, 11 },
- { Tokenizer::TYPE_STRING , "'bar'", 0, 12, 17 },
- { Tokenizer::TYPE_END , "" , 0, 17, 17 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0 },
+ { Tokenizer::TYPE_INTEGER , "1" , 0, 4 },
+ { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 10 },
+ { Tokenizer::TYPE_STRING , "'bar'", 0, 12 },
+ { Tokenizer::TYPE_END , "" , 0, 17 },
}},
// Test that consecutive symbols are parsed as separate tokens.
{ "!@+%", {
- { Tokenizer::TYPE_SYMBOL , "!" , 0, 0, 1 },
- { Tokenizer::TYPE_SYMBOL , "@" , 0, 1, 2 },
- { Tokenizer::TYPE_SYMBOL , "+" , 0, 2, 3 },
- { Tokenizer::TYPE_SYMBOL , "%" , 0, 3, 4 },
- { Tokenizer::TYPE_END , "" , 0, 4, 4 },
+ { Tokenizer::TYPE_SYMBOL , "!" , 0, 0 },
+ { Tokenizer::TYPE_SYMBOL , "@" , 0, 1 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 2 },
+ { Tokenizer::TYPE_SYMBOL , "%" , 0, 3 },
+ { Tokenizer::TYPE_END , "" , 0, 4 },
}},
// Test that newlines affect line numbers correctly.
{ "foo bar\nrab oof", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4, 7 },
- { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4, 7 },
- { Tokenizer::TYPE_END , "" , 1, 7, 7 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4 },
+ { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4 },
+ { Tokenizer::TYPE_END , "" , 1, 7 },
}},
// Test that tabs affect column numbers correctly.
{ "foo\tbar \tbaz", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8, 11 },
- { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19 },
- { Tokenizer::TYPE_END , "" , 0, 19, 19 },
- }},
-
- // Test that tabs in string literals affect column numbers correctly.
- { "\"foo\tbar\" baz", {
- { Tokenizer::TYPE_STRING , "\"foo\tbar\"", 0, 0, 12 },
- { Tokenizer::TYPE_IDENTIFIER, "baz" , 0, 13, 16 },
- { Tokenizer::TYPE_END , "" , 0, 16, 16 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 },
+ { Tokenizer::TYPE_END , "" , 0, 19 },
}},
// Test that line comments are ignored.
{ "foo // This is a comment\n"
"bar // This is another comment", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0, 3 },
- { Tokenizer::TYPE_END , "" , 1, 30, 30 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0 },
+ { Tokenizer::TYPE_END , "" , 1, 30 },
}},
// Test that block comments are ignored.
{ "foo /* This is a block comment */ bar", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37 },
- { Tokenizer::TYPE_END , "" , 0, 37, 37 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 },
+ { Tokenizer::TYPE_END , "" , 0, 37 },
}},
// Test that sh-style comments are not ignored by default.
{ "foo # bar\n"
"baz", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_SYMBOL , "#" , 0, 4, 5 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9 },
- { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3 },
- { Tokenizer::TYPE_END , "" , 1, 3, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_SYMBOL , "#" , 0, 4 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 },
+ { Tokenizer::TYPE_END , "" , 1, 3 },
+ }},
+
+ // Bytes with the high-order bit set should not be seen as control characters.
+ { "\300", {
+ { Tokenizer::TYPE_SYMBOL, "\300", 0, 0 },
+ { Tokenizer::TYPE_END , "" , 0, 1 },
}},
// Test all whitespace chars
{ "foo\n\t\r\v\fbar", {
- { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
- { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14 },
- { Tokenizer::TYPE_END , "" , 1, 14, 14 },
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11 },
+ { Tokenizer::TYPE_END , "" , 1, 14 },
}},
};
@@ -432,7 +425,6 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
EXPECT_EQ("", tokenizer.current().text);
EXPECT_EQ(0, tokenizer.current().line);
EXPECT_EQ(0, tokenizer.current().column);
- EXPECT_EQ(0, tokenizer.current().end_column);
// Loop through all expected tokens.
int i = 0;
@@ -442,8 +434,6 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);
- Tokenizer::Token previous = tokenizer.current();
-
// Next() should only return false when it hits the end token.
if (token.type != Tokenizer::TYPE_END) {
ASSERT_TRUE(tokenizer.Next());
@@ -451,19 +441,11 @@ TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
ASSERT_FALSE(tokenizer.Next());
}
- // Check that the previous token is set correctly.
- EXPECT_EQ(previous.type, tokenizer.previous().type);
- EXPECT_EQ(previous.text, tokenizer.previous().text);
- EXPECT_EQ(previous.line, tokenizer.previous().line);
- EXPECT_EQ(previous.column, tokenizer.previous().column);
- EXPECT_EQ(previous.end_column, tokenizer.previous().end_column);
-
// Check that the token matches the expected one.
EXPECT_EQ(token.type, tokenizer.current().type);
EXPECT_EQ(token.text, tokenizer.current().text);
EXPECT_EQ(token.line, tokenizer.current().line);
EXPECT_EQ(token.column, tokenizer.current().column);
- EXPECT_EQ(token.end_column, tokenizer.current().end_column);
} while (token.type != Tokenizer::TYPE_END);
@@ -509,217 +491,6 @@ TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
// -------------------------------------------------------------------
-// In each case, the input is expected to have two tokens named "prev" and
-// "next" with comments in between.
-struct DocCommentCase {
- string input;
-
- const char* prev_trailing_comments;
- const char* detached_comments[10];
- const char* next_leading_comments;
-};
-
-inline ostream& operator<<(ostream& out,
- const DocCommentCase& test_case) {
- return out << CEscape(test_case.input);
-}
-
-DocCommentCase kDocCommentCases[] = {
- {
- "prev next",
-
- "",
- {},
- ""
- },
-
- {
- "prev /* ignored */ next",
-
- "",
- {},
- ""
- },
-
- {
- "prev // trailing comment\n"
- "next",
-
- " trailing comment\n",
- {},
- ""
- },
-
- {
- "prev\n"
- "// leading comment\n"
- "// line 2\n"
- "next",
-
- "",
- {},
- " leading comment\n"
- " line 2\n"
- },
-
- {
- "prev\n"
- "// trailing comment\n"
- "// line 2\n"
- "\n"
- "next",
-
- " trailing comment\n"
- " line 2\n",
- {},
- ""
- },
-
- {
- "prev // trailing comment\n"
- "// leading comment\n"
- "// line 2\n"
- "next",
-
- " trailing comment\n",
- {},
- " leading comment\n"
- " line 2\n"
- },
-
- {
- "prev /* trailing block comment */\n"
- "/* leading block comment\n"
- " * line 2\n"
- " * line 3 */"
- "next",
-
- " trailing block comment ",
- {},
- " leading block comment\n"
- " line 2\n"
- " line 3 "
- },
-
- {
- "prev\n"
- "/* trailing block comment\n"
- " * line 2\n"
- " * line 3\n"
- " */\n"
- "/* leading block comment\n"
- " * line 2\n"
- " * line 3 */"
- "next",
-
- " trailing block comment\n"
- " line 2\n"
- " line 3\n",
- {},
- " leading block comment\n"
- " line 2\n"
- " line 3 "
- },
-
- {
- "prev\n"
- "// trailing comment\n"
- "\n"
- "// detached comment\n"
- "// line 2\n"
- "\n"
- "// second detached comment\n"
- "/* third detached comment\n"
- " * line 2 */\n"
- "// leading comment\n"
- "next",
-
- " trailing comment\n",
- {
- " detached comment\n"
- " line 2\n",
- " second detached comment\n",
- " third detached comment\n"
- " line 2 "
- },
- " leading comment\n"
- },
-
- {
- "prev /**/\n"
- "\n"
- "// detached comment\n"
- "\n"
- "// leading comment\n"
- "next",
-
- "",
- {
- " detached comment\n"
- },
- " leading comment\n"
- },
-
- {
- "prev /**/\n"
- "// leading comment\n"
- "next",
-
- "",
- {},
- " leading comment\n"
- },
- };
-
-TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
- // Set up the tokenizer.
- TestInputStream input(kDocCommentCases_case.input.data(),
- kDocCommentCases_case.input.size(),
- kBlockSizes_case);
- TestErrorCollector error_collector;
- Tokenizer tokenizer(&input, &error_collector);
-
- // Set up a second tokenizer where we'll pass all NULLs to NextWithComments().
- TestInputStream input2(kDocCommentCases_case.input.data(),
- kDocCommentCases_case.input.size(),
- kBlockSizes_case);
- Tokenizer tokenizer2(&input2, &error_collector);
-
- tokenizer.Next();
- tokenizer2.Next();
-
- EXPECT_EQ("prev", tokenizer.current().text);
- EXPECT_EQ("prev", tokenizer2.current().text);
-
- string prev_trailing_comments;
- vector<string> detached_comments;
- string next_leading_comments;
- tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
- &next_leading_comments);
- tokenizer2.NextWithComments(NULL, NULL, NULL);
- EXPECT_EQ("next", tokenizer.current().text);
- EXPECT_EQ("next", tokenizer2.current().text);
-
- EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
- prev_trailing_comments);
-
- for (int i = 0; i < detached_comments.size(); i++) {
- ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases));
- ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
- EXPECT_EQ(kDocCommentCases_case.detached_comments[i],
- detached_comments[i]);
- }
-
- // Verify that we matched all the detached comments.
- EXPECT_EQ(NULL,
- kDocCommentCases_case.detached_comments[detached_comments.size()]);
-
- EXPECT_EQ(kDocCommentCases_case.next_leading_comments,
- next_leading_comments);
-}
-
-// -------------------------------------------------------------------
-
// Test parse helpers. It's not really worth setting up a full data-driven
// test here.
TEST_F(TokenizerTest, ParseInteger) {
@@ -735,7 +506,7 @@ TEST_F(TokenizerTest, ParseInteger) {
EXPECT_EQ(0, ParseInteger("0x"));
uint64 i;
-#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
// Test invalid integers that will never be tokenized as integers.
EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
"passed text that could not have been tokenized as an integer");
@@ -747,7 +518,7 @@ TEST_F(TokenizerTest, ParseInteger) {
"passed text that could not have been tokenized as an integer");
EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
"passed text that could not have been tokenized as an integer");
-#endif // PROTOBUF_HAS_DEATH_TEST
+#endif // GTEST_HAS_DEATH_TEST
// Test overflows.
EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
@@ -790,7 +561,7 @@ TEST_F(TokenizerTest, ParseFloat) {
EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
-#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
// Test invalid integers that will never be tokenized as integers.
EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
"passed text that could not have been tokenized as a float");
@@ -798,7 +569,7 @@ TEST_F(TokenizerTest, ParseFloat) {
"passed text that could not have been tokenized as a float");
EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
"passed text that could not have been tokenized as a float");
-#endif // PROTOBUF_HAS_DEATH_TEST
+#endif // GTEST_HAS_DEATH_TEST
}
TEST_F(TokenizerTest, ParseString) {
@@ -820,27 +591,11 @@ TEST_F(TokenizerTest, ParseString) {
Tokenizer::ParseString("'\\", &output);
EXPECT_EQ("\\", output);
- // Experiment with Unicode escapes. Here are one-, two- and three-byte Unicode
- // characters.
- Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output);
- EXPECT_EQ("$¢€𤭢XX", output);
- // Same thing encoded using UTF16.
- Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output);
- EXPECT_EQ("$¢€𤭢XX", output);
- // Here's some broken UTF16; there's a head surrogate with no tail surrogate.
- // We just output this as if it were UTF8; it's not a defined code point, but
- // it has a defined encoding.
- Tokenizer::ParseString("'\\ud852XX'", &output);
- EXPECT_EQ("\xed\xa1\x92XX", output);
- // Malformed escape: Demons may fly out of the nose.
- Tokenizer::ParseString("\\u0", &output);
- EXPECT_EQ("u0", output);
-
// Test invalid strings that will never be tokenized as strings.
-#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
"passed text that could not have been tokenized as a string");
-#endif // PROTOBUF_HAS_DEATH_TEST
+#endif // GTEST_HAS_DEATH_TEST
}
TEST_F(TokenizerTest, ParseStringAppend) {
@@ -877,15 +632,9 @@ ErrorCase kErrorCases[] = {
{ "'\\x' foo", true,
"0:3: Expected hex digits for escape sequence.\n" },
{ "'foo", false,
- "0:4: Unexpected end of string.\n" },
+ "0:4: String literals cannot cross line boundaries.\n" },
{ "'bar\nfoo", true,
"0:4: String literals cannot cross line boundaries.\n" },
- { "'\\u01' foo", true,
- "0:5: Expected four hex digits for \\u escape sequence.\n" },
- { "'\\u01' foo", true,
- "0:5: Expected four hex digits for \\u escape sequence.\n" },
- { "'\\uXYZ' foo", true,
- "0:3: Expected four hex digits for \\u escape sequence.\n" },
// Integer errors.
{ "123foo", true,
@@ -945,10 +694,6 @@ ErrorCase kErrorCases[] = {
"0:0: Invalid control characters encountered in text.\n" },
{ string("\0\0foo", 5), true,
"0:0: Invalid control characters encountered in text.\n" },
-
- // Check error from high order bits set
- { "\300foo", true,
- "0:0: Interpreting non ascii codepoint 192.\n" },
};
TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
@@ -966,7 +711,7 @@ TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
}
// Check that the errors match what was expected.
- EXPECT_EQ(kErrorCases_case.errors, error_collector.text_);
+ EXPECT_EQ(error_collector.text_, kErrorCases_case.errors);
// If the error was recoverable, make sure we saw "foo" after it.
if (kErrorCases_case.recoverable) {
@@ -992,7 +737,6 @@ TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
EXPECT_EQ(strlen("foo"), input.ByteCount());
}
-
} // namespace
} // namespace io
} // namespace protobuf
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
index f77c768..dad6ff1 100644
--- a/src/google/protobuf/io/zero_copy_stream.cc
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -34,7 +34,6 @@
#include <google/protobuf/io/zero_copy_stream.h>
-#include <google/protobuf/stubs/common.h>
namespace google {
namespace protobuf {
@@ -44,14 +43,6 @@ ZeroCopyInputStream::~ZeroCopyInputStream() {}
ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
-bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */,
- int /* size */) {
- GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. "
- "Reaching here usually means a ZeroCopyOutputStream "
- "implementation bug.";
- return false;
-}
-
} // namespace io
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
index 52650fc..db5326f 100644
--- a/src/google/protobuf/io/zero_copy_stream.h
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -226,16 +226,6 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
// Returns the total number of bytes written since this object was created.
virtual int64 ByteCount() const = 0;
- // Write a given chunk of data to the output. Some output streams may
- // implement this in a way that avoids copying. Check AllowsAliasing() before
- // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is
- // called on a stream that does not allow aliasing.
- //
- // NOTE: It is caller's responsibility to ensure that the chunk of memory
- // remains live until all of the data has been consumed from the stream.
- virtual bool WriteAliasedRaw(const void* data, int size);
- virtual bool AllowsAliasing() const { return false; }
-
private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
index f7901b2..1384c74 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -46,8 +46,7 @@
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
-
+#include <google/protobuf/stubs/stl_util-inl.h>
namespace google {
namespace protobuf {
@@ -413,9 +412,7 @@ int64 ConcatenatingInputStream::ByteCount() const {
LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
int64 limit)
- : input_(input), limit_(limit) {
- prior_bytes_read_ = input_->ByteCount();
-}
+ : input_(input), limit_(limit) {}
LimitingInputStream::~LimitingInputStream() {
// If we overshot the limit, back up.
@@ -459,9 +456,9 @@ bool LimitingInputStream::Skip(int count) {
int64 LimitingInputStream::ByteCount() const {
if (limit_ < 0) {
- return input_->ByteCount() + limit_ - prior_bytes_read_;
+ return input_->ByteCount() + limit_;
} else {
- return input_->ByteCount() - prior_bytes_read_;
+ return input_->ByteCount();
}
}
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
index 0746fa6..9fedb00 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -344,7 +344,6 @@ class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
private:
ZeroCopyInputStream* input_;
int64 limit_; // Decreases as we go, becomes negative if we overshoot.
- int64 prior_bytes_read_; // Bytes read on underlying stream at construction
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
};
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
index 58aff0e..e801251 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -32,13 +32,9 @@
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
-#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
-
-#include <algorithm>
-#include <limits>
-
+#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
+#include <google/protobuf/stubs/stl_util-inl.h>
namespace google {
namespace protobuf {
@@ -163,23 +159,15 @@ bool StringOutputStream::Next(void** data, int* size) {
// without a memory allocation this way.
STLStringResizeUninitialized(target_, target_->capacity());
} else {
- // Size has reached capacity, try to double the size.
- if (old_size > std::numeric_limits<int>::max() / 2) {
- // Can not double the size otherwise it is going to cause integer
- // overflow in the expression below: old_size * 2 ";
- GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for "
- << "StringOutputStream.";
- return false;
- }
- // Double the size, also make sure that the new size is at least
- // kMinimumSize.
+ // Size has reached capacity, so double the size. Also make sure
+ // that the new size is at least kMinimumSize.
STLStringResizeUninitialized(
target_,
max(old_size * 2,
kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness.
}
- *data = mutable_string_data(target_) + old_size;
+ *data = string_as_array(target_) + old_size;
*size = target_->size() - old_size;
return true;
}
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
index e18da72..153f543 100644
--- a/src/google/protobuf/io/zero_copy_stream_impl_lite.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -48,7 +48,6 @@
#include <iosfwd>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stl_util.h>
namespace google {
@@ -334,19 +333,6 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea
// ===================================================================
-// Return a pointer to mutable characters underlying the given string. The
-// return value is valid until the next time the string is resized. We
-// trust the caller to treat the return value as an array of length s->size().
-inline char* mutable_string_data(string* s) {
-#ifdef LANG_CXX11
- // This should be simpler & faster than string_as_array() because the latter
- // is guaranteed to return NULL when *s is empty, so it has to check for that.
- return &(*s)[0];
-#else
- return string_as_array(s);
-#endif
-}
-
} // namespace io
} // namespace protobuf
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
index bf978cc..8229ee6 100644
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -61,7 +61,6 @@
#include <sstream>
#include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/io/coded_stream.h>
#if HAVE_ZLIB
#include <google/protobuf/io/gzip_stream.h>
@@ -286,57 +285,6 @@ TEST_F(IoTest, ArrayIo) {
}
}
-TEST_F(IoTest, TwoSessionWrite) {
- // Test that two concatenated write sessions read correctly
-
- static const char* strA = "0123456789";
- static const char* strB = "WhirledPeas";
- const int kBufferSize = 2*1024;
- uint8* buffer = new uint8[kBufferSize];
- char* temp_buffer = new char[40];
-
- for (int i = 0; i < kBlockSizeCount; i++) {
- for (int j = 0; j < kBlockSizeCount; j++) {
- ArrayOutputStream* output =
- new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
- CodedOutputStream* coded_output = new CodedOutputStream(output);
- coded_output->WriteVarint32(strlen(strA));
- coded_output->WriteRaw(strA, strlen(strA));
- delete coded_output; // flush
- int64 pos = output->ByteCount();
- delete output;
- output = new ArrayOutputStream(
- buffer + pos, kBufferSize - pos, kBlockSizes[i]);
- coded_output = new CodedOutputStream(output);
- coded_output->WriteVarint32(strlen(strB));
- coded_output->WriteRaw(strB, strlen(strB));
- delete coded_output; // flush
- int64 size = pos + output->ByteCount();
- delete output;
-
- ArrayInputStream* input =
- new ArrayInputStream(buffer, size, kBlockSizes[j]);
- CodedInputStream* coded_input = new CodedInputStream(input);
- uint32 insize;
- EXPECT_TRUE(coded_input->ReadVarint32(&insize));
- EXPECT_EQ(strlen(strA), insize);
- EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
- EXPECT_EQ(0, memcmp(temp_buffer, strA, insize));
-
- EXPECT_TRUE(coded_input->ReadVarint32(&insize));
- EXPECT_EQ(strlen(strB), insize);
- EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
- EXPECT_EQ(0, memcmp(temp_buffer, strB, insize));
-
- delete coded_input;
- delete input;
- }
- }
-
- delete [] temp_buffer;
- delete [] buffer;
-}
-
#if HAVE_ZLIB
TEST_F(IoTest, GzipIo) {
const int kBufferSize = 2*1024;
@@ -348,49 +296,9 @@ TEST_F(IoTest, GzipIo) {
int size;
{
ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::GZIP;
- if (gzip_buffer_size != -1) {
- options.buffer_size = gzip_buffer_size;
- }
- GzipOutputStream gzout(&output, options);
- WriteStuff(&gzout);
- gzout.Close();
- size = output.ByteCount();
- }
- {
- ArrayInputStream input(buffer, size, kBlockSizes[j]);
- GzipInputStream gzin(
- &input, GzipInputStream::GZIP, gzip_buffer_size);
- ReadStuff(&gzin);
- }
- }
- }
- }
- delete [] buffer;
-}
-
-TEST_F(IoTest, GzipIoWithFlush) {
- const int kBufferSize = 2*1024;
- uint8* buffer = new uint8[kBufferSize];
- // We start with i = 4 as we want a block size > 6. With block size <= 6
- // Flush() fills up the entire 2K buffer with flush markers and the test
- // fails. See documentation for Flush() for more detail.
- for (int i = 4; i < kBlockSizeCount; i++) {
- for (int j = 0; j < kBlockSizeCount; j++) {
- for (int z = 0; z < kBlockSizeCount; z++) {
- int gzip_buffer_size = kBlockSizes[z];
- int size;
- {
- ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::GZIP;
- if (gzip_buffer_size != -1) {
- options.buffer_size = gzip_buffer_size;
- }
- GzipOutputStream gzout(&output, options);
+ GzipOutputStream gzout(
+ &output, GzipOutputStream::GZIP, gzip_buffer_size);
WriteStuff(&gzout);
- EXPECT_TRUE(gzout.Flush());
gzout.Close();
size = output.ByteCount();
}
@@ -406,64 +314,6 @@ TEST_F(IoTest, GzipIoWithFlush) {
delete [] buffer;
}
-TEST_F(IoTest, GzipIoContiguousFlushes) {
- const int kBufferSize = 2*1024;
- uint8* buffer = new uint8[kBufferSize];
-
- int block_size = kBlockSizes[4];
- int gzip_buffer_size = block_size;
- int size;
-
- ArrayOutputStream output(buffer, kBufferSize, block_size);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::GZIP;
- if (gzip_buffer_size != -1) {
- options.buffer_size = gzip_buffer_size;
- }
- GzipOutputStream gzout(&output, options);
- WriteStuff(&gzout);
- EXPECT_TRUE(gzout.Flush());
- EXPECT_TRUE(gzout.Flush());
- gzout.Close();
- size = output.ByteCount();
-
- ArrayInputStream input(buffer, size, block_size);
- GzipInputStream gzin(
- &input, GzipInputStream::GZIP, gzip_buffer_size);
- ReadStuff(&gzin);
-
- delete [] buffer;
-}
-
-TEST_F(IoTest, GzipIoReadAfterFlush) {
- const int kBufferSize = 2*1024;
- uint8* buffer = new uint8[kBufferSize];
-
- int block_size = kBlockSizes[4];
- int gzip_buffer_size = block_size;
- int size;
- ArrayOutputStream output(buffer, kBufferSize, block_size);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::GZIP;
- if (gzip_buffer_size != -1) {
- options.buffer_size = gzip_buffer_size;
- }
-
- GzipOutputStream gzout(&output, options);
- WriteStuff(&gzout);
- EXPECT_TRUE(gzout.Flush());
- size = output.ByteCount();
-
- ArrayInputStream input(buffer, size, block_size);
- GzipInputStream gzin(
- &input, GzipInputStream::GZIP, gzip_buffer_size);
- ReadStuff(&gzin);
-
- gzout.Close();
-
- delete [] buffer;
-}
-
TEST_F(IoTest, ZlibIo) {
const int kBufferSize = 2*1024;
uint8* buffer = new uint8[kBufferSize];
@@ -474,12 +324,8 @@ TEST_F(IoTest, ZlibIo) {
int size;
{
ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::ZLIB;
- if (gzip_buffer_size != -1) {
- options.buffer_size = gzip_buffer_size;
- }
- GzipOutputStream gzout(&output, options);
+ GzipOutputStream gzout(
+ &output, GzipOutputStream::ZLIB, gzip_buffer_size);
WriteStuff(&gzout);
gzout.Close();
size = output.ByteCount();
@@ -502,9 +348,7 @@ TEST_F(IoTest, ZlibIoInputAutodetect) {
int size;
{
ArrayOutputStream output(buffer, kBufferSize);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::ZLIB;
- GzipOutputStream gzout(&output, options);
+ GzipOutputStream gzout(&output, GzipOutputStream::ZLIB);
WriteStuff(&gzout);
gzout.Close();
size = output.ByteCount();
@@ -516,9 +360,7 @@ TEST_F(IoTest, ZlibIoInputAutodetect) {
}
{
ArrayOutputStream output(buffer, kBufferSize);
- GzipOutputStream::Options options;
- options.format = GzipOutputStream::GZIP;
- GzipOutputStream gzout(&output, options);
+ GzipOutputStream gzout(&output, GzipOutputStream::GZIP);
WriteStuff(&gzout);
gzout.Close();
size = output.ByteCount();
@@ -560,10 +402,9 @@ TEST_F(IoTest, CompressionOptions) {
// Some ad-hoc testing of compression options.
string golden;
- GOOGLE_CHECK_OK(File::GetContents(
- TestSourceDir() +
- "/google/protobuf/testdata/golden_message",
- &golden, true));
+ File::ReadFileToStringOrDie(
+ TestSourceDir() + "/google/protobuf/testdata/golden_message",
+ &golden);
GzipOutputStream::Options options;
string gzip_compressed = Compress(golden, options);
@@ -591,71 +432,6 @@ TEST_F(IoTest, CompressionOptions) {
EXPECT_TRUE(Uncompress(gzip_compressed) == golden);
EXPECT_TRUE(Uncompress(zlib_compressed) == golden);
}
-
-TEST_F(IoTest, TwoSessionWriteGzip) {
- // Test that two concatenated gzip streams can be read correctly
-
- static const char* strA = "0123456789";
- static const char* strB = "QuickBrownFox";
- const int kBufferSize = 2*1024;
- uint8* buffer = new uint8[kBufferSize];
- char* temp_buffer = new char[40];
-
- for (int i = 0; i < kBlockSizeCount; i++) {
- for (int j = 0; j < kBlockSizeCount; j++) {
- ArrayOutputStream* output =
- new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
- GzipOutputStream* gzout = new GzipOutputStream(output);
- CodedOutputStream* coded_output = new CodedOutputStream(gzout);
- int32 outlen = strlen(strA) + 1;
- coded_output->WriteVarint32(outlen);
- coded_output->WriteRaw(strA, outlen);
- delete coded_output; // flush
- delete gzout; // flush
- int64 pos = output->ByteCount();
- delete output;
- output = new ArrayOutputStream(
- buffer + pos, kBufferSize - pos, kBlockSizes[i]);
- gzout = new GzipOutputStream(output);
- coded_output = new CodedOutputStream(gzout);
- outlen = strlen(strB) + 1;
- coded_output->WriteVarint32(outlen);
- coded_output->WriteRaw(strB, outlen);
- delete coded_output; // flush
- delete gzout; // flush
- int64 size = pos + output->ByteCount();
- delete output;
-
- ArrayInputStream* input =
- new ArrayInputStream(buffer, size, kBlockSizes[j]);
- GzipInputStream* gzin = new GzipInputStream(input);
- CodedInputStream* coded_input = new CodedInputStream(gzin);
- uint32 insize;
- EXPECT_TRUE(coded_input->ReadVarint32(&insize));
- EXPECT_EQ(strlen(strA) + 1, insize);
- EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
- EXPECT_EQ(0, memcmp(temp_buffer, strA, insize))
- << "strA=" << strA << " in=" << temp_buffer;
-
- EXPECT_TRUE(coded_input->ReadVarint32(&insize));
- EXPECT_EQ(strlen(strB) + 1, insize);
- EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
- EXPECT_EQ(0, memcmp(temp_buffer, strB, insize))
- << " out_block_size=" << kBlockSizes[i]
- << " in_block_size=" << kBlockSizes[j]
- << " pos=" << pos
- << " size=" << size
- << " strB=" << strB << " in=" << temp_buffer;
-
- delete coded_input;
- delete gzin;
- delete input;
- }
- }
-
- delete [] temp_buffer;
- delete [] buffer;
-}
#endif
// There is no string input, only string output. Also, it doesn't support
@@ -924,26 +700,6 @@ TEST_F(IoTest, LimitingInputStream) {
ReadStuff(&input);
}
-// Checks that ByteCount works correctly for LimitingInputStreams where the
-// underlying stream has already been read.
-TEST_F(IoTest, LimitingInputStreamByteCount) {
- const int kHalfBufferSize = 128;
- const int kBufferSize = kHalfBufferSize * 2;
- uint8 buffer[kBufferSize];
-
- // Set up input. Only allow half to be read at once.
- ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize);
- const void* data;
- int size;
- EXPECT_TRUE(array_input.Next(&data, &size));
- EXPECT_EQ(kHalfBufferSize, array_input.ByteCount());
- // kHalfBufferSize - 1 to test limiting logic as well.
- LimitingInputStream input(&array_input, kHalfBufferSize - 1);
- EXPECT_EQ(0, input.ByteCount());
- EXPECT_TRUE(input.Next(&data, &size));
- EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount());
-}
-
// Check that a zero-size array doesn't confuse the code.
TEST(ZeroSizeArray, Input) {
ArrayInputStream input(NULL, 0);