summaryrefslogtreecommitdiffstats
path: root/src/google/protobuf/text_format.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/text_format.cc')
-rw-r--r--src/google/protobuf/text_format.cc871
1 files changed, 688 insertions, 183 deletions
diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc
index 137cbce..84cdbb5 100644
--- a/src/google/protobuf/text_format.cc
+++ b/src/google/protobuf/text_format.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// http://code.google.com/p/protobuf/
+// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -32,15 +32,18 @@
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
+#include <algorithm>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stack>
#include <limits>
+#include <vector>
#include <google/protobuf/text_format.h>
#include <google/protobuf/descriptor.h>
+#include <google/protobuf/wire_format_lite.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
@@ -48,10 +51,26 @@
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/map_util.h>
+#include <google/protobuf/stubs/stl_util.h>
namespace google {
namespace protobuf {
+namespace {
+
+// Returns true if |str| is at least two characters long and begins with
+// "0x" or "0X".  Only this prefix is inspected; the remaining characters are
+// not validated as hexadecimal digits.
+inline bool IsHexNumber(const string& str) {
+ return (str.length() >= 2 && str[0] == '0' &&
+ (str[1] == 'x' || str[1] == 'X'));
+}
+
+// Returns true if |str| is at least two characters long and begins with '0'
+// followed by a digit in the range '0'..'7'.  Only the first two characters
+// are inspected, so a lone "0" is not classified as octal.
+inline bool IsOctNumber(const string& str) {
+ return (str.length() >= 2 && str[0] == '0' &&
+ (str[1] >= '0' && str[1] < '8'));
+}
+
+} // namespace
+
string Message::DebugString() const {
string debug_string;
@@ -93,6 +112,73 @@ void Message::PrintDebugString() const {
// ===========================================================================
+// Implementation of the parse information tree class.
+// Creates an empty tree: no locations and no nested trees are recorded.
+TextFormat::ParseInfoTree::ParseInfoTree() { }
+
+// Destroys the tree together with every nested tree stored in nested_.
+TextFormat::ParseInfoTree::~ParseInfoTree() {
+ // Remove any nested information trees, as they are owned by this tree.
+ // Each map entry holds a vector of tree pointers; STLDeleteElements is
+ // applied to each of those vectors in turn.
+ for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
+ STLDeleteElements(&(it->second));
+ }
+}
+
+// Appends |location| to the list of locations recorded for |field|.  The
+// list is reached through locations_[field], so calling this repeatedly for
+// the same field accumulates one entry per call, in call order.
+void TextFormat::ParseInfoTree::RecordLocation(
+ const FieldDescriptor* field,
+ TextFormat::ParseLocation location) {
+ locations_[field].push_back(location);
+}
+
+// Allocates a new, empty ParseInfoTree, appends it to the list of nested
+// trees kept for |field|, and returns it.  The returned pointer stays owned
+// by this tree (the destructor deletes everything stored in nested_), so
+// callers must not delete it.
+TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
+ const FieldDescriptor* field) {
+ // Owned by us in the map.
+ TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
+ vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
+ // |trees| is the address of a map element, so this check is a sanity guard.
+ GOOGLE_CHECK(trees);
+ trees->push_back(instance);
+ return instance;
+}
+
+// Sanity-checks |index| against the cardinality of |field| and logs a DFATAL
+// message on a mismatch:
+//   - a repeated field must not be queried with index -1;
+//   - a singular field must be queried with index -1.
+// A NULL |field| is accepted without logging.  The function returns nothing.
+void CheckFieldIndex(const FieldDescriptor* field, int index) {
+  if (field == NULL) { return; }
+
+  if (field->is_repeated() && index == -1) {
+    GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
+                       << "Field: " << field->name();
+  } else if (!field->is_repeated() && index != -1) {
+    // The trailing space keeps this sentence separated from "Field: ", the
+    // same way the repeated-field message above is formatted.
+    GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields. "
+                       << "Field: " << field->name();
+  }
+}
+
+// Returns the location recorded for the |index|-th occurrence of |field|.
+// An |index| of -1 (the value expected for singular fields) is mapped to 0.
+// If nothing was recorded for |field|, or |index| is past the end of the
+// recorded list, a default-constructed ParseLocation is returned.
+TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
+ const FieldDescriptor* field, int index) const {
+ // Only logs on an index/cardinality mismatch; the lookup continues below.
+ CheckFieldIndex(field, index);
+ if (index == -1) { index = 0; }
+
+ const vector<TextFormat::ParseLocation>* locations =
+ FindOrNull(locations_, field);
+ if (locations == NULL || index >= locations->size()) {
+ return TextFormat::ParseLocation();
+ }
+
+ return (*locations)[index];
+}
+
+// Returns the nested tree created for the |index|-th occurrence of the
+// message field |field|.  An |index| of -1 (the value expected for singular
+// fields) is mapped to 0.  Returns NULL if no nested tree was created for
+// |field| or |index| is past the end of the list.  The returned tree remains
+// owned by this object.
+TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
+ const FieldDescriptor* field, int index) const {
+ // Only logs on an index/cardinality mismatch; the lookup continues below.
+ CheckFieldIndex(field, index);
+ if (index == -1) { index = 0; }
+
+ const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
+ if (trees == NULL || index >= trees->size()) {
+ return NULL;
+ }
+
+ return (*trees)[index];
+}
+
+
+// ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //google/protobuf/io/tokenizer.h. Note that class's Parse
@@ -107,9 +193,10 @@ void Message::PrintDebugString() const {
class TextFormat::Parser::ParserImpl {
public:
- // Determines if repeated values for a non-repeated field are
- // permitted, e.g., the string "foo: 1 foo: 2" for a
- // required/optional field named "foo".
+ // Determines if repeated values for non-repeated fields and
+ // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
+ // required/optional field named "foo", or "baz: 1 qux: 2"
+ // where "baz" and "qux" are members of the same oneof.
enum SingularOverwritePolicy {
ALLOW_SINGULAR_OVERWRITES = 0, // the last value is retained
FORBID_SINGULAR_OVERWRITES = 1, // an error is issued
@@ -118,12 +205,25 @@ class TextFormat::Parser::ParserImpl {
ParserImpl(const Descriptor* root_message_type,
io::ZeroCopyInputStream* input_stream,
io::ErrorCollector* error_collector,
- SingularOverwritePolicy singular_overwrite_policy)
+ TextFormat::Finder* finder,
+ ParseInfoTree* parse_info_tree,
+ SingularOverwritePolicy singular_overwrite_policy,
+ bool allow_case_insensitive_field,
+ bool allow_unknown_field,
+ bool allow_unknown_enum,
+ bool allow_field_number,
+ bool allow_relaxed_whitespace)
: error_collector_(error_collector),
+ finder_(finder),
+ parse_info_tree_(parse_info_tree),
tokenizer_error_collector_(this),
tokenizer_(input_stream, &tokenizer_error_collector_),
root_message_type_(root_message_type),
singular_overwrite_policy_(singular_overwrite_policy),
+ allow_case_insensitive_field_(allow_case_insensitive_field),
+ allow_unknown_field_(allow_unknown_field),
+ allow_unknown_enum_(allow_unknown_enum),
+ allow_field_number_(allow_field_number),
had_errors_(false) {
// For backwards-compatibility with proto1, we need to allow the 'f' suffix
// for floats.
@@ -132,6 +232,11 @@ class TextFormat::Parser::ParserImpl {
// '#' starts a comment.
tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
+ if (allow_relaxed_whitespace) {
+ tokenizer_.set_require_space_after_number(false);
+ tokenizer_.set_allow_multiline_strings(true);
+ }
+
// Consume the starting token.
tokenizer_.Next();
}
@@ -143,7 +248,7 @@ class TextFormat::Parser::ParserImpl {
// GOOGLE_LOG(ERROR)).
bool Parse(Message* output) {
// Consume fields until we cannot do so anymore.
- while(true) {
+ while (true) {
if (LookingAtType(io::Tokenizer::TYPE_END)) {
return !had_errors_;
}
@@ -228,6 +333,7 @@ class TextFormat::Parser::ParserImpl {
return true;
}
+
// Consumes the current field (as returned by the tokenizer) on the
// passed in message.
bool ConsumeField(Message* message) {
@@ -237,6 +343,8 @@ class TextFormat::Parser::ParserImpl {
string field_name;
const FieldDescriptor* field = NULL;
+ int start_line = tokenizer_.current().line;
+ int start_column = tokenizer_.current().column;
if (TryConsume("[")) {
// Extension.
@@ -249,72 +357,200 @@ class TextFormat::Parser::ParserImpl {
}
DO(Consume("]"));
- field = reflection->FindKnownExtensionByName(field_name);
+ field = (finder_ != NULL
+ ? finder_->FindExtension(message, field_name)
+ : reflection->FindKnownExtensionByName(field_name));
if (field == NULL) {
- ReportError("Extension \"" + field_name + "\" is not defined or "
- "is not an extension of \"" +
- descriptor->full_name() + "\".");
- return false;
+ if (!allow_unknown_field_) {
+ ReportError("Extension \"" + field_name + "\" is not defined or "
+ "is not an extension of \"" +
+ descriptor->full_name() + "\".");
+ return false;
+ } else {
+ ReportWarning("Extension \"" + field_name + "\" is not defined or "
+ "is not an extension of \"" +
+ descriptor->full_name() + "\".");
+ }
}
} else {
DO(ConsumeIdentifier(&field_name));
- field = descriptor->FindFieldByName(field_name);
- // Group names are expected to be capitalized as they appear in the
- // .proto file, which actually matches their type names, not their field
- // names.
- if (field == NULL) {
- string lower_field_name = field_name;
- LowerString(&lower_field_name);
- field = descriptor->FindFieldByName(lower_field_name);
- // If the case-insensitive match worked but the field is NOT a group,
- if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
+ int32 field_number;
+ if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
+ if (descriptor->IsExtensionNumber(field_number)) {
+ field = reflection->FindKnownExtensionByNumber(field_number);
+ } else {
+ field = descriptor->FindFieldByNumber(field_number);
+ }
+ } else {
+ field = descriptor->FindFieldByName(field_name);
+ // Group names are expected to be capitalized as they appear in the
+ // .proto file, which actually matches their type names, not their
+ // field names.
+ if (field == NULL) {
+ string lower_field_name = field_name;
+ LowerString(&lower_field_name);
+ field = descriptor->FindFieldByName(lower_field_name);
+ // If the case-insensitive match worked but the field is NOT a group,
+ if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
+ field = NULL;
+ }
+ }
+ // Again, special-case group names as described above.
+ if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
+ && field->message_type()->name() != field_name) {
field = NULL;
}
- }
- // Again, special-case group names as described above.
- if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
- && field->message_type()->name() != field_name) {
- field = NULL;
+
+ if (field == NULL && allow_case_insensitive_field_) {
+ string lower_field_name = field_name;
+ LowerString(&lower_field_name);
+ field = descriptor->FindFieldByLowercaseName(lower_field_name);
+ }
}
if (field == NULL) {
- ReportError("Message type \"" + descriptor->full_name() +
- "\" has no field named \"" + field_name + "\".");
- return false;
+ if (!allow_unknown_field_) {
+ ReportError("Message type \"" + descriptor->full_name() +
+ "\" has no field named \"" + field_name + "\".");
+ return false;
+ } else {
+ ReportWarning("Message type \"" + descriptor->full_name() +
+ "\" has no field named \"" + field_name + "\".");
+ }
}
}
- // Fail if the field is not repeated and it has already been specified.
- if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
- !field->is_repeated() && reflection->HasField(*message, field)) {
- ReportError("Non-repeated field \"" + field_name +
- "\" is specified multiple times.");
- return false;
+ // Skips unknown field.
+ if (field == NULL) {
+ GOOGLE_CHECK(allow_unknown_field_);
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
+ return SkipFieldValue();
+ } else {
+ return SkipFieldMessage();
+ }
+ }
+
+ if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
+ // Fail if the field is not repeated and it has already been specified.
+ if (!field->is_repeated() && reflection->HasField(*message, field)) {
+ ReportError("Non-repeated field \"" + field_name +
+ "\" is specified multiple times.");
+ return false;
+ }
+ // Fail if the field is a member of a oneof and another member has already
+ // been specified.
+ const OneofDescriptor* oneof = field->containing_oneof();
+ if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
+ const FieldDescriptor* other_field =
+ reflection->GetOneofFieldDescriptor(*message, oneof);
+ ReportError("Field \"" + field_name + "\" is specified along with "
+ "field \"" + other_field->name() + "\", another member "
+ "of oneof \"" + oneof->name() + "\".");
+ return false;
+ }
}
// Perform special handling for embedded message types.
if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
// ':' is optional here.
TryConsume(":");
- DO(ConsumeFieldMessage(message, reflection, field));
} else {
+ // ':' is required here.
DO(Consume(":"));
+ }
+
+ if (field->is_repeated() && TryConsume("[")) {
+ // Short repeated format, e.g. "foo: [1, 2, 3]"
+ while (true) {
+ if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
+ // Perform special handling for embedded message types.
+ DO(ConsumeFieldMessage(message, reflection, field));
+ } else {
+ DO(ConsumeFieldValue(message, reflection, field));
+ }
+ if (TryConsume("]")) {
+ break;
+ }
+ DO(Consume(","));
+ }
+ } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
+ DO(ConsumeFieldMessage(message, reflection, field));
+ } else {
DO(ConsumeFieldValue(message, reflection, field));
}
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ TryConsume(";") || TryConsume(",");
+
if (field->options().deprecated()) {
ReportWarning("text format contains deprecated field \""
+ field_name + "\"");
}
+ // If a parse info tree exists, add the location for the parsed
+ // field.
+ if (parse_info_tree_ != NULL) {
+ RecordLocation(parse_info_tree_, field,
+ ParseLocation(start_line, start_column));
+ }
+
+ return true;
+ }
+
+ // Skips the next field including the field's name and value.
+ // The name is either a bracketed, dot-separated extension name or a single
+ // identifier.  It is parsed only to advance the tokenizer; it is not looked
+ // up in any descriptor.  Returns true once the whole field has been
+ // consumed.
+ bool SkipField() {
+ string field_name;
+ if (TryConsume("[")) {
+ // Extension name.
+ DO(ConsumeIdentifier(&field_name));
+ while (TryConsume(".")) {
+ string part;
+ DO(ConsumeIdentifier(&part));
+ field_name += ".";
+ field_name += part;
+ }
+ DO(Consume("]"));
+ } else {
+ DO(ConsumeIdentifier(&field_name));
+ }
+
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
+ DO(SkipFieldValue());
+ } else {
+ DO(SkipFieldMessage());
+ }
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ TryConsume(";") || TryConsume(",");
return true;
}
bool ConsumeFieldMessage(Message* message,
const Reflection* reflection,
const FieldDescriptor* field) {
+
+ // If the parse information tree is not NULL, create a nested one
+ // for the nested message.
+ ParseInfoTree* parent = parse_info_tree_;
+ if (parent != NULL) {
+ parse_info_tree_ = CreateNested(parent, field);
+ }
+
string delimeter;
if (TryConsume("<")) {
delimeter = ">";
@@ -329,6 +565,26 @@ class TextFormat::Parser::ParserImpl {
DO(ConsumeMessage(reflection->MutableMessage(message, field),
delimeter));
}
+
+ // Reset the parse information tree.
+ parse_info_tree_ = parent;
+ return true;
+ }
+
+ // Skips the whole body of a message including the beginning delimiter and
+ // the ending delimiter.  The body may be opened with "<" (closed by ">")
+ // or with "{" (closed by "}"); every field inside is skipped via
+ // SkipField().
+ bool SkipFieldMessage() {
+ string delimeter;
+ if (TryConsume("<")) {
+ delimeter = ">";
+ } else {
+ DO(Consume("{"));
+ delimeter = "}";
+ }
+ // The loop stops at either kind of closing token; the Consume() call below
+ // then requires it to be the one that matches the opening token.
+ while (!LookingAt(">") && !LookingAt("}")) {
+ DO(SkipField());
+ }
+ DO(Consume(delimeter));
return true;
}
@@ -397,34 +653,57 @@ class TextFormat::Parser::ParserImpl {
}
case FieldDescriptor::CPPTYPE_BOOL: {
- string value;
- DO(ConsumeIdentifier(&value));
-
- if (value == "true") {
- SET_FIELD(Bool, true);
- } else if (value == "false") {
- SET_FIELD(Bool, false);
+ if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+ uint64 value;
+ DO(ConsumeUnsignedInteger(&value, 1));
+ SET_FIELD(Bool, value);
} else {
- ReportError("Invalid value for boolean field \"" + field->name()
- + "\". Value: \"" + value + "\".");
- return false;
+ string value;
+ DO(ConsumeIdentifier(&value));
+ if (value == "true" || value == "True" || value == "t") {
+ SET_FIELD(Bool, true);
+ } else if (value == "false" || value == "False" || value == "f") {
+ SET_FIELD(Bool, false);
+ } else {
+ ReportError("Invalid value for boolean field \"" + field->name()
+ + "\". Value: \"" + value + "\".");
+ return false;
+ }
}
break;
}
case FieldDescriptor::CPPTYPE_ENUM: {
string value;
- DO(ConsumeIdentifier(&value));
-
- // Find the enumeration value.
const EnumDescriptor* enum_type = field->enum_type();
- const EnumValueDescriptor* enum_value
- = enum_type->FindValueByName(value);
+ const EnumValueDescriptor* enum_value = NULL;
+
+ if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+ DO(ConsumeIdentifier(&value));
+ // Find the enumeration value.
+ enum_value = enum_type->FindValueByName(value);
+
+ } else if (LookingAt("-") ||
+ LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+ int64 int_value;
+ DO(ConsumeSignedInteger(&int_value, kint32max));
+ value = SimpleItoa(int_value); // for error reporting
+ enum_value = enum_type->FindValueByNumber(int_value);
+ } else {
+ ReportError("Expected integer or identifier.");
+ return false;
+ }
if (enum_value == NULL) {
- ReportError("Unknown enumeration value of \"" + value + "\" for "
- "field \"" + field->name() + "\".");
- return false;
+ if (!allow_unknown_enum_) {
+ ReportError("Unknown enumeration value of \"" + value + "\" for "
+ "field \"" + field->name() + "\".");
+ return false;
+ } else {
+ ReportWarning("Unknown enumeration value of \"" + value + "\" for "
+ "field \"" + field->name() + "\".");
+ return true;
+ }
}
SET_FIELD(Enum, enum_value);
@@ -442,6 +721,60 @@ class TextFormat::Parser::ParserImpl {
return true;
}
+ // Skips one scalar field value, i.e. anything that is not a message body.
+ // Returns true when a value was skipped.  Returns false, after reporting an
+ // error, when the current token cannot be a scalar value.
+ bool SkipFieldValue() {
+   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
+     // A run of consecutive string tokens is skipped as a single value.
+     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
+       tokenizer_.Next();
+     }
+     return true;
+   }
+   // Possible field values other than string:
+   //   12345        => TYPE_INTEGER
+   //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
+   //   1.2345       => TYPE_FLOAT
+   //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
+   //   inf          => TYPE_IDENTIFIER
+   //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
+   //   any other bare word (e.g. true, or a symbolic name) => TYPE_IDENTIFIER
+   // In other words, the value consists of an optional '-' followed by one
+   // of TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
+   bool has_minus = TryConsume("-");
+   if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
+       !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
+       !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+     // Report the problem instead of failing silently; otherwise the caller
+     // sees a failed parse with no diagnostic explaining why.
+     string text = tokenizer_.current().text;
+     ReportError("Cannot skip field value, unexpected token: " + text);
+     return false;
+   }
+   // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
+   // value while other combinations all generate valid values.
+   // We check if the value of this combination is valid here.
+   // TYPE_IDENTIFIER after a '-' should be one of the float values listed
+   // below (compared case-insensitively):
+   //   inf, infinity, nan
+   if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+     string text = tokenizer_.current().text;
+     LowerString(&text);
+     if (text != "inf" &&
+         text != "infinity" &&
+         text != "nan") {
+       ReportError("Invalid float number: " + text);
+       return false;
+     }
+   }
+   tokenizer_.Next();
+   return true;
+ }
+
+
// Returns true if the current token's text is equal to that specified.
bool LookingAt(const string& text) {
return tokenizer_.current().text == text;
@@ -455,15 +788,23 @@ class TextFormat::Parser::ParserImpl {
// Consumes an identifier and saves its value in the identifier parameter.
// Returns false if the token is not of type IDENTFIER.
bool ConsumeIdentifier(string* identifier) {
- if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
- ReportError("Expected identifier.");
- return false;
+ if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
+ *identifier = tokenizer_.current().text;
+ tokenizer_.Next();
+ return true;
}
- *identifier = tokenizer_.current().text;
+ // If allow_field_number_ or allow_unknown_field_ is true, we should be able
+ // to parse integer identifiers.
+ if ((allow_field_number_ || allow_unknown_field_)
+ && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+ *identifier = tokenizer_.current().text;
+ tokenizer_.Next();
+ return true;
+ }
- tokenizer_.Next();
- return true;
+ ReportError("Expected identifier.");
+ return false;
}
// Consumes a string and saves its value in the text parameter.
@@ -530,6 +871,29 @@ class TextFormat::Parser::ParserImpl {
return true;
}
+ // Consumes a uint64 and saves its value in the value parameter.
+ // Accepts decimal numbers only, rejects hex or oct numbers.
+ // Returns false, after reporting an error, if the current token is not an
+ // integer, is written in hex or octal notation, or does not fit within
+ // |max_value|.  The token is consumed only on success.
+ bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
+ if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
+ ReportError("Expected integer.");
+ return false;
+ }
+
+ const string& text = tokenizer_.current().text;
+ // The notation is judged from the token's leading characters only.
+ if (IsHexNumber(text) || IsOctNumber(text)) {
+ ReportError("Expect a decimal number.");
+ return false;
+ }
+
+ if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
+ ReportError("Integer out of range.");
+ return false;
+ }
+
+ tokenizer_.Next();
+ return true;
+ }
+
+
// Consumes a double and saves its value in the value parameter.
// Note that since the tokenizer does not support negative numbers,
// we actually may consume an additional token (for the minus sign) in this
@@ -547,7 +911,7 @@ class TextFormat::Parser::ParserImpl {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
// We have found an integer value for the double.
uint64 integer_value;
- DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
+ DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
*value = static_cast<double>(integer_value);
} else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
@@ -559,7 +923,8 @@ class TextFormat::Parser::ParserImpl {
} else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
string text = tokenizer_.current().text;
LowerString(&text);
- if (text == "inf" || text == "infinity") {
+ if (text == "inf" ||
+ text == "infinity") {
*value = std::numeric_limits<double>::infinity();
tokenizer_.Next();
} else if (text == "nan") {
@@ -616,7 +981,7 @@ class TextFormat::Parser::ParserImpl {
explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
parser_(parser) { }
- virtual ~ParserErrorCollector() { };
+ virtual ~ParserErrorCollector() { }
virtual void AddError(int line, int column, const string& message) {
parser_->ReportError(line, column, message);
@@ -632,10 +997,16 @@ class TextFormat::Parser::ParserImpl {
};
io::ErrorCollector* error_collector_;
+ TextFormat::Finder* finder_;
+ ParseInfoTree* parse_info_tree_;
ParserErrorCollector tokenizer_error_collector_;
io::Tokenizer tokenizer_;
const Descriptor* root_message_type_;
SingularOverwritePolicy singular_overwrite_policy_;
+ const bool allow_case_insensitive_field_;
+ const bool allow_unknown_field_;
+ const bool allow_unknown_enum_;
+ const bool allow_field_number_;
bool had_errors_;
};
@@ -661,7 +1032,7 @@ class TextFormat::Printer::TextGenerator {
~TextGenerator() {
// Only BackUp() if we're sure we've successfully called Next() at least
// once.
- if (buffer_size_ > 0) {
+ if (!failed_ && buffer_size_ > 0) {
output_->BackUp(buffer_size_);
}
}
@@ -765,17 +1136,39 @@ class TextFormat::Printer::TextGenerator {
// ===========================================================================
+// Out-of-line definition of the (empty) Finder destructor.
+TextFormat::Finder::~Finder() {
+}
+
TextFormat::Parser::Parser()
: error_collector_(NULL),
- allow_partial_(false) {}
+ finder_(NULL),
+ parse_info_tree_(NULL),
+ allow_partial_(false),
+ allow_case_insensitive_field_(false),
+ allow_unknown_field_(false),
+ allow_unknown_enum_(false),
+ allow_field_number_(false),
+ allow_relaxed_whitespace_(false),
+ allow_singular_overwrites_(false) {
+}
TextFormat::Parser::~Parser() {}
bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
Message* output) {
output->Clear();
+
+ ParserImpl::SingularOverwritePolicy overwrites_policy =
+ allow_singular_overwrites_
+ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
+ : ParserImpl::FORBID_SINGULAR_OVERWRITES;
+
ParserImpl parser(output->GetDescriptor(), input, error_collector_,
- ParserImpl::FORBID_SINGULAR_OVERWRITES);
+ finder_, parse_info_tree_,
+ overwrites_policy,
+ allow_case_insensitive_field_, allow_unknown_field_,
+ allow_unknown_enum_, allow_field_number_,
+ allow_relaxed_whitespace_);
return MergeUsingImpl(input, output, &parser);
}
@@ -788,7 +1181,11 @@ bool TextFormat::Parser::ParseFromString(const string& input,
bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
Message* output) {
ParserImpl parser(output->GetDescriptor(), input, error_collector_,
- ParserImpl::ALLOW_SINGULAR_OVERWRITES);
+ finder_, parse_info_tree_,
+ ParserImpl::ALLOW_SINGULAR_OVERWRITES,
+ allow_case_insensitive_field_, allow_unknown_field_,
+ allow_unknown_enum_, allow_field_number_,
+ allow_relaxed_whitespace_);
return MergeUsingImpl(input, output, &parser);
}
@@ -798,7 +1195,7 @@ bool TextFormat::Parser::MergeFromString(const string& input,
return Merge(&input_stream, output);
}
-bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
+bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
Message* output,
ParserImpl* parser_impl) {
if (!parser_impl->Parse(output)) return false;
@@ -806,7 +1203,7 @@ bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
vector<string> missing_fields;
output->FindInitializationErrors(&missing_fields);
parser_impl->ReportError(-1, 0, "Message missing required fields: " +
- JoinStrings(missing_fields, ", "));
+ Join(missing_fields, ", "));
return false;
}
return true;
@@ -818,7 +1215,11 @@ bool TextFormat::Parser::ParseFieldValueFromString(
Message* output) {
io::ArrayInputStream input_stream(input.data(), input.size());
ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
- ParserImpl::ALLOW_SINGULAR_OVERWRITES);
+ finder_, parse_info_tree_,
+ ParserImpl::ALLOW_SINGULAR_OVERWRITES,
+ allow_case_insensitive_field_, allow_unknown_field_,
+ allow_unknown_enum_, allow_field_number_,
+ allow_relaxed_whitespace_);
return parser.ParseField(field, output);
}
@@ -844,29 +1245,138 @@ bool TextFormat::Parser::ParseFieldValueFromString(
// ===========================================================================
+// The default implementation for FieldValuePrinter. The base class just
+// does simple formatting. That way, deriving classes could decide to fallback
+// to that behavior.
+TextFormat::FieldValuePrinter::FieldValuePrinter() {}
+TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
+// Booleans are printed as the literal words "true" / "false".
+string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
+ return val ? "true" : "false";
+}
+// The integer printers below all format the value with SimpleItoa.
+string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
+ return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
+ return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
+ return SimpleItoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
+ return SimpleItoa(val);
+}
+// Floats and doubles are formatted with SimpleFtoa / SimpleDtoa respectively.
+string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
+ return SimpleFtoa(val);
+}
+string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
+ return SimpleDtoa(val);
+}
+// Strings are C-escaped and wrapped in double quotes.
+string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
+ return StrCat("\"", CEscape(val), "\"");
+}
+// Bytes are printed exactly like strings; the call goes through PrintString.
+string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
+ return PrintString(val);
+}
+// Enums are printed by symbolic name; the numeric |val| is not used here.
+string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
+ const string& name) const {
+ return name;
+}
+// Returns the text used as the name of |field| in the output:
+//   - extensions are printed in brackets, using the full name of the
+//     extension (or of its message type for MessageSet-style elements);
+//   - groups are printed using the name of their message type;
+//   - every other field is printed using its plain field name.
+// |message| and |reflection| are not consulted by this implementation.
+string TextFormat::FieldValuePrinter::PrintFieldName(
+ const Message& message,
+ const Reflection* reflection,
+ const FieldDescriptor* field) const {
+ if (field->is_extension()) {
+ // We special-case MessageSet elements for compatibility with proto1.
+ if (field->containing_type()->options().message_set_wire_format()
+ && field->type() == FieldDescriptor::TYPE_MESSAGE
+ && field->is_optional()
+ && field->extension_scope() == field->message_type()) {
+ return StrCat("[", field->message_type()->full_name(), "]");
+ } else {
+ return StrCat("[", field->full_name(), "]");
+ }
+ } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
+ // Groups must be serialized with their original capitalization.
+ return field->message_type()->name();
+ } else {
+ return field->name();
+ }
+}
+// Returns the text that opens a nested message: " { " in single-line mode,
+// " {\n" otherwise.  |message|, |field_index| and |field_count| are not
+// consulted by this implementation.
+string TextFormat::FieldValuePrinter::PrintMessageStart(
+ const Message& message,
+ int field_index,
+ int field_count,
+ bool single_line_mode) const {
+ return single_line_mode ? " { " : " {\n";
+}
+// Returns the text that closes a nested message: "} " in single-line mode,
+// "}\n" otherwise.  |message|, |field_index| and |field_count| are not
+// consulted by this implementation.
+string TextFormat::FieldValuePrinter::PrintMessageEnd(
+ const Message& message,
+ int field_index,
+ int field_count,
+ bool single_line_mode) const {
+ return single_line_mode ? "} " : "}\n";
+}
+
+namespace {
+// Our own specialization: for UTF8 escaped strings.
+// Only string fields get the UTF-8-safe escaping; bytes fields keep the
+// base-class escaping.
+class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
+ public:
+ virtual string PrintString(const string& val) const {
+ return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
+ }
+ // Explicitly calls the base-class PrintString so that bytes are not routed
+ // through the UTF-8-safe override above.
+ virtual string PrintBytes(const string& val) const {
+ return TextFormat::FieldValuePrinter::PrintString(val);
+ }
+};
+
+} // namespace
+
TextFormat::Printer::Printer()
: initial_indent_level_(0),
single_line_mode_(false),
+ use_field_number_(false),
use_short_repeated_primitives_(false),
- utf8_string_escaping_(false) {}
+ hide_unknown_fields_(false),
+ print_message_fields_in_index_order_(false) {
+ SetUseUtf8StringEscaping(false);
+}
-TextFormat::Printer::~Printer() {}
+TextFormat::Printer::~Printer() {
+ STLDeleteValues(&custom_printers_);
+}
+
+// Selects how string values are escaped by installing a freshly allocated
+// default field value printer: the UTF-8-escaping variant when |as_utf8| is
+// true, the plain FieldValuePrinter otherwise.
+void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
+ SetDefaultFieldValuePrinter(as_utf8
+ ? new FieldValuePrinterUtf8Escaping()
+ : new FieldValuePrinter());
+}
+
+// Replaces the default field value printer with |printer|.
+// NOTE(review): reset() suggests default_field_value_printer_ is an owning
+// smart pointer that takes ownership of |printer| and releases the previous
+// one -- confirm against the declaration in the header.
+void TextFormat::Printer::SetDefaultFieldValuePrinter(
+ const FieldValuePrinter* printer) {
+ default_field_value_printer_.reset(printer);
+}
+
+// Registers |printer| as the custom printer for |field|.  Returns true only
+// if both pointers are non-NULL and no printer was already registered for
+// |field|; on success the entry is stored in custom_printers_.  When false
+// is returned nothing is stored, so |printer| is not retained by this
+// object.
+bool TextFormat::Printer::RegisterFieldValuePrinter(
+ const FieldDescriptor* field,
+ const FieldValuePrinter* printer) {
+ return field != NULL
+ && printer != NULL
+ && custom_printers_.insert(make_pair(field, printer)).second;
+}
bool TextFormat::Printer::PrintToString(const Message& message,
- string* output) {
+ string* output) const {
GOOGLE_DCHECK(output) << "output specified is NULL";
output->clear();
io::StringOutputStream output_stream(output);
- bool result = Print(message, &output_stream);
-
- return result;
+ return Print(message, &output_stream);
}
bool TextFormat::Printer::PrintUnknownFieldsToString(
const UnknownFieldSet& unknown_fields,
- string* output) {
+ string* output) const {
GOOGLE_DCHECK(output) << "output specified is NULL";
output->clear();
@@ -875,7 +1385,7 @@ bool TextFormat::Printer::PrintUnknownFieldsToString(
}
bool TextFormat::Printer::Print(const Message& message,
- io::ZeroCopyOutputStream* output) {
+ io::ZeroCopyOutputStream* output) const {
TextGenerator generator(output, initial_indent_level_);
Print(message, generator);
@@ -886,7 +1396,7 @@ bool TextFormat::Printer::Print(const Message& message,
bool TextFormat::Printer::PrintUnknownFields(
const UnknownFieldSet& unknown_fields,
- io::ZeroCopyOutputStream* output) {
+ io::ZeroCopyOutputStream* output) const {
TextGenerator generator(output, initial_indent_level_);
PrintUnknownFields(unknown_fields, generator);
@@ -895,22 +1405,37 @@ bool TextFormat::Printer::PrintUnknownFields(
return !generator.failed();
}
+namespace {
+// Comparison functor for sorting FieldDescriptors by field index.
+struct FieldIndexSorter {
+ bool operator()(const FieldDescriptor* left,
+ const FieldDescriptor* right) const {
+ return left->index() < right->index();
+ }
+};
+} // namespace
+
void TextFormat::Printer::Print(const Message& message,
- TextGenerator& generator) {
+ TextGenerator& generator) const {
const Reflection* reflection = message.GetReflection();
vector<const FieldDescriptor*> fields;
reflection->ListFields(message, &fields);
+ if (print_message_fields_in_index_order_) {
+ sort(fields.begin(), fields.end(), FieldIndexSorter());
+ }
for (int i = 0; i < fields.size(); i++) {
PrintField(message, reflection, fields[i], generator);
}
- PrintUnknownFields(reflection->GetUnknownFields(message), generator);
+ if (!hide_unknown_fields_) {
+ PrintUnknownFields(reflection->GetUnknownFields(message), generator);
+ }
}
void TextFormat::Printer::PrintFieldValueToString(
const Message& message,
const FieldDescriptor* field,
int index,
- string* output) {
+ string* output) const {
GOOGLE_DCHECK(output) << "output specified is NULL";
@@ -924,7 +1449,7 @@ void TextFormat::Printer::PrintFieldValueToString(
void TextFormat::Printer::PrintField(const Message& message,
const Reflection* reflection,
const FieldDescriptor* field,
- TextGenerator& generator) {
+ TextGenerator& generator) const {
if (use_short_repeated_primitives_ &&
field->is_repeated() &&
field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
@@ -942,35 +1467,30 @@ void TextFormat::Printer::PrintField(const Message& message,
}
for (int j = 0; j < count; ++j) {
+ const int field_index = field->is_repeated() ? j : -1;
+
PrintFieldName(message, reflection, field, generator);
if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
- if (single_line_mode_) {
- generator.Print(" { ");
- } else {
- generator.Print(" {\n");
- generator.Indent();
- }
+ const FieldValuePrinter* printer = FindWithDefault(
+ custom_printers_, field, default_field_value_printer_.get());
+ const Message& sub_message =
+ field->is_repeated()
+ ? reflection->GetRepeatedMessage(message, field, j)
+ : reflection->GetMessage(message, field);
+ generator.Print(
+ printer->PrintMessageStart(
+ sub_message, field_index, count, single_line_mode_));
+ generator.Indent();
+ Print(sub_message, generator);
+ generator.Outdent();
+ generator.Print(
+ printer->PrintMessageEnd(
+ sub_message, field_index, count, single_line_mode_));
} else {
generator.Print(": ");
- }
-
- // Write the field value.
- int field_index = j;
- if (!field->is_repeated()) {
- field_index = -1;
- }
-
- PrintFieldValue(message, reflection, field, field_index, generator);
-
- if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
- if (single_line_mode_) {
- generator.Print("} ");
- } else {
- generator.Outdent();
- generator.Print("}\n");
- }
- } else {
+ // Write the field value.
+ PrintFieldValue(message, reflection, field, field_index, generator);
if (single_line_mode_) {
generator.Print(" ");
} else {
@@ -980,10 +1500,11 @@ void TextFormat::Printer::PrintField(const Message& message,
}
}
-void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
- const Reflection* reflection,
- const FieldDescriptor* field,
- TextGenerator& generator) {
+void TextFormat::Printer::PrintShortRepeatedField(
+ const Message& message,
+ const Reflection* reflection,
+ const FieldDescriptor* field,
+ TextGenerator& generator) const {
// Print primitive repeated field in short form.
PrintFieldName(message, reflection, field, generator);
@@ -1003,27 +1524,17 @@ void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
void TextFormat::Printer::PrintFieldName(const Message& message,
const Reflection* reflection,
const FieldDescriptor* field,
- TextGenerator& generator) {
- if (field->is_extension()) {
- generator.Print("[");
- // We special-case MessageSet elements for compatibility with proto1.
- if (field->containing_type()->options().message_set_wire_format()
- && field->type() == FieldDescriptor::TYPE_MESSAGE
- && field->is_optional()
- && field->extension_scope() == field->message_type()) {
- generator.Print(field->message_type()->full_name());
- } else {
- generator.Print(field->full_name());
- }
- generator.Print("]");
- } else {
- if (field->type() == FieldDescriptor::TYPE_GROUP) {
- // Groups must be serialized with their original capitalization.
- generator.Print(field->message_type()->name());
- } else {
- generator.Print(field->name());
- }
+ TextGenerator& generator) const {
+ // if use_field_number_ is true, prints field number instead
+ // of field name.
+ if (use_field_number_) {
+ generator.Print(SimpleItoa(field->number()));
+ return;
}
+
+ const FieldValuePrinter* printer = FindWithDefault(
+ custom_printers_, field, default_field_value_printer_.get());
+ generator.Print(printer->PrintFieldName(message, reflection, field));
}
void TextFormat::Printer::PrintFieldValue(
@@ -1031,66 +1542,60 @@ void TextFormat::Printer::PrintFieldValue(
const Reflection* reflection,
const FieldDescriptor* field,
int index,
- TextGenerator& generator) {
+ TextGenerator& generator) const {
GOOGLE_DCHECK(field->is_repeated() || (index == -1))
<< "Index must be -1 for non-repeated fields";
+ const FieldValuePrinter* printer
+ = FindWithDefault(custom_printers_, field,
+ default_field_value_printer_.get());
+
switch (field->cpp_type()) {
-#define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING) \
- case FieldDescriptor::CPPTYPE_##CPPTYPE: \
- generator.Print(TO_STRING(field->is_repeated() ? \
- reflection->GetRepeated##METHOD(message, field, index) : \
- reflection->Get##METHOD(message, field))); \
- break; \
-
- OUTPUT_FIELD( INT32, Int32, SimpleItoa);
- OUTPUT_FIELD( INT64, Int64, SimpleItoa);
- OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
- OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
- OUTPUT_FIELD( FLOAT, Float, SimpleFtoa);
- OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
+#define OUTPUT_FIELD(CPPTYPE, METHOD) \
+ case FieldDescriptor::CPPTYPE_##CPPTYPE: \
+ generator.Print(printer->Print##METHOD(field->is_repeated() \
+ ? reflection->GetRepeated##METHOD(message, field, index) \
+ : reflection->Get##METHOD(message, field))); \
+ break
+
+ OUTPUT_FIELD( INT32, Int32);
+ OUTPUT_FIELD( INT64, Int64);
+ OUTPUT_FIELD(UINT32, UInt32);
+ OUTPUT_FIELD(UINT64, UInt64);
+ OUTPUT_FIELD( FLOAT, Float);
+ OUTPUT_FIELD(DOUBLE, Double);
+ OUTPUT_FIELD( BOOL, Bool);
#undef OUTPUT_FIELD
- case FieldDescriptor::CPPTYPE_STRING: {
- string scratch;
- const string& value = field->is_repeated() ?
- reflection->GetRepeatedStringReference(
- message, field, index, &scratch) :
- reflection->GetStringReference(message, field, &scratch);
-
- generator.Print("\"");
- if (utf8_string_escaping_) {
- generator.Print(strings::Utf8SafeCEscape(value));
- } else {
- generator.Print(CEscape(value));
- }
- generator.Print("\"");
-
- break;
+ case FieldDescriptor::CPPTYPE_STRING: {
+ string scratch;
+ const string& value = field->is_repeated()
+ ? reflection->GetRepeatedStringReference(
+ message, field, index, &scratch)
+ : reflection->GetStringReference(message, field, &scratch);
+ if (field->type() == FieldDescriptor::TYPE_STRING) {
+ generator.Print(printer->PrintString(value));
+ } else {
+ GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
+ generator.Print(printer->PrintBytes(value));
}
+ break;
+ }
- case FieldDescriptor::CPPTYPE_BOOL:
- if (field->is_repeated()) {
- generator.Print(reflection->GetRepeatedBool(message, field, index)
- ? "true" : "false");
- } else {
- generator.Print(reflection->GetBool(message, field)
- ? "true" : "false");
- }
- break;
-
- case FieldDescriptor::CPPTYPE_ENUM:
- generator.Print(field->is_repeated() ?
- reflection->GetRepeatedEnum(message, field, index)->name() :
- reflection->GetEnum(message, field)->name());
- break;
+ case FieldDescriptor::CPPTYPE_ENUM: {
+ const EnumValueDescriptor *enum_val = field->is_repeated()
+ ? reflection->GetRepeatedEnum(message, field, index)
+ : reflection->GetEnum(message, field);
+ generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name()));
+ break;
+ }
- case FieldDescriptor::CPPTYPE_MESSAGE:
- Print(field->is_repeated() ?
- reflection->GetRepeatedMessage(message, field, index) :
- reflection->GetMessage(message, field),
- generator);
- break;
+ case FieldDescriptor::CPPTYPE_MESSAGE:
+ Print(field->is_repeated()
+ ? reflection->GetRepeatedMessage(message, field, index)
+ : reflection->GetMessage(message, field),
+ generator);
+ break;
}
}
@@ -1143,7 +1648,7 @@ static string PaddedHex(IntType value) {
}
void TextFormat::Printer::PrintUnknownFields(
- const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
+ const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
for (int i = 0; i < unknown_fields.field_count(); i++) {
const UnknownField& field = unknown_fields.field(i);
string field_number = SimpleItoa(field.number());