summary refs log tree commit diff stats
path: root/src/google/protobuf/wire_format.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/wire_format.cc')
-rw-r--r-- src/google/protobuf/wire_format.cc 106
1 files changed, 37 insertions, 69 deletions
diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc
index 6bdfcd6..831a579 100644
--- a/src/google/protobuf/wire_format.cc
+++ b/src/google/protobuf/wire_format.cc
@@ -1,6 +1,6 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -39,7 +39,6 @@
#include <google/protobuf/wire_format.h>
#include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stringprintf.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/wire_format_lite_inl.h>
#include <google/protobuf/descriptor.pb.h>
@@ -49,11 +48,12 @@
#include <google/protobuf/unknown_field_set.h>
-
namespace google {
namespace protobuf {
namespace internal {
+using internal::WireFormatLite;
+
namespace {
// This function turns out to be convenient when using some macros later.
@@ -183,8 +183,7 @@ void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
output->WriteVarint32(field.length_delimited().size());
- output->WriteRawMaybeAliased(field.length_delimited().data(),
- field.length_delimited().size());
+ output->WriteString(field.length_delimited());
break;
case UnknownField::TYPE_GROUP:
output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
@@ -240,6 +239,8 @@ void WireFormat::SerializeUnknownMessageSetItems(
// The only unknown fields that are allowed to exist in a MessageSet are
// messages, which are length-delimited.
if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
+ const string& data = field.length_delimited();
+
// Start group.
output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
@@ -249,7 +250,8 @@ void WireFormat::SerializeUnknownMessageSetItems(
// Write message.
output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
- field.SerializeLengthDelimitedNoTag(output);
+ output->WriteVarint32(data.size());
+ output->WriteString(data);
// End group.
output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
@@ -266,6 +268,8 @@ uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
// The only unknown fields that are allowed to exist in a MessageSet are
// messages, which are length-delimited.
if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
+ const string& data = field.length_delimited();
+
// Start group.
target = io::CodedOutputStream::WriteTagToArray(
WireFormatLite::kMessageSetItemStartTag, target);
@@ -279,7 +283,8 @@ uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
// Write message.
target = io::CodedOutputStream::WriteTagToArray(
WireFormatLite::kMessageSetMessageTag, target);
- target = field.SerializeLengthDelimitedNoTagToArray(target);
+ target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
+ target = io::CodedOutputStream::WriteStringToArray(data, target);
// End group.
target = io::CodedOutputStream::WriteTagToArray(
@@ -349,10 +354,9 @@ int WireFormat::ComputeUnknownMessageSetItemsSize(
if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
size += WireFormatLite::kMessageSetItemTagsSize;
size += io::CodedOutputStream::VarintSize32(field.number());
-
- int field_size = field.GetLengthDelimitedSize();
- size += io::CodedOutputStream::VarintSize32(field_size);
- size += field_size;
+ size += io::CodedOutputStream::VarintSize32(
+ field.length_delimited().size());
+ size += field.length_delimited().size();
}
}
@@ -413,37 +417,6 @@ bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
}
}
-bool WireFormat::SkipMessageSetField(io::CodedInputStream* input,
- uint32 field_number,
- UnknownFieldSet* unknown_fields) {
- uint32 length;
- if (!input->ReadVarint32(&length)) return false;
- return input->ReadString(
- unknown_fields->AddLengthDelimited(field_number), length);
-}
-
-bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
- const FieldDescriptor* field,
- Message* message,
- io::CodedInputStream* input) {
- const Reflection* message_reflection = message->GetReflection();
- if (field == NULL) {
- // We store unknown MessageSet extensions as groups.
- return SkipMessageSetField(
- input, field_number, message_reflection->MutableUnknownFields(message));
- } else if (field->is_repeated() ||
- field->type() != FieldDescriptor::TYPE_MESSAGE) {
- // This shouldn't happen as we only allow optional message extensions to
- // MessageSet.
- GOOGLE_LOG(ERROR) << "Extensions of MessageSets must be optional messages.";
- return false;
- } else {
- Message* sub_message = message_reflection->MutableMessage(
- message, field, input->GetExtensionFactory());
- return WireFormatLite::ReadMessage(input, sub_message);
- }
-}
-
bool WireFormat::ParseAndMergeField(
uint32 tag,
const FieldDescriptor* field, // May be NULL for unknown
@@ -595,8 +568,7 @@ bool WireFormat::ParseAndMergeField(
case FieldDescriptor::TYPE_STRING: {
string value;
if (!WireFormatLite::ReadString(input, &value)) return false;
- VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
- field->name().c_str());
+ VerifyUTF8String(value.data(), value.length(), PARSE);
if (field->is_repeated()) {
message_reflection->AddString(message, field, value);
} else {
@@ -660,14 +632,20 @@ bool WireFormat::ParseAndMergeMessageSetItem(
// required int32 type_id = 2;
// required data message = 3;
- uint32 last_type_id = 0;
+ // Once we see a type_id, we'll construct a fake tag for this extension
+ // which is the tag it would have had under the proto2 extensions wire
+ // format.
+ uint32 fake_tag = 0;
// Once we see a type_id, we'll look up the FieldDescriptor for the
// extension.
const FieldDescriptor* field = NULL;
// If we see message data before the type_id, we'll append it to this so
- // we can parse it later.
+ // we can parse it later. This will probably never happen in practice,
+ // as no MessageSet encoder I know of writes the message before the type ID.
+ // But, it's technically valid so we should allow it.
+ // TODO(kenton): Use a Cord instead? Do I care?
string message_data;
while (true) {
@@ -678,7 +656,8 @@ bool WireFormat::ParseAndMergeMessageSetItem(
case WireFormatLite::kMessageSetTypeIdTag: {
uint32 type_id;
if (!input->ReadVarint32(&type_id)) return false;
- last_type_id = type_id;
+ fake_tag = WireFormatLite::MakeTag(
+ type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
field = message_reflection->FindKnownExtensionByNumber(type_id);
if (!message_data.empty()) {
@@ -687,8 +666,8 @@ bool WireFormat::ParseAndMergeMessageSetItem(
io::ArrayInputStream raw_input(message_data.data(),
message_data.size());
io::CodedInputStream sub_input(&raw_input);
- if (!ParseAndMergeMessageSetField(last_type_id, field, message,
- &sub_input)) {
+ if (!ParseAndMergeField(fake_tag, field, message,
+ &sub_input)) {
return false;
}
message_data.clear();
@@ -698,20 +677,16 @@ bool WireFormat::ParseAndMergeMessageSetItem(
}
case WireFormatLite::kMessageSetMessageTag: {
- if (last_type_id == 0) {
+ if (fake_tag == 0) {
// We haven't seen a type_id yet. Append this data to message_data.
string temp;
uint32 length;
if (!input->ReadVarint32(&length)) return false;
if (!input->ReadString(&temp, length)) return false;
- io::StringOutputStream output_stream(&message_data);
- io::CodedOutputStream coded_output(&output_stream);
- coded_output.WriteVarint32(length);
- coded_output.WriteString(temp);
+ message_data.append(temp);
} else {
// Already saw type_id, so we can parse this directly.
- if (!ParseAndMergeMessageSetField(last_type_id, field, message,
- input)) {
+ if (!ParseAndMergeField(fake_tag, field, message, input)) {
return false;
}
}
@@ -859,8 +834,7 @@ void WireFormat::SerializeFieldWithCachedSizes(
message_reflection->GetRepeatedStringReference(
message, field, j, &scratch) :
message_reflection->GetStringReference(message, field, &scratch);
- VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
- field->name().c_str());
+ VerifyUTF8String(value.data(), value.length(), SERIALIZE);
WireFormatLite::WriteString(field->number(), value, output);
break;
}
@@ -1070,8 +1044,7 @@ int WireFormat::MessageSetItemByteSize(
void WireFormat::VerifyUTF8StringFallback(const char* data,
int size,
- Operation op,
- const char* field_name) {
+ Operation op) {
if (!IsStructurallyValidUTF8(data, size)) {
const char* operation_str = NULL;
switch (op) {
@@ -1083,15 +1056,10 @@ void WireFormat::VerifyUTF8StringFallback(const char* data,
break;
// no default case: have the compiler warn if a case is not covered.
}
- string quoted_field_name = "";
- if (field_name != NULL) {
- quoted_field_name = StringPrintf(" '%s'", field_name);
- }
- // no space below to avoid double space when the field name is missing.
- GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
- << "UTF-8 data when " << operation_str << " a protocol "
- << "buffer. Use the 'bytes' type if you intend to send raw "
- << "bytes. ";
+ GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+ << operation_str
+ << " protocol buffer. Strings must contain only UTF-8; "
+ "use the 'bytes' type for raw bytes.";
}
}