1 file changed, 37 insertions, 69 deletions
diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc
index 6bdfcd6..831a579 100644
--- a/src/google/protobuf/wire_format.cc
+++ b/src/google/protobuf/wire_format.cc
@@ -1,6 +1,6 @@
 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.  All rights reserved.
-// https://developers.google.com/protocol-buffers/
+// http://code.google.com/p/protobuf/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -39,7 +39,6 @@
 #include <google/protobuf/wire_format.h>
 
 #include <google/protobuf/stubs/common.h>
-#include <google/protobuf/stubs/stringprintf.h>
 #include <google/protobuf/descriptor.h>
 #include <google/protobuf/wire_format_lite_inl.h>
 #include <google/protobuf/descriptor.pb.h>
@@ -49,11 +48,12 @@
 #include <google/protobuf/unknown_field_set.h>
 
 
-
 namespace google {
 namespace protobuf {
 namespace internal {
 
+using internal::WireFormatLite;
+
 namespace {
 
 // This function turns out to be convenient when using some macros later.
@@ -183,8 +183,7 @@ void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
             WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
         output->WriteVarint32(field.length_delimited().size());
-        output->WriteRawMaybeAliased(field.length_delimited().data(),
-                                     field.length_delimited().size());
+        output->WriteString(field.length_delimited());
         break;
       case UnknownField::TYPE_GROUP:
         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
@@ -240,6 +239,8 @@ void WireFormat::SerializeUnknownMessageSetItems(
     // The only unknown fields that are allowed to exist in a MessageSet are
     // messages, which are length-delimited.
     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
+      const string& data = field.length_delimited();
+
       // Start group.
       output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
 
@@ -249,7 +250,8 @@ void WireFormat::SerializeUnknownMessageSetItems(
 
       // Write message.
       output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
-      field.SerializeLengthDelimitedNoTag(output);
+      output->WriteVarint32(data.size());
+      output->WriteString(data);
 
       // End group.
       output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
@@ -266,6 +268,8 @@ uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
     // The only unknown fields that are allowed to exist in a MessageSet are
     // messages, which are length-delimited.
     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
+      const string& data = field.length_delimited();
+
       // Start group.
       target = io::CodedOutputStream::WriteTagToArray(
           WireFormatLite::kMessageSetItemStartTag, target);
@@ -279,7 +283,8 @@ uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
       // Write message.
       target = io::CodedOutputStream::WriteTagToArray(
           WireFormatLite::kMessageSetMessageTag, target);
-      target = field.SerializeLengthDelimitedNoTagToArray(target);
+      target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
+      target = io::CodedOutputStream::WriteStringToArray(data, target);
 
       // End group.
       target = io::CodedOutputStream::WriteTagToArray(
@@ -349,10 +354,9 @@ int WireFormat::ComputeUnknownMessageSetItemsSize(
     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
       size += WireFormatLite::kMessageSetItemTagsSize;
       size += io::CodedOutputStream::VarintSize32(field.number());
-
-      int field_size = field.GetLengthDelimitedSize();
-      size += io::CodedOutputStream::VarintSize32(field_size);
-      size += field_size;
+      size += io::CodedOutputStream::VarintSize32(
+        field.length_delimited().size());
+      size += field.length_delimited().size();
     }
   }
 
@@ -413,37 +417,6 @@ bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
   }
 }
 
-bool WireFormat::SkipMessageSetField(io::CodedInputStream* input,
-                                     uint32 field_number,
-                                     UnknownFieldSet* unknown_fields) {
-  uint32 length;
-  if (!input->ReadVarint32(&length)) return false;
-  return input->ReadString(
-      unknown_fields->AddLengthDelimited(field_number), length);
-}
-
-bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
-                                              const FieldDescriptor* field,
-                                              Message* message,
-                                              io::CodedInputStream* input) {
-  const Reflection* message_reflection = message->GetReflection();
-  if (field == NULL) {
-    // We store unknown MessageSet extensions as groups.
-    return SkipMessageSetField(
-        input, field_number, message_reflection->MutableUnknownFields(message));
-  } else if (field->is_repeated() ||
-             field->type() != FieldDescriptor::TYPE_MESSAGE) {
-    // This shouldn't happen as we only allow optional message extensions to
-    // MessageSet.
-    GOOGLE_LOG(ERROR) << "Extensions of MessageSets must be optional messages.";
-    return false;
-  } else {
-    Message* sub_message = message_reflection->MutableMessage(
-        message, field, input->GetExtensionFactory());
-    return WireFormatLite::ReadMessage(input, sub_message);
-  }
-}
-
 bool WireFormat::ParseAndMergeField(
     uint32 tag,
     const FieldDescriptor* field,        // May be NULL for unknown
@@ -595,8 +568,7 @@ bool WireFormat::ParseAndMergeField(
       case FieldDescriptor::TYPE_STRING: {
         string value;
         if (!WireFormatLite::ReadString(input, &value)) return false;
-        VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
-                                   field->name().c_str());
+        VerifyUTF8String(value.data(), value.length(), PARSE);
         if (field->is_repeated()) {
           message_reflection->AddString(message, field, value);
         } else {
@@ -660,14 +632,20 @@ bool WireFormat::ParseAndMergeMessageSetItem(
   //   required int32 type_id = 2;
   //   required data message = 3;
 
-  uint32 last_type_id = 0;
+  // Once we see a type_id, we'll construct a fake tag for this extension
+  // which is the tag it would have had under the proto2 extensions wire
+  // format.
+  uint32 fake_tag = 0;
 
   // Once we see a type_id, we'll look up the FieldDescriptor for the
   // extension.
   const FieldDescriptor* field = NULL;
 
   // If we see message data before the type_id, we'll append it to this so
-  // we can parse it later.
+  // we can parse it later.  This should be rare: no MessageSet encoder I know
+  // of writes the message before the type ID, but it is valid, so allow it.
+  // NOTE(review): payload is buffered without its length prefix -- verify.
+  // TODO(kenton):  Use a Cord instead?  Do I care?
   string message_data;
 
   while (true) {
@@ -678,7 +656,8 @@ bool WireFormat::ParseAndMergeMessageSetItem(
       case WireFormatLite::kMessageSetTypeIdTag: {
         uint32 type_id;
         if (!input->ReadVarint32(&type_id)) return false;
-        last_type_id = type_id;
+        fake_tag = WireFormatLite::MakeTag(
+            type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
         field = message_reflection->FindKnownExtensionByNumber(type_id);
 
         if (!message_data.empty()) {
@@ -687,8 +666,8 @@ bool WireFormat::ParseAndMergeMessageSetItem(
           io::ArrayInputStream raw_input(message_data.data(),
                                          message_data.size());
           io::CodedInputStream sub_input(&raw_input);
-          if (!ParseAndMergeMessageSetField(last_type_id, field, message,
-                                            &sub_input)) {
+          if (!ParseAndMergeField(fake_tag, field, message,
+                                  &sub_input)) {
             return false;
           }
           message_data.clear();
@@ -698,20 +677,16 @@ bool WireFormat::ParseAndMergeMessageSetItem(
       }
 
       case WireFormatLite::kMessageSetMessageTag: {
-        if (last_type_id == 0) {
+        if (fake_tag == 0) {
           // We haven't seen a type_id yet.  Append this data to message_data.
           string temp;
           uint32 length;
           if (!input->ReadVarint32(&length)) return false;
           if (!input->ReadString(&temp, length)) return false;
-          io::StringOutputStream output_stream(&message_data);
-          io::CodedOutputStream coded_output(&output_stream);
-          coded_output.WriteVarint32(length);
-          coded_output.WriteString(temp);
+          message_data.append(temp);
         } else {
           // Already saw type_id, so we can parse this directly.
-          if (!ParseAndMergeMessageSetField(last_type_id, field, message,
-                                            input)) {
+          if (!ParseAndMergeField(fake_tag, field, message, input)) {
             return false;
           }
         }
@@ -859,8 +834,7 @@ void WireFormat::SerializeFieldWithCachedSizes(
           message_reflection->GetRepeatedStringReference(
             message, field, j, &scratch) :
           message_reflection->GetStringReference(message, field, &scratch);
-        VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
-                                   field->name().c_str());
+        VerifyUTF8String(value.data(), value.length(), SERIALIZE);
         WireFormatLite::WriteString(field->number(), value, output);
         break;
       }
@@ -1070,8 +1044,7 @@ int WireFormat::MessageSetItemByteSize(
 
 void WireFormat::VerifyUTF8StringFallback(const char* data,
                                           int size,
-                                          Operation op,
-                                          const char* field_name) {
+                                          Operation op) {
   if (!IsStructurallyValidUTF8(data, size)) {
     const char* operation_str = NULL;
     switch (op) {
@@ -1083,15 +1056,10 @@ void WireFormat::VerifyUTF8StringFallback(const char* data,
         break;
       // no default case: have the compiler warn if a case is not covered.
     }
-    string quoted_field_name = "";
-    if (field_name != NULL) {
-      quoted_field_name = StringPrintf(" '%s'", field_name);
-    }
-    // no space below to avoid double space when the field name is missing.
-    GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
-               << "UTF-8 data when " << operation_str << " a protocol "
-               << "buffer. Use the 'bytes' type if you intend to send raw "
-               << "bytes. ";
+    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+               << operation_str
+               << " protocol buffer. Strings must contain only UTF-8; "
+                  "use the 'bytes' type for raw bytes.";
   }
 }