Add multiple-offset versions of the various URL reformatting functions. Fixed a couple of erroneous unit tests of offsets into username/password.

Note: This does not complete the work required for 78153 -- tis but the first 2/3rds. BUG=78153 TEST=Many unit tests updated and added. Review URL: http://codereview.chromium.org/6822038 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
author: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
committer: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
commit: a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree: fdf872770d4cd58ee753f219475850490a008f6d /net/base/escape.cc
parent: 2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download: chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2
1 files changed, 85 insertions, 32 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 64bd107..61c3e81 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -2,11 +2,12 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include <algorithm>
-
 #include "net/base/escape.h"
 
+#include <algorithm>
+
 #include "base/logging.h"
+#include "base/scoped_ptr.h"
 #include "base/string_piece.h"
 #include "base/string_util.h"
 #include "base/utf_string_conversions.h"
@@ -98,15 +99,14 @@ const char kUrlUnescape[128] = {
 };
 
 template<typename STR>
-STR UnescapeURLImpl(const STR& escaped_text,
-                    UnescapeRule::Type rules,
-                    size_t* offset_for_adjustment) {
-  size_t offset_temp = string16::npos;
-  if (!offset_for_adjustment)
-    offset_for_adjustment = &offset_temp;
-  else if (*offset_for_adjustment >= escaped_text.length())
-    *offset_for_adjustment = string16::npos;
-
+STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
+                               UnescapeRule::Type rules,
+                               std::vector<size_t>* offsets_for_adjustment) {
+  if (offsets_for_adjustment) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(escaped_text.length()));
+  }
   // Do not unescape anything, return the |escaped_text| text.
   if (rules == UnescapeRule::NONE)
     return escaped_text;
@@ -117,6 +117,7 @@ STR UnescapeURLImpl(const STR& escaped_text,
   STR result;
   result.reserve(escaped_text.length());
 
+  AdjustEncodingOffset::Adjustments adjustments;  // Locations of adjusted text.
   for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
     if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
       // Non ASCII character, append as is.
@@ -144,17 +145,9 @@ STR UnescapeURLImpl(const STR& escaped_text,
              // Additionally allow control characters if requested.
              (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
           // Use the unescaped version of the character.
-          size_t length_before_append = result.length();
+          adjustments.push_back(i);
           result.push_back(value);
           i += 2;
-
-          // Adjust offset to match length change.
-          if (*offset_for_adjustment != std::string::npos) {
-            if (*offset_for_adjustment > (length_before_append + 2))
-              *offset_for_adjustment -= 2;
-            else if (*offset_for_adjustment > length_before_append)
-              *offset_for_adjustment = std::string::npos;
-          }
         } else {
           // Keep escaped. Append a percent and we'll get the following two
           // digits on the next loops through.
@@ -174,6 +167,26 @@ STR UnescapeURLImpl(const STR& escaped_text,
     }
   }
 
+  // Make offset adjustment.
+  if (offsets_for_adjustment && !adjustments.empty()) {
+    std::for_each(offsets_for_adjustment->begin(),
+                   offsets_for_adjustment->end(),
+                   AdjustEncodingOffset(adjustments));
+  }
+
+  return result;
+}
+
+template<typename STR>
+STR UnescapeURLImpl(const STR& escaped_text,
+                    UnescapeRule::Type rules,
+                    size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
   return result;
 }
 
@@ -234,33 +247,49 @@ std::string EscapeExternalHandlerValue(const std::string& text) {
   return Escape(text, kExternalHandlerCharmap, false);
 }
 
-string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
-                                           UnescapeRule::Type rules,
-                                           size_t* offset_for_adjustment) {
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+    const std::string& text,
+    UnescapeRule::Type rules,
+    std::vector<size_t>* offsets_for_adjustment) {
   std::wstring result;
-  size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+  std::vector<size_t> original_offsets;
+  if (offsets_for_adjustment)
+    original_offsets = *offsets_for_adjustment;
   std::string unescaped_url(
-      UnescapeURLImpl(text, rules, offset_for_adjustment));
-  if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(),
-                                &result, offset_for_adjustment))
+      UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment));
+  if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(),
+                                &result, offsets_for_adjustment))
     return WideToUTF16Hack(result);      // Character set looks like it's valid.
 
   // Not valid.  Return the escaped version.  Undo our changes to
   // |offset_for_adjustment| since we haven't changed the string after all.
+  if (offsets_for_adjustment)
+    *offsets_for_adjustment = original_offsets;
+  return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets(
+      text, offsets_for_adjustment));
+}
+
+string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+                                           UnescapeRule::Type rules,
+                                           size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  string16 result =
+      UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets);
   if (offset_for_adjustment)
-    *offset_for_adjustment = original_offset;
-  return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,
-                                                   offset_for_adjustment));
+    *offset_for_adjustment = offsets[0];
+  return result;
 }
 
 std::string UnescapeURLComponent(const std::string& escaped_text,
                                  UnescapeRule::Type rules) {
-  return UnescapeURLImpl(escaped_text, rules, NULL);
+  return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL);
 }
 
 string16 UnescapeURLComponent(const string16& escaped_text,
                               UnescapeRule::Type rules) {
-  return UnescapeURLImpl(escaped_text, rules, NULL);
+  return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL);
 }
 
 
@@ -350,3 +379,27 @@ string16 UnescapeForHTML(const string16& input) {
   }
   return text;
 }
+
+AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
+  : adjustments(adjustments) {}
+
+void AdjustEncodingOffset::operator()(size_t& offset) {
+  // For each encoded character occurring before an offset subtract 2.
+  if (offset == string16::npos)
+    return;
+  size_t adjusted_offset = offset;
+  for (Adjustments::const_iterator i = adjustments.begin();
+       i != adjustments.end(); ++i) {
+    size_t location = *i;
+    if (offset <= location) {
+      offset = adjusted_offset;
+      return;
+    }
+    if (offset <= (location + 2)) {
+      offset = string16::npos;
+      return;
+    }
+    adjusted_offset -= 2;
+  }
+  offset = adjusted_offset;
+}
author	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
committer	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
commit	a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree	fdf872770d4cd58ee753f219475850490a008f6d /net/base/escape.cc
parent	2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download	chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2