Add multiple-offset versions of the various URL reformatting functions. Fixed a couple of erroneous unit tests of offsets into username/password.

Note: This does not complete the work required for bug 78153; it is only the first two-thirds.

BUG=78153
TEST=Many unit tests updated and added.
Review URL: http://codereview.chromium.org/6822038
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
author: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
committer: mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-04-13 00:45:39 +0000
commit: a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree: fdf872770d4cd58ee753f219475850490a008f6d /net
parent: 2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download: chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2
6 files changed, 636 insertions, 202 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 64bd107..61c3e81 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -2,11 +2,12 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include <algorithm>
-
 #include "net/base/escape.h"
 
+#include <algorithm>
+
 #include "base/logging.h"
+#include "base/scoped_ptr.h"
 #include "base/string_piece.h"
 #include "base/string_util.h"
 #include "base/utf_string_conversions.h"
@@ -98,15 +99,14 @@ const char kUrlUnescape[128] = {
 };
 
 template<typename STR>
-STR UnescapeURLImpl(const STR& escaped_text,
-                    UnescapeRule::Type rules,
-                    size_t* offset_for_adjustment) {
-  size_t offset_temp = string16::npos;
-  if (!offset_for_adjustment)
-    offset_for_adjustment = &offset_temp;
-  else if (*offset_for_adjustment >= escaped_text.length())
-    *offset_for_adjustment = string16::npos;
-
+STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
+                               UnescapeRule::Type rules,
+                               std::vector<size_t>* offsets_for_adjustment) {
+  if (offsets_for_adjustment) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(escaped_text.length()));
+  }
   // Do not unescape anything, return the |escaped_text| text.
   if (rules == UnescapeRule::NONE)
     return escaped_text;
@@ -117,6 +117,7 @@ STR UnescapeURLImpl(const STR& escaped_text,
   STR result;
   result.reserve(escaped_text.length());
 
+  AdjustEncodingOffset::Adjustments adjustments;  // Locations of adjusted text.
   for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
     if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
       // Non ASCII character, append as is.
@@ -144,17 +145,9 @@ STR UnescapeURLImpl(const STR& escaped_text,
              // Additionally allow control characters if requested.
              (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
           // Use the unescaped version of the character.
-          size_t length_before_append = result.length();
+          adjustments.push_back(i);
           result.push_back(value);
           i += 2;
-
-          // Adjust offset to match length change.
-          if (*offset_for_adjustment != std::string::npos) {
-            if (*offset_for_adjustment > (length_before_append + 2))
-              *offset_for_adjustment -= 2;
-            else if (*offset_for_adjustment > length_before_append)
-              *offset_for_adjustment = std::string::npos;
-          }
         } else {
           // Keep escaped. Append a percent and we'll get the following two
           // digits on the next loops through.
@@ -174,6 +167,26 @@ STR UnescapeURLImpl(const STR& escaped_text,
     }
   }
 
+  // Make offset adjustment.
+  if (offsets_for_adjustment && !adjustments.empty()) {
+    std::for_each(offsets_for_adjustment->begin(),
+                   offsets_for_adjustment->end(),
+                   AdjustEncodingOffset(adjustments));
+  }
+
+  return result;
+}
+
+template<typename STR>
+STR UnescapeURLImpl(const STR& escaped_text,
+                    UnescapeRule::Type rules,
+                    size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
   return result;
 }
 
@@ -234,33 +247,49 @@ std::string EscapeExternalHandlerValue(const std::string& text) {
   return Escape(text, kExternalHandlerCharmap, false);
 }
 
-string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
-                                           UnescapeRule::Type rules,
-                                           size_t* offset_for_adjustment) {
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+    const std::string& text,
+    UnescapeRule::Type rules,
+    std::vector<size_t>* offsets_for_adjustment) {
   std::wstring result;
-  size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+  std::vector<size_t> original_offsets;
+  if (offsets_for_adjustment)
+    original_offsets = *offsets_for_adjustment;
   std::string unescaped_url(
-      UnescapeURLImpl(text, rules, offset_for_adjustment));
-  if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(),
-                                &result, offset_for_adjustment))
+      UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment));
+  if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(),
+                                &result, offsets_for_adjustment))
     return WideToUTF16Hack(result);      // Character set looks like it's valid.
 
   // Not valid.  Return the escaped version.  Undo our changes to
   // |offset_for_adjustment| since we haven't changed the string after all.
+  if (offsets_for_adjustment)
+    *offsets_for_adjustment = original_offsets;
+  return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets(
+      text, offsets_for_adjustment));
+}
+
+string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+                                           UnescapeRule::Type rules,
+                                           size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  string16 result =
+      UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets);
   if (offset_for_adjustment)
-    *offset_for_adjustment = original_offset;
-  return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,
-                                                   offset_for_adjustment));
+    *offset_for_adjustment = offsets[0];
+  return result;
 }
 
 std::string UnescapeURLComponent(const std::string& escaped_text,
                                  UnescapeRule::Type rules) {
-  return UnescapeURLImpl(escaped_text, rules, NULL);
+  return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL);
 }
 
 string16 UnescapeURLComponent(const string16& escaped_text,
                               UnescapeRule::Type rules) {
-  return UnescapeURLImpl(escaped_text, rules, NULL);
+  return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL);
 }
 
 
@@ -350,3 +379,27 @@ string16 UnescapeForHTML(const string16& input) {
   }
   return text;
 }
+
+AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
+  : adjustments(adjustments) {}
+
+void AdjustEncodingOffset::operator()(size_t& offset) {
+  // For each encoded character occurring before an offset subtract 2.
+  if (offset == string16::npos)
+    return;
+  size_t adjusted_offset = offset;
+  for (Adjustments::const_iterator i = adjustments.begin();
+       i != adjustments.end(); ++i) {
+    size_t location = *i;
+    if (offset <= location) {
+      offset = adjusted_offset;
+      return;
+    }
+    if (offset <= (location + 2)) {
+      offset = string16::npos;
+      return;
+    }
+    adjusted_offset -= 2;
+  }
+  offset = adjusted_offset;
+}
diff --git a/net/base/escape.h b/net/base/escape.h
index faa7bd3..f4c99a3 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -7,6 +7,7 @@
 #pragma once
 
 #include <string>
+#include <vector>
 
 #include "base/basictypes.h"
 #include "base/string16.h"
@@ -99,15 +100,20 @@ string16 UnescapeURLComponent(const string16& escaped_text,
 // Unescapes the given substring as a URL, and then tries to interpret the
 // result as being encoded as UTF-8. If the result is convertable into UTF-8, it
 // will be returned as converted. If it is not, the original escaped string will
-// be converted into a string16 and returned.
-//
-// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that
-// will be adjusted to point at the same logical place in the result string.  If
-// this isn't possible because it points into the middle of an escape sequence
-// or past the end of the string, it will be set to string16::npos.
+// be converted into a string16 and returned. |offset[s]_for_adjustment|
+// specifies one or more offsets into the source strings; each offset will be
+// adjusted to point at the same logical place in the result strings during
+// decoding.  If this isn't possible because an offset points past the end of
+// the source strings or into the middle of a multibyte sequence, the offending
+// offset will be set to std::wstring::npos. |offset[s]_for_adjustment| may be
+// NULL.
 string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
                                            UnescapeRule::Type rules,
                                            size_t* offset_for_adjustment);
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+    const std::string& text,
+    UnescapeRule::Type rules,
+    std::vector<size_t>* offsets_for_adjustment);
 
 // Unescape the following ampersand character codes from |text|:
 // &lt; &gt; &amp; &quot; &#39;
@@ -129,4 +135,17 @@ bool EscapeQueryParamValue(const string16& text, const char* codepage,
 // assumes the codepage is UTF8.  This is provided as a convenience.
 string16 EscapeQueryParamValueUTF8(const string16& text, bool use_plus);
 
+// Private Functions (Exposed for Unit Testing) --------------------------------
+
+// A function called by std::for_each that will adjust any offset which occurs
+// after one or more encoded characters.
+struct AdjustEncodingOffset {
+  typedef std::vector<size_t> Adjustments;
+
+  explicit AdjustEncodingOffset(const Adjustments& adjustments);
+  void operator()(size_t& offset);
+
+  const Adjustments& adjustments;
+};
+
 #endif  // NET_BASE_ESCAPE_H_
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 60d4ae3..3a8d895 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <algorithm>
 #include <string>
 
 #include "net/base/escape.h"
@@ -15,6 +16,8 @@
 
 namespace {
 
+static const size_t kNpos = string16::npos;
+
 struct EscapeCase {
   const wchar_t* input;
   const wchar_t* output;
@@ -396,3 +399,39 @@ TEST(EscapeTest, UnescapeForHTML) {
     EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
   }
 }
+
+TEST(EscapeTest, AdjustEncodingOffset) {
+  // Imagine we have strings as shown in the following cases where the
+  // %XX's represent encoded characters
+
+  // 1: abc%ECdef ==> abcXdef
+  std::vector<size_t> offsets;
+  for (size_t t = 0; t < 9; ++t)
+    offsets.push_back(t);
+  AdjustEncodingOffset::Adjustments adjustments;
+  adjustments.push_back(3);
+  std::for_each(offsets.begin(), offsets.end(),
+                AdjustEncodingOffset(adjustments));
+  size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
+  EXPECT_EQ(offsets.size(), arraysize(expected_1));
+  for (size_t i = 0; i < arraysize(expected_1); ++i)
+    EXPECT_EQ(expected_1[i], offsets[i]);
+
+
+  // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
+  offsets.clear();
+  for (size_t t = 0; t < 18; ++t)
+    offsets.push_back(t);
+  adjustments.clear();
+  adjustments.push_back(0);
+  adjustments.push_back(6);
+  adjustments.push_back(9);
+  adjustments.push_back(15);
+  std::for_each(offsets.begin(), offsets.end(),
+                AdjustEncodingOffset(adjustments));
+  size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
+                         kNpos, 6, 7, 8, 9, kNpos, kNpos};
+  EXPECT_EQ(offsets.size(), arraysize(expected_2));
+  for (size_t i = 0; i < arraysize(expected_2); ++i)
+    EXPECT_EQ(expected_2[i], offsets[i]);
+}
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 1aaa98b..378ac7b 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -67,7 +67,6 @@
 #endif
 #include "unicode/datefmt.h"
 
-
 using base::Time;
 
 namespace net {
@@ -734,6 +733,80 @@ bool IDNToUnicodeOneComponent(const char16* comp,
   return false;
 }
 
+struct SubtractFromOffset {
+  explicit SubtractFromOffset(size_t amount)
+    : amount(amount) {}
+  void operator()(size_t& offset) {
+    if (offset != std::wstring::npos)
+      if (offset >= amount)
+        offset -= amount;
+      else
+        offset = std::wstring::npos;
+  }
+
+  size_t amount;
+};
+
+struct AddToOffset {
+  explicit AddToOffset(size_t amount)
+    : amount(amount) {}
+  void operator()(size_t& offset) {
+    if (offset != std::wstring::npos)
+      offset += amount;
+  }
+
+  size_t amount;
+};
+
+std::vector<size_t> OffsetsIntoSection(
+    std::vector<size_t>* offsets_for_adjustment,
+    size_t section_begin) {
+  std::vector<size_t> offsets_into_section;
+  if (offsets_for_adjustment) {
+    std::transform(offsets_for_adjustment->begin(),
+                   offsets_for_adjustment->end(),
+                   std::back_inserter(offsets_into_section),
+                   ClampComponentOffset(section_begin));
+    std::for_each(offsets_into_section.begin(), offsets_into_section.end(),
+                  SubtractFromOffset(section_begin));
+  }
+  return offsets_into_section;
+}
+
+void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section,
+                             std::vector<size_t>* offsets_for_adjustment,
+                             size_t old_section_len,
+                             size_t new_section_len,
+                             size_t section_begin) {
+  if (offsets_for_adjustment) {
+    DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());
+    std::vector<size_t>::const_iterator host_offsets_iter =
+        offsets_into_section.begin();
+    for (std::vector<size_t>::iterator offsets_iter =
+            offsets_for_adjustment->begin();
+         offsets_iter != offsets_for_adjustment->end();
+         ++offsets_iter, ++host_offsets_iter) {
+      size_t offset = *offsets_iter;
+      if (offset == std::wstring::npos || offset < section_begin) {
+        // The offset is before the host section so leave it as is.
+        continue;
+      }
+      if (offset >= section_begin + old_section_len) {
+        // The offset is after the host section so adjust by host length delta.
+        offset += new_section_len - old_section_len;
+      } else if (*host_offsets_iter != std::wstring::npos) {
+        // The offset is within the host and valid so adjust by the host
+        // reformatting offsets results.
+        offset = section_begin + *host_offsets_iter;
+      } else {
+        // The offset is invalid.
+        offset = std::wstring::npos;
+      }
+      *offsets_iter = offset;
+    }
+  }
+}
+
 // If |component| is valid, its begin is incremented by |delta|.
 void AdjustComponent(int delta, url_parse::Component* component) {
   if (!component->is_valid())
@@ -760,7 +833,7 @@ std::wstring FormatUrlInternal(const GURL& url,
                                UnescapeRule::Type unescape_rules,
                                url_parse::Parsed* new_parsed,
                                size_t* prefix_end,
-                               size_t* offset_for_adjustment);
+                               std::vector<size_t>* offsets_for_adjustment);
 
 // Helper for FormatUrl()/FormatUrlInternal().
 std::wstring FormatViewSourceUrl(const GURL& url,
@@ -769,18 +842,20 @@ std::wstring FormatViewSourceUrl(const GURL& url,
                                  UnescapeRule::Type unescape_rules,
                                  url_parse::Parsed* new_parsed,
                                  size_t* prefix_end,
-                                 size_t* offset_for_adjustment) {
+                                 std::vector<size_t>* offsets_for_adjustment) {
   DCHECK(new_parsed);
+  DCHECK(offsets_for_adjustment);
   const wchar_t* const kWideViewSource = L"view-source:";
   const size_t kViewSourceLengthPlus1 = 12;
+  std::vector<size_t> saved_offsets(*offsets_for_adjustment);
 
   GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
-  size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?
-      std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);
-  size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?
-      NULL : &temp_offset;
+  // Clamp the offsets to the source area.
+  std::for_each(offsets_for_adjustment->begin(),
+                offsets_for_adjustment->end(),
+                SubtractFromOffset(kViewSourceLengthPlus1));
   std::wstring result = FormatUrlInternal(real_url, languages, format_types,
-      unescape_rules, new_parsed, prefix_end, temp_offset_ptr);
+      unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
   result.insert(0, kWideViewSource);
 
   // Adjust position values.
@@ -794,57 +869,61 @@ std::wstring FormatViewSourceUrl(const GURL& url,
   AdjustComponents(kViewSourceLengthPlus1, new_parsed);
   if (prefix_end)
     *prefix_end += kViewSourceLengthPlus1;
-  if (temp_offset_ptr) {
-    *offset_for_adjustment = (temp_offset == std::wstring::npos) ?
-        std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);
+  std::for_each(offsets_for_adjustment->begin(),
+                offsets_for_adjustment->end(),
+                AddToOffset(kViewSourceLengthPlus1));
+  // Restore all offsets which were not affected by FormatUrlInternal.
+  DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());
+  for (size_t i = 0; i < saved_offsets.size(); ++i) {
+    if (saved_offsets[i] < kViewSourceLengthPlus1)
+      (*offsets_for_adjustment)[i] = saved_offsets[i];
   }
   return result;
 }
 
 // Appends the substring |in_component| inside of the URL |spec| to |output|,
 // and the resulting range will be filled into |out_component|. |unescape_rules|
-// defines how to clean the URL for human readability.  |offset_for_adjustment|
-// is an offset into |output| which will be adjusted based on how it maps to the
-// component being converted; if it is less than output->length(), it will be
-// untouched, and if it is greater than output->length() + in_component.len it
-// will be shortened by the difference in lengths between the input and output
-// components.  Otherwise it points into the component being converted, and is
-// adjusted to point to the same logical place in |output|.
-// |offset_for_adjustment| may not be NULL.
+// defines how to clean the URL for human readability.  |offsets_for_adjustment|
+// is an array of offsets into |output| each of which will be adjusted based on
+// how it maps to the component being converted; if it is less than
+// output->length(), it will be untouched, and if it is greater than
+// output->length() + in_component.len it will be adjusted by the difference in
+// lengths between the input and output components.  Otherwise it points into
+// the component being converted, and is adjusted to point to the same logical
+// place in |output|. |offsets_for_adjustment| may not be NULL.
 void AppendFormattedComponent(const std::string& spec,
                               const url_parse::Component& in_component,
                               UnescapeRule::Type unescape_rules,
                               std::wstring* output,
                               url_parse::Component* out_component,
-                              size_t* offset_for_adjustment) {
+                              std::vector<size_t>* offsets_for_adjustment) {
   DCHECK(output);
-  DCHECK(offset_for_adjustment);
+  DCHECK(offsets_for_adjustment);
   if (in_component.is_nonempty()) {
-    out_component->begin = static_cast<int>(output->length());
-    size_t offset_past_current_output =
-        ((*offset_for_adjustment == std::wstring::npos) ||
-         (*offset_for_adjustment < output->length())) ?
-            std::wstring::npos : (*offset_for_adjustment - output->length());
-    size_t* offset_into_component =
-        (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?
-            NULL : &offset_past_current_output;
+    size_t component_begin = output->length();
+    out_component->begin = static_cast<int>(component_begin);
+
+    // Compose a list of offsets within the component area.
+    std::vector<size_t> offsets_into_component =
+        OffsetsIntoSection(offsets_for_adjustment, component_begin);
+
     if (unescape_rules == UnescapeRule::NONE) {
-      output->append(UTF8ToWideAndAdjustOffset(
+      output->append(UTF8ToWideAndAdjustOffsets(
           spec.substr(in_component.begin, in_component.len),
-          offset_into_component));
+          &offsets_into_component));
     } else {
-      output->append(UTF16ToWideHack(UnescapeAndDecodeUTF8URLComponent(
-          spec.substr(in_component.begin, in_component.len), unescape_rules,
-          offset_into_component)));
-    }
-    out_component->len =
-        static_cast<int>(output->length()) - out_component->begin;
-    if (offset_into_component) {
-      *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ?
-          std::wstring::npos : (out_component->begin + *offset_into_component);
-    } else if (offset_past_current_output != std::wstring::npos) {
-      *offset_for_adjustment += out_component->len - in_component.len;
+      output->append(UTF16ToWideHack(
+          UnescapeAndDecodeUTF8URLComponentWithOffsets(
+              spec.substr(in_component.begin, in_component.len), unescape_rules,
+              &offsets_into_component)));
     }
+    size_t new_component_len = output->length() - component_begin;
+    out_component->len = static_cast<int>(new_component_len);
+
+    // Apply offset adjustments.
+    size_t old_component_len = static_cast<size_t>(in_component.len);
+    ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,
+        old_component_len, new_component_len, component_begin);
   } else {
     out_component->reset();
   }
@@ -858,15 +937,16 @@ std::wstring FormatUrlInternal(const GURL& url,
                                UnescapeRule::Type unescape_rules,
                                url_parse::Parsed* new_parsed,
                                size_t* prefix_end,
-                               size_t* offset_for_adjustment) {
+                               std::vector<size_t>* offsets_for_adjustment) {
   url_parse::Parsed parsed_temp;
   if (!new_parsed)
     new_parsed = &parsed_temp;
   else
     *new_parsed = url_parse::Parsed();
-  size_t offset_temp = std::wstring::npos;
-  if (!offset_for_adjustment)
-    offset_for_adjustment = &offset_temp;
+
+  std::vector<size_t> offsets_temp;
+  if (!offsets_for_adjustment)
+    offsets_for_adjustment = &offsets_temp;
 
   std::wstring url_string;
 
@@ -874,7 +954,9 @@ std::wstring FormatUrlInternal(const GURL& url,
   if (url.is_empty()) {
     if (prefix_end)
       *prefix_end = 0;
-    *offset_for_adjustment = std::wstring::npos;
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  LimitOffset<std::wstring>(0));
     return url_string;
   }
 
@@ -886,15 +968,17 @@ std::wstring FormatUrlInternal(const GURL& url,
   if (url.SchemeIs(kViewSource) &&
       !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
     return FormatViewSourceUrl(url, languages, format_types,
-        unescape_rules, new_parsed, prefix_end, offset_for_adjustment);
+        unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
   }
 
   // We handle both valid and invalid URLs (this will give us the spec
   // regardless of validity).
   const std::string& spec = url.possibly_invalid_spec();
   const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
-  if (*offset_for_adjustment >= spec.length())
-    *offset_for_adjustment = std::wstring::npos;
+  size_t spec_length = spec.length();
+  std::for_each(offsets_for_adjustment->begin(),
+                offsets_for_adjustment->end(),
+                LimitOffset<std::wstring>(spec_length));
 
   // Copy everything before the username (the scheme and the separators.)
   // These are ASCII.
@@ -922,48 +1006,47 @@ std::wstring FormatUrlInternal(const GURL& url,
     // e.g. "http://google.com:search@evil.ru/"
     new_parsed->username.reset();
     new_parsed->password.reset();
-    if ((*offset_for_adjustment != std::wstring::npos) &&
+    // Update the offsets based on removed username and/or password.
+    if (!offsets_for_adjustment->empty() &&
         (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+      AdjustOffset::Adjustments adjustments;
       if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
         // The seeming off-by-one and off-by-two in these first two lines are to
         // account for the ':' after the username and '@' after the password.
-        if (*offset_for_adjustment >
-            static_cast<size_t>(parsed.password.end())) {
-          *offset_for_adjustment -=
-              (parsed.username.len + parsed.password.len + 2);
-        } else if (*offset_for_adjustment >
-                   static_cast<size_t>(parsed.username.begin)) {
-          *offset_for_adjustment = std::wstring::npos;
-        }
+        adjustments.push_back(AdjustOffset::Adjustment(
+            static_cast<size_t>(parsed.username.begin),
+            static_cast<size_t>(parsed.username.len + parsed.password.len +
+                2), 0));
       } else {
         const url_parse::Component* nonempty_component =
             parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
-        // The seeming off-by-one in these first two lines is to account for the
-        // '@' after the username/password.
-        if (*offset_for_adjustment >
-            static_cast<size_t>(nonempty_component->end())) {
-          *offset_for_adjustment -= (nonempty_component->len + 1);
-        } else if (*offset_for_adjustment >
-                   static_cast<size_t>(nonempty_component->begin)) {
-          *offset_for_adjustment = std::wstring::npos;
-        }
+        // The seeming off-by-one below is to account for the '@' after the
+        // username/password.
+        adjustments.push_back(AdjustOffset::Adjustment(
+            static_cast<size_t>(nonempty_component->begin),
+            static_cast<size_t>(nonempty_component->len + 1), 0));
       }
+
+      // Make offset adjustment.
+      std::for_each(offsets_for_adjustment->begin(),
+                    offsets_for_adjustment->end(),
+                    AdjustOffset(adjustments));
     }
   } else {
     AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
-                             &new_parsed->username, offset_for_adjustment);
+                             &new_parsed->username, offsets_for_adjustment);
     if (parsed.password.is_valid())
       url_string.push_back(':');
     AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
-                             &new_parsed->password, offset_for_adjustment);
+                             &new_parsed->password, offsets_for_adjustment);
     if (parsed.username.is_valid() || parsed.password.is_valid())
       url_string.push_back('@');
   }
   if (prefix_end)
     *prefix_end = static_cast<size_t>(url_string.length());
 
-  AppendFormattedHost(url, languages, &url_string, new_parsed,
-                      offset_for_adjustment);
+  AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
+                                 offsets_for_adjustment);
 
   // Port.
   if (parsed.port.is_nonempty()) {
@@ -981,41 +1064,35 @@ std::wstring FormatUrlInternal(const GURL& url,
   if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
       !CanStripTrailingSlash(url)) {
     AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
-                             &new_parsed->path, offset_for_adjustment);
+                             &new_parsed->path, offsets_for_adjustment);
   }
   if (parsed.query.is_valid())
     url_string.push_back('?');
   AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
-                           &new_parsed->query, offset_for_adjustment);
+                           &new_parsed->query, offsets_for_adjustment);
 
   // Reference is stored in valid, unescaped UTF-8, so we can just convert.
   if (parsed.ref.is_valid()) {
     url_string.push_back('#');
-    new_parsed->ref.begin = url_string.length();
-    size_t offset_past_current_output =
-        ((*offset_for_adjustment == std::wstring::npos) ||
-         (*offset_for_adjustment < url_string.length())) ?
-            std::wstring::npos : (*offset_for_adjustment - url_string.length());
-    size_t* offset_into_ref =
-        (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?
-            NULL : &offset_past_current_output;
+    size_t ref_begin = url_string.length();
+    new_parsed->ref.begin = static_cast<int>(ref_begin);
+
+    // Compose a list of offsets within the section.
+    std::vector<size_t> offsets_into_ref =
+        OffsetsIntoSection(offsets_for_adjustment, ref_begin);
+
     if (parsed.ref.len > 0) {
-      url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,
-                                                              parsed.ref.len),
-                                                  offset_into_ref));
-    }
-    new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;
-    if (offset_into_ref) {
-      *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ?
-          std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);
-    } else if (offset_past_current_output != std::wstring::npos) {
-      // We clamped the offset near the beginning of this function to ensure it
-      // was within the input URL.  If we reach here, the input was something
-      // invalid and non-parseable such that the offset was past any component
-      // we could figure out.  In this case it won't be represented in the
-      // output string, so reset it.
-      *offset_for_adjustment = std::wstring::npos;
+      url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,
+                                                               parsed.ref.len),
+                                                   &offsets_into_ref));
     }
+    size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
+    size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
+    new_parsed->ref.len = static_cast<int>(new_ref_len);
+
+    // Apply offset adjustments.
+    ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
+        old_ref_len, new_ref_len, ref_begin);
   }
 
   // If we need to strip out http do it after the fact. This way we don't need
@@ -1023,12 +1100,11 @@ std::wstring FormatUrlInternal(const GURL& url,
   const size_t kHTTPSize = arraysize(kHTTP) - 1;
   if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {
     url_string = url_string.substr(kHTTPSize);
-    if (*offset_for_adjustment != std::wstring::npos) {
-      if (*offset_for_adjustment < kHTTPSize)
-        *offset_for_adjustment = std::wstring::npos;
-      else
-        *offset_for_adjustment -= kHTTPSize;
-    }
+    AdjustOffset::Adjustments adjustments;
+    adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  AdjustOffset(adjustments));
     if (prefix_end)
       *prefix_end -= kHTTPSize;
 
@@ -1186,21 +1262,20 @@ std::string GetHeaderParamValue(const std::string& field,
 //
 // We may want to skip this step in the case of file URLs to allow unicode
 // UNC hostnames regardless of encodings.
-std::wstring IDNToUnicode(const char* host,
-                          size_t host_len,
-                          const std::wstring& languages,
-                          size_t* offset_for_adjustment) {
+std::wstring IDNToUnicodeWithOffsets(
+    const char* host,
+    size_t host_len,
+    const std::wstring& languages,
+    std::vector<size_t>* offsets_for_adjustment) {
   // Convert the ASCII input to a wide string for ICU.
   string16 input16;
   input16.reserve(host_len);
   input16.insert(input16.end(), host, host + host_len);
 
-  string16 out16;
-  size_t output_offset = offset_for_adjustment ?
-      *offset_for_adjustment : std::wstring::npos;
-
   // Do each component of the host separately, since we enforce script matching
   // on a per-component basis.
+  AdjustOffset::Adjustments adjustments;
+  string16 out16;
   for (size_t component_start = 0, component_end;
        component_start < input16.length();
        component_start = component_end + 1) {
@@ -1209,22 +1284,18 @@ std::wstring IDNToUnicode(const char* host,
     if (component_end == string16::npos)
       component_end = input16.length();  // For getting the last component.
     size_t component_length = component_end - component_start;
-
-    size_t output_component_start = out16.length();
+    size_t new_component_start = out16.length();
     bool converted_idn = false;
     if (component_end > component_start) {
       // Add the substring that we just found.
       converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
           component_length, languages, &out16);
     }
-    size_t output_component_length = out16.length() - output_component_start;
+    size_t new_component_length = out16.length() - new_component_start;
 
-    if ((output_offset != std::wstring::npos) &&
-        (*offset_for_adjustment > component_start)) {
-      if ((*offset_for_adjustment < component_end) && converted_idn)
-        output_offset = std::wstring::npos;
-      else
-        output_offset += output_component_length - component_length;
+    if (converted_idn && offsets_for_adjustment) {
+      adjustments.push_back(AdjustOffset::Adjustment(
+          component_start, component_length, new_component_length));
     }
 
     // Need to add the dot we just found (if we found one).
@@ -1232,10 +1303,28 @@ std::wstring IDNToUnicode(const char* host,
       out16.push_back('.');
   }
 
-  if (offset_for_adjustment)
-    *offset_for_adjustment = output_offset;
+  // Make offset adjustment.
+  if (offsets_for_adjustment && !adjustments.empty()) {
+    std::for_each(offsets_for_adjustment->begin(),
+                  offsets_for_adjustment->end(),
+                  AdjustOffset(adjustments));
+  }
 
-  return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);
+  return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);
+}
+
+std::wstring IDNToUnicode(const char* host,
+                          size_t host_len,
+                          const std::wstring& languages,
+                          size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  std::wstring result =
+      IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return result;
 }
 
 std::string CanonicalizeHost(const std::string& host,
@@ -1648,51 +1737,73 @@ std::string GetHostOrSpecFromURL(const GURL& url) {
   return url.has_host() ? TrimEndingDot(url.host()) : url.spec();
 }
 
-void AppendFormattedHost(const GURL& url,
-                         const std::wstring& languages,
-                         std::wstring* output,
-                         url_parse::Parsed* new_parsed,
-                         size_t* offset_for_adjustment) {
+void AppendFormattedHostWithOffsets(
+    const GURL& url,
+    const std::wstring& languages,
+    std::wstring* output,
+    url_parse::Parsed* new_parsed,
+    std::vector<size_t>* offsets_for_adjustment) {
   DCHECK(output);
   const url_parse::Component& host =
       url.parsed_for_possibly_invalid_spec().host;
 
   if (host.is_nonempty()) {
     // Handle possible IDN in the host name.
-    int new_host_begin = static_cast<int>(output->length());
+    size_t host_begin = output->length();
     if (new_parsed)
-      new_parsed->host.begin = new_host_begin;
-    size_t offset_past_current_output =
-        (!offset_for_adjustment ||
-         (*offset_for_adjustment == std::wstring::npos) ||
-         (*offset_for_adjustment < output->length())) ?
-            std::wstring::npos : (*offset_for_adjustment - output->length());
-    size_t* offset_into_host =
-        (offset_past_current_output >= static_cast<size_t>(host.len)) ?
-            NULL : &offset_past_current_output;
+      new_parsed->host.begin = static_cast<int>(host_begin);
+    size_t old_host_len = static_cast<size_t>(host.len);
+
+    // Compose a list of offsets within the host area.
+    std::vector<size_t> offsets_into_host =
+        OffsetsIntoSection(offsets_for_adjustment, host_begin);
 
     const std::string& spec = url.possibly_invalid_spec();
     DCHECK(host.begin >= 0 &&
            ((spec.length() == 0 && host.begin == 0) ||
             host.begin < static_cast<int>(spec.length())));
-    output->append(IDNToUnicode(&spec[host.begin],
-                   static_cast<size_t>(host.len), languages, offset_into_host));
+    output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,
+                                            languages, &offsets_into_host));
 
-    int new_host_len = static_cast<int>(output->length()) - new_host_begin;
+    size_t new_host_len = output->length() - host_begin;
     if (new_parsed)
-      new_parsed->host.len = new_host_len;
-    if (offset_into_host) {
-      *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ?
-          std::wstring::npos : (new_host_begin + *offset_into_host);
-    } else if (offset_past_current_output != std::wstring::npos) {
-      *offset_for_adjustment += new_host_len - host.len;
-    }
+      new_parsed->host.len = static_cast<int>(new_host_len);
+
+    // Apply offset adjustments.
+    ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,
+        old_host_len, new_host_len, host_begin);
   } else if (new_parsed) {
     new_parsed->host.reset();
   }
 }
 
+void AppendFormattedHost(const GURL& url,
+                         const std::wstring& languages,
+                         std::wstring* output,
+                         url_parse::Parsed* new_parsed,
+                         size_t* offset_for_adjustment) {
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+}
+
 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|.
+string16 FormatUrlWithOffsets(const GURL& url,
+                              const std::string& languages,
+                              FormatUrlTypes format_types,
+                              UnescapeRule::Type unescape_rules,
+                              url_parse::Parsed* new_parsed,
+                              size_t* prefix_end,
+                              std::vector<size_t>* offsets_for_adjustment) {
+  return WideToUTF16Hack(
+      FormatUrlInternal(url, ASCIIToWide(languages), format_types,
+                        unescape_rules, new_parsed, prefix_end,
+                        offsets_for_adjustment));
+}
+
 string16 FormatUrl(const GURL& url,
                    const std::string& languages,
                    FormatUrlTypes format_types,
@@ -1700,10 +1811,15 @@ string16 FormatUrl(const GURL& url,
                    url_parse::Parsed* new_parsed,
                    size_t* prefix_end,
                    size_t* offset_for_adjustment) {
-  return WideToUTF16Hack(
+  std::vector<size_t> offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  string16 result = WideToUTF16Hack(
       FormatUrlInternal(url, ASCIIToWide(languages), format_types,
-                        unescape_rules, new_parsed, prefix_end,
-                        offset_for_adjustment));
+                        unescape_rules, new_parsed, prefix_end, &offsets));
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return result;
 }
 
 bool CanStripTrailingSlash(const GURL& url) {
@@ -2156,4 +2272,12 @@ NetworkInterface::NetworkInterface(const std::string& name,
 NetworkInterface::~NetworkInterface() {
 }
 
+ClampComponentOffset::ClampComponentOffset(size_t component_start)
+  : component_start(component_start) {}
+
+size_t ClampComponentOffset::operator()(size_t offset) {
+  return (offset >= component_start) ?
+      offset : std::wstring::npos;
+}
+
 }  // namespace net
diff --git a/net/base/net_util.h b/net/base/net_util.h
index bae27c3..0ff3369 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -198,15 +198,21 @@ std::string GetFileNameFromCD(const std::string& header,
 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
 // When |languages| is empty, even that mixing is not allowed.
 //
-// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
-// point at the same logical place in the output string. If this isn't possible
-// because it points past the end of |host| or into the middle of a punycode
-// sequence, it will be set to std::wstring::npos.  |offset_for_adjustment| may
-// be NULL.
+// (|offset[s]_for_adjustment|) specifies one or more offsets into |host|;
+// each offset will be adjusted to point at the same logical place in the
+// result string during decoding.  If this isn't possible because an offset
+// points past the end of |host| or into the middle of a punycode sequence,
+// the offending offset will be set to std::wstring::npos.
+// |offset[s]_for_adjustment| may be NULL.
 std::wstring IDNToUnicode(const char* host,
                           size_t host_len,
                           const std::wstring& languages,
                           size_t* offset_for_adjustment);
+std::wstring IDNToUnicodeWithOffsets(
+    const char* host,
+    size_t host_len,
+    const std::wstring& languages,
+    std::vector<size_t>* offsets_for_adjustment);
 
 // Canonicalizes |host| and returns it.  Also fills |host_info| with
 // IP address information.  |host_info| must not be NULL.
@@ -292,11 +298,24 @@ int SetNonBlocking(int fd);
 // the user. The given parsed structure will be updated. The host name formatter
 // also takes the same accept languages component as ElideURL. |new_parsed| may
 // be null.
+//
+// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
+// |url|'s spec(); each offset will be adjusted to point at the same logical
+// place in the result strings after reformatting of the host.  If this isn't
+// possible because an offset points past the end of the host or into the middle
+// of a multi-character sequence, the offending offset will be set to
+// std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
 void AppendFormattedHost(const GURL& url,
                          const std::wstring& languages,
                          std::wstring* output,
                          url_parse::Parsed* new_parsed,
                          size_t* offset_for_adjustment);
+void AppendFormattedHostWithOffsets(
+    const GURL& url,
+    const std::wstring& languages,
+    std::wstring* output,
+    url_parse::Parsed* new_parsed,
+    std::vector<size_t>* offsets_for_adjustment);
 
 // Creates a string representation of |url|. The IDN host name may be in Unicode
 // if |languages| accepts the Unicode representation. |format_type| is a bitmask
@@ -309,12 +328,13 @@ void AppendFormattedHost(const GURL& url,
 // The last three parameters may be NULL.
 // |new_parsed| will be set to the parsing parameters of the resultant URL.
 // |prefix_end| will be the length before the hostname of the resultant URL.
-// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
-// will be modified to reflect changes this function makes to the output string;
-// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
-// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
-// the output string will be "http://c.com/" and |offset_for_adjustment| will be
-// 8.  If the offset cannot be successfully adjusted (e.g. because it points
+//
+// (|offset[s]_for_adjustment|) specifies one or more offsets into the original
+// |url|'s spec(); each offset will be modified to reflect changes this function
+// makes to the output string. For example, if |url| is "http://a:b@c.com/",
+// |omit_username_password| is true, and an offset is 12 (the offset of '.'),
+// then on return the output string will be "http://c.com/" and the offset will
+// be 8.  If an offset cannot be successfully adjusted (e.g. because it points
 // into the middle of a component that was entirely removed, past the end of the
 // string, or into the middle of an encoding sequence), it will be set to
 // string16::npos.
@@ -325,6 +345,13 @@ string16 FormatUrl(const GURL& url,
                    url_parse::Parsed* new_parsed,
                    size_t* prefix_end,
                    size_t* offset_for_adjustment);
+string16 FormatUrlWithOffsets(const GURL& url,
+                              const std::string& languages,
+                              FormatUrlTypes format_types,
+                              UnescapeRule::Type unescape_rules,
+                              url_parse::Parsed* new_parsed,
+                              size_t* prefix_end,
+                              std::vector<size_t>* offsets_for_adjustment);
 
 // This is a convenience function for FormatUrl() with
 // format_types = kFormatUrlOmitAll and unescape = SPACES.  This is the typical
@@ -454,6 +481,16 @@ typedef std::list<NetworkInterface> NetworkInterfaceList;
 // Can be called only on a thread that allows IO.
 bool GetNetworkList(NetworkInterfaceList* networks);
 
+// Private adjustment functor used with std::transform which sets the offset
+// to npos if the offset occurs before |component_start|; otherwise the offset
+// is left unaltered. Exposed here for unit testing.
+struct ClampComponentOffset {
+  explicit ClampComponentOffset(size_t component_start);
+  size_t operator()(size_t offset);
+
+  const size_t component_start;
+};
+
 }  // namespace net
 
 #endif  // NET_BASE_NET_UTIL_H_
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index b547f83..4265866 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -4,6 +4,8 @@
 
 #include "net/base/net_util.h"
 
+#include <algorithm>
+
 #include "base/file_path.h"
 #include "base/format_macros.h"
 #include "base/string_number_conversions.h"
@@ -21,6 +23,8 @@ namespace net {
 
 namespace {
 
+static const size_t kNpos = string16::npos;
+
 struct FileCase {
   const wchar_t* file;
   const char* url;
@@ -988,6 +992,20 @@ TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
                       &offset);
     EXPECT_EQ(adjust_cases[i].output_offset, offset);
   }
+
+  std::vector<size_t> offsets;
+  for (size_t i = 0; i < 40; ++i)
+    offsets.push_back(i);
+  IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39,
+                          L"zh-CN", &offsets);
+  size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos, kNpos,
+                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos,
+                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15,
+                       16, kNpos};
+  ASSERT_EQ(40U, arraysize(expected));
+  for (size_t i = 0; i < 40; ++i)
+    EXPECT_EQ(expected[i], offsets[i]);
 }
 
 TEST(NetUtilTest, CompliantHost) {
@@ -1799,13 +1817,24 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(basic_cases[i].output_offset, offset);
   }
 
+  size_t url_size = 26;
+  std::vector<size_t> offsets;
+  for (size_t i = 0; i < url_size + 1; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
+                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+                       NULL, NULL, &offsets);
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(i, offsets[i]);
+  EXPECT_EQ(kNpos, offsets[url_size]);
+
   const struct {
     const char* input_url;
     size_t input_offset;
     size_t output_offset;
   } omit_auth_cases[] = {
     {"http://foo:bar@www.google.com/", 6, 6},
-    {"http://foo:bar@www.google.com/", 7, 7},
+    {"http://foo:bar@www.google.com/", 7, string16::npos},
     {"http://foo:bar@www.google.com/", 8, string16::npos},
     {"http://foo:bar@www.google.com/", 10, string16::npos},
     {"http://foo:bar@www.google.com/", 11, string16::npos},
@@ -1823,13 +1852,28 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
   }
 
+  url_size = 30;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
+                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+                       NULL, NULL, &offsets);
+  for (size_t i = 0; i < 7; ++i)
+    EXPECT_EQ(i, offsets[i]);
+  for (size_t i = 7; i < 15; ++i)
+    EXPECT_EQ(kNpos, offsets[i]);
+  for (size_t i = 16; i < url_size; ++i)
+    EXPECT_EQ(i - 8 , offsets[i]);
+
   const AdjustOffsetCase view_source_cases[] = {
     {0, 0},
     {3, 3},
     {11, 11},
     {12, 12},
     {13, 13},
-    {19, 19},
+    {18, 18},
+    {19, string16::npos},
     {20, string16::npos},
     {23, 19},
     {26, 22},
@@ -1843,6 +1887,20 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(view_source_cases[i].output_offset, offset);
   }
 
+  url_size = 38;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
+                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+                       NULL, NULL, &offsets);
+  size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                       17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
+                       24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
+  ASSERT_EQ(url_size, arraysize(expected));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected[i], offsets[i]);
+
   const AdjustOffsetCase idn_hostname_cases[] = {
     {8, string16::npos},
     {16, string16::npos},
@@ -1859,6 +1917,21 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
   }
 
+  url_size = 33;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
+                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
+                       NULL, NULL, &offsets);
+  size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
+                         17, 18, 19};
+  ASSERT_EQ(url_size, arraysize(expected_1));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_1[i], offsets[i]);
+
   const AdjustOffsetCase unescape_cases[] = {
     {25, 25},
     {26, string16::npos},
@@ -1881,11 +1954,31 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(unescape_cases[i].output_offset, offset);
   }
 
+  url_size = 68;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL(
+      "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
+      "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
+      &offsets);
+  size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
+                         26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
+  ASSERT_EQ(url_size, arraysize(expected_2));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_2[i], offsets[i]);
+
   const AdjustOffsetCase ref_cases[] = {
     {30, 30},
     {31, 31},
     {32, string16::npos},
     {34, 32},
+    {35, string16::npos},
     {37, 33},
     {38, string16::npos},
   };
@@ -1899,6 +1992,22 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(ref_cases[i].output_offset, offset);
   }
 
+  url_size = 38;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  // "http://www.google.com/foo.html#\x30B0\x30B0z"
+  FormatUrlWithOffsets(GURL(
+      "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
+      kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
+      &offsets);
+  size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+                         30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
+  ASSERT_EQ(url_size, arraysize(expected_3));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_3[i], offsets[i]);
+
   const AdjustOffsetCase omit_http_cases[] = {
     {0, string16::npos},
     {3, string16::npos},
@@ -1912,6 +2021,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(omit_http_cases[i].output_offset, offset);
   }
 
+  url_size = 23;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
+      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
+  size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
+                         2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
+  ASSERT_EQ(url_size, arraysize(expected_4));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_4[i], offsets[i]);
+
   const AdjustOffsetCase omit_http_start_with_ftp[] = {
     {0, 0},
     {3, 3},
@@ -1924,6 +2045,18 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
     EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
   }
 
+  url_size = 23;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
+      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
+  size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                         16, 17, 18, 19, 20, 21, kNpos};
+  ASSERT_EQ(url_size, arraysize(expected_5));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_5[i], offsets[i]);
+
   const AdjustOffsetCase omit_all_cases[] = {
     {12, 0},
     {13, 1},
@@ -1936,6 +2069,19 @@ TEST(NetUtilTest, FormatUrlAdjustOffset) {
                    UnescapeRule::NORMAL, NULL, NULL, &offset);
     EXPECT_EQ(omit_all_cases[i].output_offset, offset);
   }
+
+  url_size = 21;
+  offsets.clear();
+  for (size_t i = 0; i < url_size; ++i)
+    offsets.push_back(i);
+  FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
+                       UnescapeRule::NORMAL, NULL, NULL, &offsets);
+  size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
+                         kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
+                         kNpos};
+  ASSERT_EQ(url_size, arraysize(expected_6));
+  for (size_t i = 0; i < url_size; ++i)
+    EXPECT_EQ(expected_6[i], offsets[i]);
 }
 
 TEST(NetUtilTest, SimplifyUrlForRequest) {
@@ -2221,4 +2367,20 @@ TEST(NetUtilTest, GetNetworkList) {
   }
 }
 
+TEST(NetUtilTest, AdjustComponentOffset) {
+  std::vector<size_t> old_offsets;
+  for (size_t i = 0; i < 10; ++i)
+    old_offsets.push_back(i);
+  std::vector<size_t> new_offsets;
+  std::transform(old_offsets.begin(),
+                 old_offsets.end(),
+                 std::back_inserter(new_offsets),
+                 ClampComponentOffset(5));
+  size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9};
+  EXPECT_EQ(new_offsets.size(), arraysize(expected_1));
+  EXPECT_EQ(new_offsets.size(), old_offsets.size());
+  for (size_t i = 0; i < arraysize(expected_1); ++i)
+    EXPECT_EQ(expected_1[i], new_offsets[i]);
+}
+
 }  // namespace net
author	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
committer	mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-04-13 00:45:39 +0000
commit	a47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
tree	fdf872770d4cd58ee753f219475850490a008f6d /net
parent	2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
download	chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2