32 files changed, 1393 insertions, 747 deletions
diff --git a/app/gfx/text_elider.cc b/app/gfx/text_elider.cc
index a1db1c6..dc9b199 100644
--- a/app/gfx/text_elider.cc
+++ b/app/gfx/text_elider.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -33,8 +33,8 @@ std::wstring ElideUrl(const GURL& url,
                       const std::wstring& languages) {
   // Get a formatted string and corresponding parsing of the url.
   url_parse::Parsed parsed;
-  std::wstring url_string =
-      net::FormatUrl(url, languages, true, UnescapeRule::SPACES, &parsed, NULL);
+  std::wstring url_string = net::FormatUrl(url, languages, true,
+      UnescapeRule::SPACES, &parsed, NULL, NULL);
   if (available_pixel_width <= 0)
     return url_string;
 
@@ -334,12 +334,12 @@ std::wstring ElideText(const std::wstring& text,
 SortedDisplayURL::SortedDisplayURL(const GURL& url,
                                    const std::wstring& languages) {
   std::wstring host;
-  net::AppendFormattedHost(url, languages, &host, NULL);
+  net::AppendFormattedHost(url, languages, &host, NULL, NULL);
   sort_host_ = WideToUTF16Hack(host);
   string16 host_minus_www = WideToUTF16Hack(net::StripWWW(host));
   url_parse::Parsed parsed;
   display_url_ = WideToUTF16Hack(net::FormatUrl(url, languages,
-      true, UnescapeRule::SPACES, &parsed, &prefix_end_));
+      true, UnescapeRule::SPACES, &parsed, &prefix_end_, NULL));
   if (sort_host_.length() > host_minus_www.length()) {
     prefix_end_ += sort_host_.length() - host_minus_www.length();
     sort_host_.swap(host_minus_www);
diff --git a/base/base.gyp b/base/base.gyp
index 71ff640..f09e2e5 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -633,6 +633,7 @@
         'timer_unittest.cc',
         'tracked_objects_unittest.cc',
         'tuple_unittest.cc',
+        'utf_string_conversions_unittest.cc',
         'values_unittest.cc',
         'version_unittest.cc',
         'waitable_event_unittest.cc',
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc
index ba9f9ae..c93b103 100644
--- a/base/i18n/icu_string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -157,6 +157,90 @@ const char kCodepageUTF16LE[] = "UTF-16LE";
 
 // Codepage <-> Wide/UTF-16  ---------------------------------------------------
 
+// Convert a UTF-16 string into the specified codepage_name.  If the codepage
+// isn't found, return false.
+bool UTF16ToCodepage(const string16& utf16,
+                     const char* codepage_name,
+                     OnStringConversionError::Type on_error,
+                     std::string* encoded) {
+  encoded->clear();
+
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter* converter = ucnv_open(codepage_name, &status);
+  if (!U_SUCCESS(status))
+    return false;
+
+  return ConvertFromUTF16(converter, utf16.c_str(),
+                          static_cast<int>(utf16.length()), on_error, encoded);
+}
+
+bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
+                                    const char* codepage_name,
+                                    OnStringConversionError::Type on_error,
+                                    string16* utf16,
+                                    size_t* offset_for_adjustment) {
+  utf16->clear();
+
+  UErrorCode status = U_ZERO_ERROR;
+  UConverter* converter = ucnv_open(codepage_name, &status);
+  if (!U_SUCCESS(status))
+    return false;
+
+  // Even in the worst case, the maximum length in 2-byte units of UTF-16
+  // output would be at most the same as the number of bytes in input. There
+  // is no single-byte encoding in which a character is mapped to a
+  // non-BMP character requiring two 2-byte units.
+  //
+  // Moreover, non-BMP characters in legacy multibyte encodings
+  // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
+  // BOCU and SCSU, but we don't care about them.
+  size_t uchar_max_length = encoded.length() + 1;
+
+  SetUpErrorHandlerForToUChars(on_error, converter, &status);
+  char16* byte_buffer = WriteInto(utf16, uchar_max_length);
+  int byte_buffer_length = static_cast<int>(uchar_max_length);
+  const char* data = encoded.data();
+  int length = static_cast<int>(encoded.length());
+  int actual_size = 0;
+  if (offset_for_adjustment) {
+    if (*offset_for_adjustment >= encoded.length()) {
+      *offset_for_adjustment = string16::npos;
+    } else if (*offset_for_adjustment != 0) {
+      // Try to adjust the offset by converting the string in two pieces and
+      // using the length of the first piece as the adjusted offset.
+      actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
+          data, static_cast<int>(*offset_for_adjustment), &status);
+      if (U_SUCCESS(status)) {
+        // Conversion succeeded, so update the offset and then fall through to
+        // appending the remainder of the string.
+        data += *offset_for_adjustment;
+        length -= *offset_for_adjustment;
+        *offset_for_adjustment = actual_size;
+        byte_buffer += actual_size;
+        byte_buffer_length -= actual_size;
+      } else {
+        // The offset may have been in the middle of an encoding sequence; mark
+        // it as having failed to adjust and then try to convert the entire
+        // string.
+        *offset_for_adjustment = string16::npos;
+        actual_size = 0;
+        ucnv_reset(converter);
+        status = U_ZERO_ERROR;
+      }
+    }
+  }
+  actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,
+                               length, &status);
+  ucnv_close(converter);
+  if (!U_SUCCESS(status)) {
+    utf16->clear();  // Make sure the output is empty on error.
+    return false;
+  }
+
+  utf16->resize(actual_size);
+  return true;
+}
+
 // Convert a wstring into the specified codepage_name.  If the codepage
 // isn't found, return false.
 bool WideToCodepage(const std::wstring& wide,
@@ -188,31 +272,16 @@ bool WideToCodepage(const std::wstring& wide,
 #endif  // defined(WCHAR_T_IS_UTF32)
 }
 
-// Convert a UTF-16 string into the specified codepage_name.  If the codepage
-// isn't found, return false.
-bool UTF16ToCodepage(const string16& utf16,
-                    const char* codepage_name,
-                    OnStringConversionError::Type on_error,
-                    std::string* encoded) {
-  encoded->clear();
-
-  UErrorCode status = U_ZERO_ERROR;
-  UConverter* converter = ucnv_open(codepage_name, &status);
-  if (!U_SUCCESS(status))
-    return false;
-
-  return ConvertFromUTF16(converter, utf16.c_str(),
-                          static_cast<int>(utf16.length()), on_error, encoded);
-}
-
 // Converts a string of the given codepage into wstring.
 // If the codepage isn't found, return false.
-bool CodepageToWide(const std::string& encoded,
-                    const char* codepage_name,
-                    OnStringConversionError::Type on_error,
-                    std::wstring* wide) {
+bool CodepageToWideAndAdjustOffset(const std::string& encoded,
+                                   const char* codepage_name,
+                                   OnStringConversionError::Type on_error,
+                                   std::wstring* wide,
+                                   size_t* offset_for_adjustment) {
 #if defined(WCHAR_T_IS_UTF16)
-  return CodepageToUTF16(encoded, codepage_name, on_error, wide);
+  return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
+                                        offset_for_adjustment);
 #elif defined(WCHAR_T_IS_UTF32)
   wide->clear();
 
@@ -227,70 +296,53 @@ bool CodepageToWide(const std::string& encoded,
   // this can be 4 times larger than actually needed.
   size_t wchar_max_length = encoded.length() + 1;
 
-  // The byte buffer and its length to pass to ucnv_toAlgorithimic.
-  char* byte_buffer = reinterpret_cast<char*>(
-      WriteInto(wide, wchar_max_length));
-  int byte_buffer_length = static_cast<int>(wchar_max_length) * 4;
-
   SetUpErrorHandlerForToUChars(on_error, converter, &status);
-  int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(),
-                                       converter,
-                                       byte_buffer,
-                                       byte_buffer_length,
-                                       encoded.data(),
-                                       static_cast<int>(encoded.length()),
-                                       &status);
+  char* byte_buffer =
+      reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
+  int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
+  const char* data = encoded.data();
+  int length = static_cast<int>(encoded.length());
+  int actual_size = 0;
+  if (offset_for_adjustment) {
+    if (*offset_for_adjustment >= encoded.length()) {
+      *offset_for_adjustment = std::wstring::npos;
+    } else if (*offset_for_adjustment != 0) {
+      // Try to adjust the offset by converting the string in two pieces and
+      // using the length of the first piece as the adjusted offset.
+      actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+          byte_buffer, byte_buffer_length, data,
+          static_cast<int>(*offset_for_adjustment), &status);
+      if (U_SUCCESS(status)) {
+        // Conversion succeeded, so update the offset and then fall through to
+        // appending the remainder of the string.
+        data += *offset_for_adjustment;
+        length -= *offset_for_adjustment;
+        *offset_for_adjustment = actual_size / sizeof(wchar_t);
+        byte_buffer += actual_size;
+        byte_buffer_length -= actual_size;
+      } else {
+        // The offset may have been in the middle of an encoding sequence; mark
+        // it as having failed to adjust and then try to convert the entire
+        // string.
+        *offset_for_adjustment = std::wstring::npos;
+        actual_size = 0;
+        ucnv_reset(converter);
+        status = U_ZERO_ERROR;
+      }
+    }
+  }
+  actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+      byte_buffer, byte_buffer_length, data, length, &status);
   ucnv_close(converter);
-
   if (!U_SUCCESS(status)) {
     wide->clear();  // Make sure the output is empty on error.
     return false;
   }
 
   // actual_size is # of bytes.
-  wide->resize(actual_size / 4);
+  wide->resize(actual_size / sizeof(wchar_t));
   return true;
 #endif  // defined(WCHAR_T_IS_UTF32)
 }
 
-// Converts a string of the given codepage into UTF-16.
-// If the codepage isn't found, return false.
-bool CodepageToUTF16(const std::string& encoded,
-                     const char* codepage_name,
-                     OnStringConversionError::Type on_error,
-                     string16* utf16) {
-  utf16->clear();
-
-  UErrorCode status = U_ZERO_ERROR;
-  UConverter* converter = ucnv_open(codepage_name, &status);
-  if (!U_SUCCESS(status))
-    return false;
-
-  // Even in the worst case, the maximum length in 2-byte units of UTF-16
-  // output would be at most the same as the number of bytes in input. There
-  // is no single-byte encoding in which a character is mapped to a
-  // non-BMP character requiring two 2-byte units.
-  //
-  // Moreover, non-BMP characters in legacy multibyte encodings
-  // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
-  // BOCU and SCSU, but we don't care about them.
-  size_t uchar_max_length = encoded.length() + 1;
-
-  SetUpErrorHandlerForToUChars(on_error, converter, &status);
-  int actual_size = ucnv_toUChars(converter,
-                                  WriteInto(utf16, uchar_max_length),
-                                  static_cast<int>(uchar_max_length),
-                                  encoded.data(),
-                                  static_cast<int>(encoded.length()),
-                                  &status);
-  ucnv_close(converter);
-  if (!U_SUCCESS(status)) {
-    utf16->clear();  // Make sure the output is empty on error.
-    return false;
-  }
-
-  utf16->resize(actual_size);
-  return true;
-}
-
 }  // namespace base
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
index e7dac605..6f2cab7 100644
--- a/base/i18n/icu_string_conversions.h
+++ b/base/i18n/icu_string_conversions.h
@@ -40,6 +40,17 @@ extern const char kCodepageUTF8[];
 extern const char kCodepageUTF16BE[];
 extern const char kCodepageUTF16LE[];
 
+// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|,
+// which will be adjusted to point at the same logical place in |utf16|.  If
+// this isn't possible because it points past the end of |encoded| or into the
+// middle of a multibyte sequence, it will be set to string16::npos.
+// |offset_for_adjustment| may be NULL.
+bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
+                                    const char* codepage_name,
+                                    OnStringConversionError::Type on_error,
+                                    string16* utf16,
+                                    size_t* offset_for_adjustment);
+
 // Converts between UTF-16 strings and the encoding specified.  If the
 // encoding doesn't exist or the encoding fails (when on_error is FAIL),
 // returns false.
@@ -47,11 +58,24 @@ bool UTF16ToCodepage(const string16& utf16,
                      const char* codepage_name,
                      OnStringConversionError::Type on_error,
                      std::string* encoded);
+inline bool CodepageToUTF16(const std::string& encoded,
+                            const char* codepage_name,
+                            OnStringConversionError::Type on_error,
+                            string16* utf16) {
+  return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16,
+                                        NULL);
+}
 
-bool CodepageToUTF16(const std::string& encoded,
-                     const char* codepage_name,
-                     OnStringConversionError::Type on_error,
-                     string16* utf16);
+// Like CodepageToWide() (see below), but also takes an offset into |encoded|,
+// which will be adjusted to point at the same logical place in |wide|.  If
+// this isn't possible because it points past the end of |encoded| or into the
+// middle of a multibyte sequence, it will be set to std::wstring::npos.
+// |offset_for_adjustment| may be NULL.
+bool CodepageToWideAndAdjustOffset(const std::string& encoded,
+                                   const char* codepage_name,
+                                   OnStringConversionError::Type on_error,
+                                   std::wstring* wide,
+                                   size_t* offset_for_adjustment);
 
 // Converts between wide strings and the encoding specified.  If the
 // encoding doesn't exist or the encoding fails (when on_error is FAIL),
@@ -60,10 +84,13 @@ bool WideToCodepage(const std::wstring& wide,
                     const char* codepage_name,
                     OnStringConversionError::Type on_error,
                     std::string* encoded);
-bool CodepageToWide(const std::string& encoded,
-                    const char* codepage_name,
-                    OnStringConversionError::Type on_error,
-                    std::wstring* wide);
+inline bool CodepageToWide(const std::string& encoded,
+                           const char* codepage_name,
+                           OnStringConversionError::Type on_error,
+                           std::wstring* wide) {
+  return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide,
+                                       NULL);
+}
 
 }  // namespace base
 
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc
index 969ddb7..0088a03 100644
--- a/base/i18n/icu_string_conversions_unittest.cc
+++ b/base/i18n/icu_string_conversions_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -9,9 +9,9 @@
 #include <sstream>
 
 #include "base/basictypes.h"
+#include "base/i18n/icu_string_conversions.h"
 #include "base/logging.h"
 #include "base/utf_string_conversions.h"
-#include "base/i18n/icu_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
 namespace base {
@@ -39,7 +39,7 @@ string16 BuildString16(const wchar_t* s) {
 #endif
 }
 
-static const wchar_t* const kConvertRoundtripCases[] = {
+const wchar_t* const kConvertRoundtripCases[] = {
   L"Google Video",
   // "网页 图片 资讯更多 »"
   L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
@@ -68,7 +68,7 @@ static const wchar_t* const kConvertRoundtripCases[] = {
 
 }  // namespace
 
-TEST(StringUtilTest, ConvertCodepageUTF8) {
+TEST(ICUStringConversionsTest, ConvertCodepageUTF8) {
   // Make sure WideToCodepage works like WideToUTF8.
   for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
     std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
@@ -156,7 +156,7 @@ static const struct {
    true,
 #if defined(WCHAR_T_IS_UTF16)
    L"\xD840\xDC00\x4E00",
-#else
+#elif defined(WCHAR_T_IS_UTF32)
    L"\x20000\x4E00",
 #endif
    L"\xD840\xDC00\x4E00"},
@@ -234,7 +234,7 @@ static const struct {
    NULL},
 };
 
-TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
+TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) {
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
     std::wstring wide;
     bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
@@ -296,7 +296,7 @@ TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
                               OnStringConversionError::SKIP, &encoded));
 }
 
-TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) {
+TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) {
     string16 utf16;
     bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,
@@ -325,4 +325,45 @@ TEST(StringUtilTest, ConvertBetweenCodepageAndUTF16) {
   }
 }
 
+static const struct {
+  const char* codepage_name;
+  const char* encoded;
+  size_t input_offset;
+  size_t u16_output_offset;
+  size_t wide_output_offset;
+} kAdjustOffsetCases[] = {
+  {"gb2312", "", 0, string16::npos, std::wstring::npos},
+  {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},
+  {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},
+  {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},
+  {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},
+  {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,
+   std::wstring::npos},
+  {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,
+   std::wstring::npos},
+  {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},
+};
+
+TEST(ICUStringConversionsTest, AdjustOffset) {
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {
+    string16 utf16;
+    size_t offset = kAdjustOffsetCases[i].input_offset;
+    EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,
+        kAdjustOffsetCases[i].codepage_name,
+        OnStringConversionError::FAIL, &utf16, &offset));
+    EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
+
+    std::wstring wide;
+    offset = kAdjustOffsetCases[i].input_offset;
+    CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,
+        kAdjustOffsetCases[i].codepage_name,
+        OnStringConversionError::FAIL, &wide, &offset);
+#if defined(WCHAR_T_IS_UTF16)
+    EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
+#elif defined(WCHAR_T_IS_UTF32)
+    EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);
+#endif
+  }
+}
+
 }  // namespace base
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index 0ccea91..d691003 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -229,203 +229,6 @@ TEST(StringUtilTest, IsStringUTF8) {
   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 }
 
-static const wchar_t* const kConvertRoundtripCases[] = {
-  L"Google Video",
-  // "网页 图片 资讯更多 »"
-  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
-  //  "Παγκόσμιος Ιστός"
-  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
-  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
-  // "Поиск страниц на русском"
-  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
-  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
-  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
-  // "전체서비스"
-  L"\xc804\xccb4\xc11c\xbe44\xc2a4",
-
-  // Test characters that take more than 16 bits. This will depend on whether
-  // wchar_t is 16 or 32 bits.
-#if defined(WCHAR_T_IS_UTF16)
-  L"\xd800\xdf00",
-  // ?????  (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
-  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
-#elif defined(WCHAR_T_IS_UTF32)
-  L"\x10300",
-  // ?????  (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
-  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
-#endif
-};
-
-TEST(StringUtilTest, ConvertUTF8AndWide) {
-  // we round-trip all the wide strings through UTF-8 to make sure everything
-  // agrees on the conversion. This uses the stream operators to test them
-  // simultaneously.
-  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
-    std::ostringstream utf8;
-    utf8 << WideToUTF8(kConvertRoundtripCases[i]);
-    std::wostringstream wide;
-    wide << UTF8ToWide(utf8.str());
-
-    EXPECT_EQ(kConvertRoundtripCases[i], wide.str());
-  }
-}
-
-TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) {
-  // An empty std::wstring should be converted to an empty std::string,
-  // and vice versa.
-  std::wstring wempty;
-  std::string empty;
-  EXPECT_EQ(empty, WideToUTF8(wempty));
-  EXPECT_EQ(wempty, UTF8ToWide(empty));
-}
-
-TEST(StringUtilTest, ConvertUTF8ToWide) {
-  struct UTF8ToWideCase {
-    const char* utf8;
-    const wchar_t* wide;
-    bool success;
-  } convert_cases[] = {
-    // Regular UTF-8 input.
-    {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
-    // Non-character is passed through.
-    {"\xef\xbf\xbfHello", L"\xffffHello", true},
-    // Truncated UTF-8 sequence.
-    {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
-    // Truncated off the end.
-    {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},
-    // Non-shortest-form UTF-8.
-    {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
-    // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
-    {"\xed\xb0\x80", L"", false},
-    // Non-BMP characters. The second is a non-character regarded as valid.
-    // The result will either be in UTF-16 or UTF-32.
-#if defined(WCHAR_T_IS_UTF16)
-    {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
-    {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
-#elif defined(WCHAR_T_IS_UTF32)
-    {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
-    {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
-#endif
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
-    std::wstring converted;
-    EXPECT_EQ(convert_cases[i].success,
-              UTF8ToWide(convert_cases[i].utf8,
-                         strlen(convert_cases[i].utf8),
-                         &converted));
-    std::wstring expected(convert_cases[i].wide);
-    EXPECT_EQ(expected, converted);
-  }
-
-  // Manually test an embedded NULL.
-  std::wstring converted;
-  EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted));
-  ASSERT_EQ(3U, converted.length());
-#if defined(WCHAR_T_IS_UNSIGNED)
-  EXPECT_EQ(0U, converted[0]);
-#else
-  EXPECT_EQ(0, converted[0]);
-#endif
-  EXPECT_EQ('Z', converted[1]);
-  EXPECT_EQ('\t', converted[2]);
-
-  // Make sure that conversion replaces, not appends.
-  EXPECT_TRUE(UTF8ToWide("B", 1, &converted));
-  ASSERT_EQ(1U, converted.length());
-  EXPECT_EQ('B', converted[0]);
-}
-
-#if defined(WCHAR_T_IS_UTF16)
-// This test is only valid when wchar_t == UTF-16.
-TEST(StringUtilTest, ConvertUTF16ToUTF8) {
-  struct UTF16ToUTF8Case {
-    const wchar_t* utf16;
-    const char* utf8;
-    bool success;
-  } convert_cases[] = {
-    // Regular UTF-16 input.
-    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
-    // Test a non-BMP character.
-    {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
-    // Non-characters are passed through.
-    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
-    {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
-    // The first character is a truncated UTF-16 character.
-    {L"\xd800\x597d", "\xe5\xa5\xbd", false},
-    // Truncated at the end.
-    {L"\x597d\xd800", "\xe5\xa5\xbd", false},
-  };
-
-  for (int i = 0; i < arraysize(convert_cases); i++) {
-    std::string converted;
-    EXPECT_EQ(convert_cases[i].success,
-              WideToUTF8(convert_cases[i].utf16,
-                         wcslen(convert_cases[i].utf16),
-                         &converted));
-    std::string expected(convert_cases[i].utf8);
-    EXPECT_EQ(expected, converted);
-  }
-}
-
-#elif defined(WCHAR_T_IS_UTF32)
-// This test is only valid when wchar_t == UTF-32.
-TEST(StringUtilTest, ConvertUTF32ToUTF8) {
-  struct WideToUTF8Case {
-    const wchar_t* utf32;
-    const char* utf8;
-    bool success;
-  } convert_cases[] = {
-    // Regular 16-bit input.
-    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
-    // Test a non-BMP character.
-    {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
-    // Non-characters are passed through.
-    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
-    {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
-    // Invalid Unicode code points.
-    {L"\xfffffffHello", "Hello", false},
-    // The first character is a truncated UTF-16 character.
-    {L"\xd800\x597d", "\xe5\xa5\xbd", false},
-    {L"\xdc01Hello", "Hello", false},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
-    std::string converted;
-    EXPECT_EQ(convert_cases[i].success,
-              WideToUTF8(convert_cases[i].utf32,
-                         wcslen(convert_cases[i].utf32),
-                         &converted));
-    std::string expected(convert_cases[i].utf8);
-    EXPECT_EQ(expected, converted);
-  }
-}
-#endif  // defined(WCHAR_T_IS_UTF32)
-
-TEST(StringUtilTest, ConvertMultiString) {
-  static wchar_t wmulti[] = {
-    L'f', L'o', L'o', L'\0',
-    L'b', L'a', L'r', L'\0',
-    L'b', L'a', L'z', L'\0',
-    L'\0'
-  };
-  static char multi[] = {
-    'f', 'o', 'o', '\0',
-    'b', 'a', 'r', '\0',
-    'b', 'a', 'z', '\0',
-    '\0'
-  };
-  std::wstring wmultistring;
-  memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti));
-  EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());
-  std::string expected;
-  memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));
-  EXPECT_EQ(arraysize(multi) - 1, expected.length());
-  const std::string& converted = WideToUTF8(wmultistring);
-  EXPECT_EQ(arraysize(multi) - 1, converted.length());
-  EXPECT_EQ(expected, converted);
-}
-
 TEST(StringUtilTest, ConvertASCII) {
   static const char* char_cases[] = {
     "Google Video",
diff --git a/base/utf_string_conversions.cc b/base/utf_string_conversions.cc
index 6b25cd8..ffff50a 100644
--- a/base/utf_string_conversions.cc
+++ b/base/utf_string_conversions.cc
@@ -84,43 +84,50 @@ bool ReadUnicodeCharacter(const wchar_t* src, int32 src_len,
 
 // WriteUnicodeCharacter -------------------------------------------------------
 
-// Appends a UTF-8 character to the given 8-bit string.
-void WriteUnicodeCharacter(uint32 code_point, std::string* output) {
+// Appends a UTF-8 character to the given 8-bit string.  Returns the number of
+// bytes written.
+size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) {
   if (code_point <= 0x7f) {
     // Fast path the common case of one byte.
     output->push_back(code_point);
-    return;
+    return 1;
   }
 
-  // U8_APPEND_UNSAFE can append up to 4 bytes.
-  int32 char_offset = static_cast<int32>(output->length());
+  // CBU8_APPEND_UNSAFE can append up to 4 bytes.
+  size_t char_offset = output->length();
+  size_t original_char_offset = char_offset;
   output->resize(char_offset + CBU8_MAX_LENGTH);
 
   CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
 
-  // U8_APPEND_UNSAFE will advance our pointer past the inserted character, so
+  // CBU8_APPEND_UNSAFE will advance our offset past the inserted character, so
   // it will represent the new length of the string.
   output->resize(char_offset);
+  return char_offset - original_char_offset;
 }
 
-// Appends the given code point as a UTF-16 character to the STL string.
-void WriteUnicodeCharacter(uint32 code_point, string16* output) {
+// Appends the given code point as a UTF-16 character to the given 16-bit
+// string.  Returns the number of 16-bit values written.
+size_t WriteUnicodeCharacter(uint32 code_point, string16* output) {
   if (CBU16_LENGTH(code_point) == 1) {
     // Thie code point is in the Basic Multilingual Plane (BMP).
     output->push_back(static_cast<char16>(code_point));
-  } else {
-    // Non-BMP characters use a double-character encoding.
-    int32 char_offset = static_cast<int32>(output->length());
-    output->resize(char_offset + CBU16_MAX_LENGTH);
-    CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+    return 1;
   }
+  // Non-BMP characters use a double-character encoding.
+  size_t char_offset = output->length();
+  output->resize(char_offset + CBU16_MAX_LENGTH);
+  CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+  return CBU16_MAX_LENGTH;
 }
 
 #if defined(WCHAR_T_IS_UTF32)
-// Appends the given UTF-32 character to the given 32-bit string.
-inline void WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
+// Appends the given UTF-32 character to the given 32-bit string.  Returns the
+// number of 32-bit values written.
+inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
   // This is the easy case, just append the character.
   output->push_back(code_point);
+  return 1;
 }
 #endif  // defined(WCHAR_T_IS_UTF32)
 
@@ -131,31 +138,57 @@ inline void WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
 // determine the source, and the given output STL string will be replaced by
 // the result.
 template<typename SRC_CHAR, typename DEST_STRING>
-bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
-  output->clear();
+bool ConvertUnicode(const SRC_CHAR* src,
+                    size_t src_len,
+                    DEST_STRING* output,
+                    size_t* offset_for_adjustment) {
+  size_t output_offset =
+      (offset_for_adjustment && *offset_for_adjustment < src_len) ?
+          *offset_for_adjustment : DEST_STRING::npos;
 
   // ICU requires 32-bit numbers.
   bool success = true;
   int32 src_len32 = static_cast<int32>(src_len);
   for (int32 i = 0; i < src_len32; i++) {
     uint32 code_point;
+    size_t original_i = i;
+    size_t chars_written = 0;
     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
-      WriteUnicodeCharacter(code_point, output);
+      chars_written = WriteUnicodeCharacter(code_point, output);
     } else {
       // TODO(jungshik): consider adding 'Replacement character' (U+FFFD)
       // in place of an invalid codepoint.
       success = false;
     }
+    if ((output_offset != DEST_STRING::npos) &&
+        (*offset_for_adjustment > original_i)) {
+      // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
+      // character read, not after it (so that incrementing it in the loop
+      // increment will place it at the right location), so we need to account
+      // for that in determining the amount that was read.
+      if (*offset_for_adjustment <= static_cast<size_t>(i))
+        output_offset = DEST_STRING::npos;
+      else
+        output_offset += chars_written - (i - original_i + 1);
+    }
   }
+
+  if (offset_for_adjustment)
+    *offset_for_adjustment = output_offset;
   return success;
 }
 
-// Guesses the length of the output in UTF-8 in bytes, and reserves that amount
-// of space in the given string. We also assume that the input character types
-// are unsigned, which will be true for UTF-16 and -32 on our systems. We assume
-// the string length is greater than zero.
+// Guesses the length of the output in UTF-8 in bytes, clears that output
+// string, and reserves that amount of space.  We assume that the input
+// character types are unsigned, which will be true for UTF-16 and -32 on our
+// systems.
 template<typename CHAR>
-void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
+void PrepareForUTF8Output(const CHAR* src,
+                          size_t src_len,
+                          std::string* output) {
+  output->clear();
+  if (src_len == 0)
+    return;
   if (src[0] < 0x80) {
     // Assume that the entire input will be ASCII.
     output->reserve(src_len);
@@ -165,11 +198,15 @@ void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
   }
 }
 
-// Guesses the size of the output buffer (containing either UTF-16 or -32 data)
-// given some UTF-8 input that will be converted to it. See ReserveUTF8Output.
-// We assume the source length is > 0.
+// Prepares an output buffer (containing either UTF-16 or -32 data) given some
+// UTF-8 input that will be converted to it.  See PrepareForUTF8Output().
 template<typename STRING>
-void ReserveUTF16Or32Output(const char* src, size_t src_len, STRING* output) {
+void PrepareForUTF16Or32Output(const char* src,
+                               size_t src_len,
+                               STRING* output) {
+  output->clear();
+  if (src_len == 0)
+    return;
   if (static_cast<unsigned char>(src[0]) < 0x80) {
     // Assume the input is all ASCII, which means 1:1 correspondence.
     output->reserve(src_len);
@@ -184,111 +221,121 @@ void ReserveUTF16Or32Output(const char* src, size_t src_len, STRING* output) {
 
 // UTF-8 <-> Wide --------------------------------------------------------------
 
-std::string WideToUTF8(const std::wstring& wide) {
-  std::string ret;
-  if (wide.empty())
-    return ret;
+bool WideToUTF8AndAdjustOffset(const wchar_t* src,
+                               size_t src_len,
+                               std::string* output,
+                               size_t* offset_for_adjustment) {
+  PrepareForUTF8Output(src, src_len, output);
+  return ConvertUnicode<wchar_t, std::string>(src, src_len, output,
+                                              offset_for_adjustment);
+}
 
+std::string WideToUTF8AndAdjustOffset(const std::wstring& wide,
+                                      size_t* offset_for_adjustment) {
+  std::string ret;
   // Ignore the success flag of this call, it will do the best it can for
   // invalid input, which is what we want here.
-  WideToUTF8(wide.data(), wide.length(), &ret);
+  WideToUTF8AndAdjustOffset(wide.data(), wide.length(), &ret,
+                            offset_for_adjustment);
   return ret;
 }
 
-bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF8Output(src, src_len, output);
-  return ConvertUnicode<wchar_t, std::string>(src, src_len, output);
+bool UTF8ToWideAndAdjustOffset(const char* src,
+                               size_t src_len,
+                               std::wstring* output,
+                               size_t* offset_for_adjustment) {
+  PrepareForUTF16Or32Output(src, src_len, output);
+  return ConvertUnicode<char, std::wstring>(src, src_len, output,
+                                            offset_for_adjustment);
 }
 
-std::wstring UTF8ToWide(const base::StringPiece& utf8) {
+std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
+                                       size_t* offset_for_adjustment) {
   std::wstring ret;
-  if (utf8.empty())
-    return ret;
-
-  UTF8ToWide(utf8.data(), utf8.length(), &ret);
+  UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
+                            offset_for_adjustment);
   return ret;
 }
 
-bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode<char, std::wstring>(src, src_len, output);
-}
-
 // UTF-16 <-> Wide -------------------------------------------------------------
 
 #if defined(WCHAR_T_IS_UTF16)
 
 // When wide == UTF-16, then conversions are a NOP.
-string16 WideToUTF16(const std::wstring& wide) {
-  return wide;
-}
-
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+bool WideToUTF16AndAdjustOffset(const wchar_t* src,
+                                size_t src_len,
+                                string16* output,
+                                size_t* offset_for_adjustment) {
   output->assign(src, src_len);
+  if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
+    *offset_for_adjustment = string16::npos;
   return true;
 }
 
-std::wstring UTF16ToWide(const string16& utf16) {
-  return utf16;
+string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
+                                    size_t* offset_for_adjustment) {
+  if (offset_for_adjustment && (*offset_for_adjustment >= wide.length()))
+    *offset_for_adjustment = string16::npos;
+  return wide;
 }
 
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
+bool UTF16ToWideAndAdjustOffset(const char16* src,
+                                size_t src_len,
+                                std::wstring* output,
+                                size_t* offset_for_adjustment) {
   output->assign(src, src_len);
+  if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
+    *offset_for_adjustment = std::wstring::npos;
   return true;
 }
 
-#elif defined(WCHAR_T_IS_UTF32)
-
-string16 WideToUTF16(const std::wstring& wide) {
-  string16 ret;
-  if (wide.empty())
-    return ret;
-
-  WideToUTF16(wide.data(), wide.length(), &ret);
-  return ret;
+std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
+                                        size_t* offset_for_adjustment) {
+  if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
+    *offset_for_adjustment = std::wstring::npos;
+  return utf16;
 }
 
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
+#elif defined(WCHAR_T_IS_UTF32)
 
+bool WideToUTF16AndAdjustOffset(const wchar_t* src,
+                                size_t src_len,
+                                string16* output,
+                                size_t* offset_for_adjustment) {
+  output->clear();
   // Assume that normally we won't have any non-BMP characters so the counts
   // will be the same.
   output->reserve(src_len);
-  return ConvertUnicode<wchar_t, string16>(src, src_len, output);
+  return ConvertUnicode<wchar_t, string16>(src, src_len, output,
+                                           offset_for_adjustment);
 }
 
-std::wstring UTF16ToWide(const string16& utf16) {
-  std::wstring ret;
-  if (utf16.empty())
-    return ret;
-
-  UTF16ToWide(utf16.data(), utf16.length(), &ret);
+string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
+                                    size_t* offset_for_adjustment) {
+  string16 ret;
+  WideToUTF16AndAdjustOffset(wide.data(), wide.length(), &ret,
+                             offset_for_adjustment);
   return ret;
 }
 
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
+bool UTF16ToWideAndAdjustOffset(const char16* src,
+                                size_t src_len,
+                                std::wstring* output,
+                                size_t* offset_for_adjustment) {
+  output->clear();
   // Assume that normally we won't have any non-BMP characters so the counts
   // will be the same.
   output->reserve(src_len);
-  return ConvertUnicode<char16, std::wstring>(src, src_len, output);
+  return ConvertUnicode<char16, std::wstring>(src, src_len, output,
+                                              offset_for_adjustment);
+}
+
+std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
+                                        size_t* offset_for_adjustment) {
+  std::wstring ret;
+  UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
+                             offset_for_adjustment);
+  return ret;
 }
 
 #endif  // defined(WCHAR_T_IS_UTF32)
@@ -298,20 +345,12 @@ bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 #if defined(WCHAR_T_IS_UTF32)
 
 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode<char, string16>(src, src_len, output);
+  PrepareForUTF16Or32Output(src, src_len, output);
+  return ConvertUnicode<char, string16>(src, src_len, output, NULL);
 }
 
 string16 UTF8ToUTF16(const std::string& utf8) {
   string16 ret;
-  if (utf8.empty())
-    return ret;
-
   // Ignore the success flag of this call, it will do the best it can for
   // invalid input, which is what we want here.
   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
@@ -319,20 +358,12 @@ string16 UTF8ToUTF16(const std::string& utf8) {
 }
 
 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF8Output(src, src_len, output);
-  return ConvertUnicode<char16, std::string>(src, src_len, output);
+  PrepareForUTF8Output(src, src_len, output);
+  return ConvertUnicode<char16, std::string>(src, src_len, output, NULL);
 }
 
 std::string UTF16ToUTF8(const string16& utf16) {
   std::string ret;
-  if (utf16.empty())
-    return ret;
-
   // Ignore the success flag of this call, it will do the best it can for
   // invalid input, which is what we want here.
   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
diff --git a/base/utf_string_conversions.h b/base/utf_string_conversions.h
index 89846ed..323233b 100644
--- a/base/utf_string_conversions.h
+++ b/base/utf_string_conversions.h
@@ -10,6 +10,37 @@
 #include "base/string16.h"
 #include "base/string_piece.h"
 
+// Like the conversions below, but also takes an offset into the source string,
+// which will be adjusted to point at the same logical place in the result
+// string.  If this isn't possible because it points past the end of the source
+// string or into the middle of a multibyte sequence, it will be set to
+// std::wstring::npos.  |offset_for_adjustment| may be NULL.
+bool WideToUTF8AndAdjustOffset(const wchar_t* src,
+                               size_t src_len,
+                               std::string* output,
+                               size_t* offset_for_adjustment);
+std::string WideToUTF8AndAdjustOffset(const std::wstring& wide,
+                                      size_t* offset_for_adjustment);
+bool UTF8ToWideAndAdjustOffset(const char* src,
+                               size_t src_len,
+                               std::wstring* output,
+                               size_t* offset_for_adjustment);
+std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
+                                       size_t* offset_for_adjustment);
+
+bool WideToUTF16AndAdjustOffset(const wchar_t* src,
+                                size_t src_len,
+                                string16* output,
+                                size_t* offset_for_adjustment);
+string16 WideToUTF16AndAdjustOffset(const std::wstring& wide,
+                                    size_t* offset_for_adjustment);
+bool UTF16ToWideAndAdjustOffset(const char16* src,
+                                size_t src_len,
+                                std::wstring* output,
+                                size_t* offset_for_adjustment);
+std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
+                                        size_t* offset_for_adjustment);
+
 // These convert between UTF-8, -16, and -32 strings. They are potentially slow,
 // so avoid unnecessary conversions. The low-level versions return a boolean
 // indicating whether the conversion was 100% valid. In this case, it will still
@@ -23,15 +54,34 @@
 // the Unicode replacement character or adding |replacement_char| parameter.
 // Currently, it's skipped in the ouput, which could be problematic in
 // some situations.
-bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output);
-std::string WideToUTF8(const std::wstring& wide);
-bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output);
-std::wstring UTF8ToWide(const base::StringPiece& utf8);
-
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output);
-string16 WideToUTF16(const std::wstring& wide);
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);
-std::wstring UTF16ToWide(const string16& utf16);
+inline bool WideToUTF8(const wchar_t* src,
+                       size_t src_len,
+                       std::string* output) {
+  return WideToUTF8AndAdjustOffset(src, src_len, output, NULL);
+}
+inline std::string WideToUTF8(const std::wstring& wide) {
+  return WideToUTF8AndAdjustOffset(wide, NULL);
+}
+inline bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
+  return UTF8ToWideAndAdjustOffset(src, src_len, output, NULL);
+}
+inline std::wstring UTF8ToWide(const base::StringPiece& utf8) {
+  return UTF8ToWideAndAdjustOffset(utf8, NULL);
+}
+
+inline bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+  return WideToUTF16AndAdjustOffset(src, src_len, output, NULL);
+}
+inline string16 WideToUTF16(const std::wstring& wide) {
+  return WideToUTF16AndAdjustOffset(wide, NULL);
+}
+inline bool UTF16ToWide(const char16* src, size_t src_len,
+                        std::wstring* output) {
+  return UTF16ToWideAndAdjustOffset(src, src_len, output, NULL);
+}
+inline std::wstring UTF16ToWide(const string16& utf16) {
+  return UTF16ToWideAndAdjustOffset(utf16, NULL);
+}
 
 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
 string16 UTF8ToUTF16(const std::string& utf8);
diff --git a/base/utf_string_conversions_unittest.cc b/base/utf_string_conversions_unittest.cc
new file mode 100644
index 0000000..67af7c3
--- /dev/null
+++ b/base/utf_string_conversions_unittest.cc
@@ -0,0 +1,306 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "base/string_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace base {
+
+namespace {
+
+// Given a null-terminated string of wchar_t with each wchar_t representing
+// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
+// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
+// should be represented as a surrogate pair (two UTF-16 units)
+// *even* where wchar_t is 32-bit (Linux and Mac).
+//
+// This is to help write tests for functions with string16 params until
+// the C++ 0x UTF-16 literal is well-supported by compilers.
+string16 BuildString16(const wchar_t* s) {
+#if defined(WCHAR_T_IS_UTF16)
+  return string16(s);
+#elif defined(WCHAR_T_IS_UTF32)
+  string16 u16;
+  while (*s != 0) {
+    DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
+    u16.push_back(*s++);
+  }
+  return u16;
+#endif
+}
+
+const wchar_t* const kConvertRoundtripCases[] = {
+  L"Google Video",
+  // "网页 图片 资讯更多 »"
+  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
+  //  "Παγκόσμιος Ιστός"
+  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
+  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
+  // "Поиск страниц на русском"
+  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
+  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
+  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
+  // "전체서비스"
+  L"\xc804\xccb4\xc11c\xbe44\xc2a4",
+
+  // Test characters that take more than 16 bits. This will depend on whether
+  // wchar_t is 16 or 32 bits.
+#if defined(WCHAR_T_IS_UTF16)
+  L"\xd800\xdf00",
+  // Five non-BMP code points (U+11D40 - U+11D44) as surrogate pairs.
+  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
+#elif defined(WCHAR_T_IS_UTF32)
+  L"\x10300",
+  // Five non-BMP code points (U+11D40 - U+11D44), one wchar_t each.
+  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
+#endif
+};
+
+}  // namespace
+
+TEST(UTFStringConversionsTest, ConvertUTF8AndWide) {
+  // We round-trip all the wide strings through UTF-8 to make sure everything
+  // agrees on the conversion. This uses the stream operators to test them
+  // simultaneously.
+  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
+    std::ostringstream utf8;
+    utf8 << WideToUTF8(kConvertRoundtripCases[i]);
+    std::wostringstream wide;
+    wide << UTF8ToWide(utf8.str());
+
+    EXPECT_EQ(kConvertRoundtripCases[i], wide.str());
+  }
+}
+
+TEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) {
+  // An empty std::wstring should be converted to an empty std::string,
+  // and vice versa.
+  std::wstring wempty;
+  std::string empty;
+  EXPECT_EQ(empty, WideToUTF8(wempty));
+  EXPECT_EQ(wempty, UTF8ToWide(empty));
+}
+
+TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
+  struct UTF8ToWideCase {
+    const char* utf8;
+    const wchar_t* wide;
+    bool success;
+  } convert_cases[] = {
+    // Regular UTF-8 input.
+    {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
+    // Non-character is passed through.
+    {"\xef\xbf\xbfHello", L"\xffffHello", true},
+    // Truncated UTF-8 sequence.
+    {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
+    // Truncated off the end.
+    {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},
+    // Non-shortest-form UTF-8.
+    {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
+    // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
+    {"\xed\xb0\x80", L"", false},
+    // Non-BMP characters. The second is a non-character regarded as valid.
+    // The result will either be in UTF-16 or UTF-32.
+#if defined(WCHAR_T_IS_UTF16)
+    {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
+    {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
+#elif defined(WCHAR_T_IS_UTF32)
+    {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
+    {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
+#endif
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
+    std::wstring converted;
+    EXPECT_EQ(convert_cases[i].success,
+              UTF8ToWide(convert_cases[i].utf8,
+                         strlen(convert_cases[i].utf8),
+                         &converted));
+    std::wstring expected(convert_cases[i].wide);
+    EXPECT_EQ(expected, converted);
+  }
+
+  // Manually test an embedded NULL.
+  std::wstring converted;
+  EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted));
+  ASSERT_EQ(3U, converted.length());
+  EXPECT_EQ(static_cast<wchar_t>(0), converted[0]);
+  EXPECT_EQ('Z', converted[1]);
+  EXPECT_EQ('\t', converted[2]);
+
+  // Make sure that conversion replaces, not appends.
+  EXPECT_TRUE(UTF8ToWide("B", 1, &converted));
+  ASSERT_EQ(1U, converted.length());
+  EXPECT_EQ('B', converted[0]);
+}
+
+#if defined(WCHAR_T_IS_UTF16)
+// This test is only valid when wchar_t == UTF-16.
+TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
+  struct WideToUTF8Case {
+    const wchar_t* utf16;
+    const char* utf8;
+    bool success;
+  } convert_cases[] = {
+    // Regular UTF-16 input.
+    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
+    // Test a non-BMP character.
+    {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
+    // Non-characters are passed through.
+    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
+    {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
+    // The first character is a truncated UTF-16 character.
+    {L"\xd800\x597d", "\xe5\xa5\xbd", false},
+    // Truncated at the end.
+    {L"\x597d\xd800", "\xe5\xa5\xbd", false},
+  };
+
+  for (int i = 0; i < arraysize(convert_cases); i++) {
+    std::string converted;
+    EXPECT_EQ(convert_cases[i].success,
+              WideToUTF8(convert_cases[i].utf16,
+                         wcslen(convert_cases[i].utf16),
+                         &converted));
+    std::string expected(convert_cases[i].utf8);
+    EXPECT_EQ(expected, converted);
+  }
+}
+
+#elif defined(WCHAR_T_IS_UTF32)
+// This test is only valid when wchar_t == UTF-32.
+TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {
+  struct WideToUTF8Case {
+    const wchar_t* utf32;
+    const char* utf8;
+    bool success;
+  } convert_cases[] = {
+    // Regular 16-bit input.
+    {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
+    // Test a non-BMP character.
+    {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
+    // Non-characters are passed through.
+    {L"\xffffHello", "\xEF\xBF\xBFHello", true},
+    {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
+    // Invalid Unicode code points.
+    {L"\xfffffffHello", "Hello", false},
+    // Lone surrogate code points are not valid UTF-32 and are dropped.
+    {L"\xd800\x597d", "\xe5\xa5\xbd", false},
+    {L"\xdc01Hello", "Hello", false},
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
+    std::string converted;
+    EXPECT_EQ(convert_cases[i].success,
+              WideToUTF8(convert_cases[i].utf32,
+                         wcslen(convert_cases[i].utf32),
+                         &converted));
+    std::string expected(convert_cases[i].utf8);
+    EXPECT_EQ(expected, converted);
+  }
+}
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+TEST(UTFStringConversionsTest, ConvertMultiString) {
+  static wchar_t wmulti[] = {
+    L'f', L'o', L'o', L'\0',
+    L'b', L'a', L'r', L'\0',
+    L'b', L'a', L'z', L'\0',
+    L'\0'
+  };
+  static char multi[] = {
+    'f', 'o', 'o', '\0',
+    'b', 'a', 'r', '\0',
+    'b', 'a', 'z', '\0',
+    '\0'
+  };
+  std::wstring wmultistring;
+  memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti));
+  EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());
+  std::string expected;
+  memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));
+  EXPECT_EQ(arraysize(multi) - 1, expected.length());
+  const std::string& converted = WideToUTF8(wmultistring);
+  EXPECT_EQ(arraysize(multi) - 1, converted.length());
+  EXPECT_EQ(expected, converted);
+}
+
+TEST(UTFStringConversionsTest, AdjustOffset) {
+  // Under the hood, all the functions call the same converter function, so we
+  // don't need to exhaustively check every case.
+  struct WideToUTF8Case {
+    const wchar_t* wide;
+    size_t input_offset;
+    size_t output_offset;
+  } wide_to_utf8_cases[] = {
+    {L"", 0, std::string::npos},
+    {L"\x4f60\x597d", 0, 0},
+    {L"\x4f60\x597d", 1, 3},
+    {L"\x4f60\x597d", 2, std::string::npos},
+    {L"\x4f60\x597d", std::wstring::npos, std::string::npos},
+    {L"\xd800\x597dz", 1, 0},
+    {L"\xd800\x597dz", 2, 3},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf8_cases); ++i) {
+    size_t offset = wide_to_utf8_cases[i].input_offset;
+    WideToUTF8AndAdjustOffset(wide_to_utf8_cases[i].wide, &offset);
+    EXPECT_EQ(wide_to_utf8_cases[i].output_offset, offset);
+  }
+
+  struct UTF8ToWideCase {
+    const char* utf8;
+    size_t input_offset;
+    size_t output_offset;
+  } utf8_to_wide_cases[] = {
+    {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos},
+    {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
+    {"\xed\xb0\x80z", 3, 0},
+    {"A\xF0\x90\x8C\x80z", 1, 1},
+    {"A\xF0\x90\x8C\x80z", 2, std::wstring::npos},
+#if defined(WCHAR_T_IS_UTF16)
+    {"A\xF0\x90\x8C\x80z", 5, 3},
+#elif defined(WCHAR_T_IS_UTF32)
+    {"A\xF0\x90\x8C\x80z", 5, 2},
+#endif
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_wide_cases); ++i) {
+    size_t offset = utf8_to_wide_cases[i].input_offset;
+    UTF8ToWideAndAdjustOffset(utf8_to_wide_cases[i].utf8, &offset);
+    EXPECT_EQ(utf8_to_wide_cases[i].output_offset, offset);
+  }
+
+#if defined(WCHAR_T_IS_UTF32)
+  struct WideToUTF16Case {
+    const wchar_t* wide;
+    size_t input_offset;
+    size_t output_offset;
+  } wide_to_utf16_cases[] = {
+    {L"\x4F60\x597D", 1, 1},
+    {L"\x20000\x4E00", 1, 2},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf16_cases); ++i) {
+    size_t offset = wide_to_utf16_cases[i].input_offset;
+    WideToUTF16AndAdjustOffset(wide_to_utf16_cases[i].wide, &offset);
+    EXPECT_EQ(wide_to_utf16_cases[i].output_offset, offset);
+  }
+
+  struct UTF16ToWideCase {
+    const wchar_t* wide;
+    size_t input_offset;
+    size_t output_offset;
+  } utf16_to_wide_cases[] = {
+    {L"\xD840\xDC00\x4E00", 0, 0},
+    {L"\xD840\xDC00\x4E00", 1, std::wstring::npos},
+    {L"\xD840\xDC00\x4E00", 2, 1},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) {
+    size_t offset = utf16_to_wide_cases[i].input_offset;
+    UTF16ToWideAndAdjustOffset(BuildString16(utf16_to_wide_cases[i].wide),
+                               &offset);
+    EXPECT_EQ(utf16_to_wide_cases[i].output_offset, offset);
+  }
+#endif
+}
+
+}  // namespace base
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index f9223b4..1b0340d2 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -438,10 +438,6 @@ void AutocompleteMatch::ClassifyLocationInString(
     size_t overall_length,
     int style,
     ACMatchClassifications* classification) {
-  // Classifying an empty match makes no sense and will lead to validation
-  // errors later.
-  DCHECK(match_length > 0);
-
   classification->clear();
 
   // Don't classify anything about an empty string
@@ -459,6 +455,9 @@ void AutocompleteMatch::ClassifyLocationInString(
     // No match, above classification will suffice for whole string.
     return;
   }
+  // Classifying an empty match makes no sense and will lead to validation
+  // errors later.
+  DCHECK(match_length > 0);
   classification->push_back(ACMatchClassification(match_location,
       (style | ACMatchClassification::MATCH) & ~ACMatchClassification::DIM));
 
diff --git a/chrome/browser/autocomplete/autocomplete.h b/chrome/browser/autocomplete/autocomplete.h
index 0193b8c..f5d9ac0 100644
--- a/chrome/browser/autocomplete/autocomplete.h
+++ b/chrome/browser/autocomplete/autocomplete.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -548,9 +548,9 @@ class AutocompleteProvider
   // profile's bookmark bar model.
   void UpdateStarredStateOfMatches();
 
-  // A convenience function to call gfx::GetCleanStringFromUrl() with the
-  // current set of "Accept Languages" when check_accept_lang is true.
-  // Otherwise, it's called with an empty list.
+  // A convenience function to call net::FormatUrl() with the current set of
+  // "Accept Languages" when check_accept_lang is true.  Otherwise, it's called
+  // with an empty list.
   std::wstring StringForURLDisplay(const GURL& url,
                                    bool check_accept_lang) const;
 
diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc
index b44b6e7..a1d971a 100644
--- a/chrome/browser/autocomplete/history_url_provider.cc
+++ b/chrome/browser/autocomplete/history_url_provider.cc
@@ -68,9 +68,8 @@ void HistoryURLProvider::DeleteMatch(const AutocompleteMatch& match) {
   DCHECK(done_);
 
   // Delete the match from the history DB.
-  HistoryService* history_service =
-      profile_ ? profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) :
-      history_service_;
+  HistoryService* const history_service =
+      profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
   GURL selected_url(match.destination_url);
   if (!history_service || !selected_url.is_valid()) {
     NOTREACHED() << "Can't delete requested URL";
@@ -628,16 +627,17 @@ void HistoryURLProvider::RunAutocompletePasses(
     matches_.push_back(SuggestExactInput(input, trim_http));
 
   // We'll need the history service to run both passes, so try to obtain it.
-  HistoryService* const history_service = profile_ ?
-      profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : history_service_;
+  HistoryService* const history_service =
+      profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
   if (!history_service)
     return;
 
   // Create the data structure for the autocomplete passes.  We'll save this off
   // onto the |params_| member for later deletion below if we need to run pass
   // 2.
-  const std::wstring& languages = profile_ ?
-      profile_->GetPrefs()->GetString(prefs::kAcceptLanguages) : std::wstring();
+  std::wstring languages(languages_);
+  if (languages.empty() && profile_)
+    languages = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
   scoped_ptr<HistoryURLProviderParams> params(
       new HistoryURLProviderParams(input, trim_http, languages));
 
@@ -826,28 +826,47 @@ AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
       !!info.visit_count(), AutocompleteMatch::HISTORY_URL);
   match.destination_url = info.url();
   DCHECK(match.destination_url.is_valid());
+  size_t inline_autocomplete_offset =
+      history_match.input_location + params->input.text().length();
   match.fill_into_edit = net::FormatUrl(info.url(),
-      match_type == WHAT_YOU_TYPED ? std::wstring() : params->languages);
-  if (!params->input.prevent_inline_autocomplete()) {
-    match.inline_autocomplete_offset =
-        history_match.input_location + params->input.text().length();
-  }
+      match_type == WHAT_YOU_TYPED ? std::wstring() : params->languages, true,
+      UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset);
   size_t offset = 0;
   if (params->trim_http && !history_match.match_in_scheme) {
     offset = TrimHttpPrefix(&match.fill_into_edit);
-    if (match.inline_autocomplete_offset != std::wstring::npos) {
-      DCHECK(match.inline_autocomplete_offset >= offset);
-      match.inline_autocomplete_offset -= offset;
+    if (inline_autocomplete_offset != std::wstring::npos) {
+      DCHECK(inline_autocomplete_offset >= offset);
+      inline_autocomplete_offset -= offset;
     }
   }
+  if (!params->input.prevent_inline_autocomplete())
+    match.inline_autocomplete_offset = inline_autocomplete_offset;
   DCHECK((match.inline_autocomplete_offset == std::wstring::npos) ||
          (match.inline_autocomplete_offset <= match.fill_into_edit.length()));
 
-  match.contents = match.fill_into_edit;
-  AutocompleteMatch::ClassifyLocationInString(
-      history_match.input_location - offset, params->input.text().length(),
-      match.contents.length(), ACMatchClassification::URL,
-      &match.contents_class);
+  size_t match_start = history_match.input_location;
+  match.contents = net::FormatUrl(info.url(),
+      match_type == WHAT_YOU_TYPED ? std::wstring() : params->languages, true,
+      UnescapeRule::SPACES, NULL, NULL, &match_start);
+  if (offset) {
+    TrimHttpPrefix(&match.contents);
+    if (match_start != std::wstring::npos) {
+      DCHECK(match_start >= offset);
+      match_start -= offset;
+    }
+  }
+  if ((match_start != std::wstring::npos) &&
+      (inline_autocomplete_offset != std::wstring::npos) &&
+      (inline_autocomplete_offset != match_start)) {
+    DCHECK(inline_autocomplete_offset > match_start);
+    AutocompleteMatch::ClassifyLocationInString(match_start,
+        inline_autocomplete_offset - match_start, match.contents.length(),
+        ACMatchClassification::URL, &match.contents_class);
+  } else {
+    AutocompleteMatch::ClassifyLocationInString(std::wstring::npos, 0,
+        match.contents.length(), ACMatchClassification::URL,
+        &match.contents_class);
+  }
   match.description = info.title();
   AutocompleteMatch::ClassifyMatchInString(params->input.text(), info.title(),
                                            ACMatchClassification::NONE,
diff --git a/chrome/browser/autocomplete/history_url_provider.h b/chrome/browser/autocomplete/history_url_provider.h
index 50f6ba7..152a938 100644
--- a/chrome/browser/autocomplete/history_url_provider.h
+++ b/chrome/browser/autocomplete/history_url_provider.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -135,18 +135,18 @@ class HistoryURLProvider : public AutocompleteProvider {
  public:
   HistoryURLProvider(ACProviderListener* listener, Profile* profile)
       : AutocompleteProvider(listener, profile, "HistoryURL"),
-        history_service_(NULL),
         prefixes_(GetPrefixes()),
         params_(NULL) {
   }
 
 #ifdef UNIT_TEST
   HistoryURLProvider(ACProviderListener* listener,
-                     HistoryService* history_service)
-      : AutocompleteProvider(listener, NULL, "History"),
-        history_service_(history_service),
+                     Profile* profile,
+                     const std::wstring& languages)
+      : AutocompleteProvider(listener, profile, "History"),
         prefixes_(GetPrefixes()),
-        params_(NULL) {
+        params_(NULL),
+        languages_(languages) {
   }
 #endif
   // no destructor (see note above)
@@ -379,10 +379,6 @@ class HistoryURLProvider : public AutocompleteProvider {
                                           MatchType match_type,
                                           size_t match_number);
 
-  // This is only non-null for testing, otherwise the HistoryService from the
-  // Profile is used.
-  HistoryService* history_service_;
-
   // Prefixes to try appending to user input when looking for a match.
   const Prefixes prefixes_;
 
@@ -391,6 +387,10 @@ class HistoryURLProvider : public AutocompleteProvider {
   // parameter itself is freed once it's no longer needed.  The only reason we
   // keep this member is so we can set the cancel bit on it.
   HistoryURLProviderParams* params_;
+
+  // Only used by unittests; if non-empty, overrides accept-languages in the
+  // profile's pref system.
+  std::wstring languages_;
 };
 
 #endif  // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
diff --git a/chrome/browser/autocomplete/history_url_provider_unittest.cc b/chrome/browser/autocomplete/history_url_provider_unittest.cc
index 408526a..45e1426 100644
--- a/chrome/browser/autocomplete/history_url_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_url_provider_unittest.cc
@@ -83,6 +83,11 @@ static TestURLInfo test_db[] = {
   {"http://go/", L"Intranet URL", 1, 1},
   {"http://gooey/", L"Intranet URL 2", 5, 5},
 
+  // URLs for testing offset adjustment
+  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", L"Korean", 2, 2},
+  {"http://spaces.com/path%20with%20spaces/foo.html", L"Spaces", 2, 2},
+  {"http://ms/c++%20style%20guide", L"Style guide", 2, 2},
+  {"http://foo:bar@baz.com/", L"HTTP auth", 2, 2},
 };
 
 class HistoryURLProviderTest : public testing::Test,
@@ -116,6 +121,8 @@ class HistoryURLProviderTest : public testing::Test,
                const std::string* expected_urls,
                size_t num_results);
 
+  void RunAdjustOffsetTest(const std::wstring text, size_t expected_offset);
+
   MessageLoopForUI message_loop_;
   ChromeThread ui_thread_;
   ChromeThread file_thread_;
@@ -144,7 +151,7 @@ void HistoryURLProviderTest::SetUpImpl(bool no_db) {
   profile_->CreateHistoryService(true, no_db);
   history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
 
-  autocomplete_ = new HistoryURLProvider(this, profile_.get());
+  autocomplete_ = new HistoryURLProvider(this, profile_.get(), L"en-US,en,ko");
 
   FillData();
 }
@@ -189,6 +196,18 @@ void HistoryURLProviderTest::RunTest(const std::wstring text,
     EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec());
 }
 
+void HistoryURLProviderTest::RunAdjustOffsetTest(const std::wstring text,
+                                                 size_t expected_offset) {
+  AutocompleteInput input(text, std::wstring(), false, false, false);
+  autocomplete_->Start(input, false);
+  if (!autocomplete_->done())
+    MessageLoop::current()->Run();
+
+  matches_ = autocomplete_->matches();
+  ASSERT_GE(matches_.size(), 1U) << "Input text: " << text;
+  EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset);
+}
+
 TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
   // Test that hosts get synthesized below popular pages.
   const std::string expected_nonsynth[] = {
@@ -382,6 +401,14 @@ TEST_F(HistoryURLProviderTest, Fixup) {
   RunTest(L"17173", std::wstring(), false, fixup_5, arraysize(fixup_5));
 }
 
+TEST_F(HistoryURLProviderTest, AdjustOffset) {
+  RunAdjustOffsetTest(L"http://www.\uAD50\uC721", 13);
+  RunAdjustOffsetTest(L"http://spaces.com/path%20with%20spa", 31);
+  RunAdjustOffsetTest(L"http://ms/c++ s", 15);
+  RunAdjustOffsetTest(L"http://foo:ba", std::wstring::npos);
+  RunAdjustOffsetTest(L"http://foo:bar@ba", 9);
+}
+
 TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
   // Ensure that we will still produce matches for navigation when there is no
   // database.
diff --git a/chrome/browser/bookmarks/bookmark_table_model.cc b/chrome/browser/bookmarks/bookmark_table_model.cc
index 142090c..9b4fd82 100644
--- a/chrome/browser/bookmarks/bookmark_table_model.cc
+++ b/chrome/browser/bookmarks/bookmark_table_model.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -324,9 +324,8 @@ std::wstring BookmarkTableModel::GetText(int row, int column_id) {
       std::wstring languages = model_ && model_->profile()
           ? model_->profile()->GetPrefs()->GetString(prefs::kAcceptLanguages)
           : std::wstring();
-      std::wstring url_text =
-          net::FormatUrl(node->GetURL(), languages, false, UnescapeRule::SPACES,
-          NULL, NULL);
+      std::wstring url_text = net::FormatUrl(node->GetURL(), languages, false,
+          UnescapeRule::SPACES, NULL, NULL, NULL);
       if (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT)
         l10n_util::WrapStringWithLTRFormatting(&url_text);
       return url_text;
diff --git a/chrome/browser/bookmarks/bookmark_utils.cc b/chrome/browser/bookmarks/bookmark_utils.cc
index 54ea21cb..e62a30a 100644
--- a/chrome/browser/bookmarks/bookmark_utils.cc
+++ b/chrome/browser/bookmarks/bookmark_utils.cc
@@ -187,7 +187,7 @@ bool DoesBookmarkContainWords(const BookmarkNode* node,
           l10n_util::ToLower(node->GetTitle()), words) ||
       DoesBookmarkTextContainWords(UTF8ToWide(node->GetURL().spec()), words) ||
       DoesBookmarkTextContainWords(net::FormatUrl(
-          node->GetURL(), languages, false, true, NULL, NULL), words);
+          node->GetURL(), languages, false, true, NULL, NULL, NULL), words);
 }
 
 }  // namespace
diff --git a/chrome/browser/gtk/options/exceptions_page_gtk.cc b/chrome/browser/gtk/options/exceptions_page_gtk.cc
index 164a821..10a8f2d 100644
--- a/chrome/browser/gtk/options/exceptions_page_gtk.cc
+++ b/chrome/browser/gtk/options/exceptions_page_gtk.cc
@@ -113,8 +113,7 @@ void ExceptionsPageGtk::SetExceptionList(
   for (size_t i = 0; i < result.size(); ++i) {
     exception_list_[i] = *result[i];
     std::wstring formatted = net::FormatUrl(result[i]->origin, languages,
-                                            false, UnescapeRule::NONE,
-                                            NULL, NULL);
+        false, UnescapeRule::NONE, NULL, NULL, NULL);
     std::string site = WideToUTF8(formatted);
     GtkTreeIter iter;
     gtk_list_store_insert_with_values(exception_list_store_, &iter, (gint) i,
diff --git a/chrome/browser/gtk/options/passwords_page_gtk.cc b/chrome/browser/gtk/options/passwords_page_gtk.cc
index b2f6345..f4a2197 100644
--- a/chrome/browser/gtk/options/passwords_page_gtk.cc
+++ b/chrome/browser/gtk/options/passwords_page_gtk.cc
@@ -156,8 +156,7 @@ void PasswordsPageGtk::SetPasswordList(
   for (size_t i = 0; i < result.size(); ++i) {
     password_list_[i] = *result[i];
     std::wstring formatted = net::FormatUrl(result[i]->origin, languages,
-                                            false, UnescapeRule::NONE,
-                                            NULL, NULL);
+        false, UnescapeRule::NONE, NULL, NULL, NULL);
     std::string site = WideToUTF8(formatted);
     std::string user = UTF16ToUTF8(result[i]->username_value);
     GtkTreeIter iter;
diff --git a/chrome/browser/gtk/options/url_picker_dialog_gtk.cc b/chrome/browser/gtk/options/url_picker_dialog_gtk.cc
index 6c4e38f..e646552 100644
--- a/chrome/browser/gtk/options/url_picker_dialog_gtk.cc
+++ b/chrome/browser/gtk/options/url_picker_dialog_gtk.cc
@@ -196,9 +196,8 @@ std::string UrlPickerDialogGtk::GetURLForPath(GtkTreePath* path) const {
       profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
   // Because the url_field_ is user-editable, we set the URL with
   // username:password and escaped path and query.
-  std::wstring formatted = net::FormatUrl(
-      url_table_model_->GetURL(row), languages,
-      false, UnescapeRule::NONE, NULL, NULL);
+  std::wstring formatted = net::FormatUrl(url_table_model_->GetURL(row),
+      languages, false, UnescapeRule::NONE, NULL, NULL, NULL);
   return WideToUTF8(formatted);
 }
 
diff --git a/chrome/browser/net/browser_url_util.cc b/chrome/browser/net/browser_url_util.cc
index 940d3b6..5f287795 100644
--- a/chrome/browser/net/browser_url_util.cc
+++ b/chrome/browser/net/browser_url_util.cc
@@ -21,9 +21,9 @@ void WriteURLToClipboard(const GURL& url,
   // Unescaping path and query is not a good idea because other applications
   // may not encode non-ASCII characters in UTF-8.  See crbug.com/2820.
   string16 text = url.SchemeIs(chrome::kMailToScheme) ?
-                      ASCIIToUTF16(url.path()) :
-                      WideToUTF16(net::FormatUrl(url, languages, false,
-                                             UnescapeRule::NONE, NULL, NULL));
+      ASCIIToUTF16(url.path()) :
+      WideToUTF16(net::FormatUrl(url, languages, false, UnescapeRule::NONE,
+                                 NULL, NULL, NULL));
 
   ScopedClipboardWriter scw(clipboard);
   scw.WriteURL(text);
diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc
index b465268..a68bc34 100644
--- a/chrome/browser/net/url_fixer_upper.cc
+++ b/chrome/browser/net/url_fixer_upper.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -146,11 +146,10 @@ static string FixupHomedir(const string& text) {
 #endif
 
 // Tries to create a file: URL from |text| if it looks like a filename, even if
-// it doesn't resolve as a valid path or to an existing file.  Returns true
-// with a (possibly invalid) file: URL in |fixed_up_url| for input beginning
-// with a drive specifier or "\\".  Returns false in other cases (including
-// file: URLs: these don't look like filenames), leaving fixed_up_url
-// unchanged.
+// it doesn't resolve as a valid path or to an existing file.  Returns a
+// (possibly invalid) file: URL for input beginning with a drive specifier or
+// "\\".  Returns the unchanged input in other cases (including file: URLs:
+// these don't look like filenames).
 static string FixupPath(const string& text) {
   DCHECK(!text.empty());
 
@@ -173,7 +172,7 @@ static string FixupPath(const string& text) {
   GURL file_url = net::FilePathToFileURL(FilePath(filename));
   if (file_url.is_valid()) {
     return WideToUTF8(net::FormatUrl(file_url, std::wstring(), true,
-        UnescapeRule::NORMAL, NULL, NULL));
+        UnescapeRule::NORMAL, NULL, NULL, NULL));
   }
 
   // Invalid file URL, just return the input.
@@ -182,7 +181,6 @@ static string FixupPath(const string& text) {
 
 // Checks |domain| to see if a valid TLD is already present.  If not, appends
 // |desired_tld| to the domain, and prepends "www." unless it's already present.
-// Then modifies |fixed_up_url| to reflect the changes.
 static void AddDesiredTLD(const string& desired_tld,
                           string* domain) {
   if (desired_tld.empty() || domain->empty())
@@ -268,30 +266,15 @@ static void FixupHost(const string& text,
   url->append(domain);
 }
 
-// Looks for a port number, including initial colon, at port_start.  If
-// something invalid (which cannot be fixed up) is found, like ":foo" or
-// ":7:7", returns false.  Otherwise, removes any extra colons
-// ("::1337" -> ":1337", ":/" -> "/") and returns true.
 static void FixupPort(const string& text,
                       const url_parse::Component& part,
                       string* url) {
   if (!part.is_valid())
     return;
 
-  // Look for non-digit in port and strip if found.
-  string port(text, part.begin, part.len);
-  for (string::iterator i = port.begin(); i != port.end();) {
-    if (IsAsciiDigit(*i))
-      ++i;
-    else
-      i = port.erase(i);
-  }
-
-  if (port.empty())
-    return;  // Nothing to append.
-
+  // We don't fix up the port at the moment; it is appended to |url| verbatim.
   url->append(":");
-  url->append(port);
+  url->append(text, part.begin, part.len);
 }
 
 static inline void FixupPath(const string& text,
@@ -573,7 +556,7 @@ string URLFixerUpper::FixupRelativeFile(const FilePath& base_dir,
     GURL file_url = net::FilePathToFileURL(full_path);
     if (file_url.is_valid())
       return WideToUTF8(net::FormatUrl(file_url, std::wstring(),
-          true, UnescapeRule::NORMAL, NULL, NULL));
+          true, UnescapeRule::NORMAL, NULL, NULL, NULL));
     // Invalid files fall through to regular processing.
   }
 
diff --git a/chrome/browser/net/url_fixer_upper_unittest.cc b/chrome/browser/net/url_fixer_upper_unittest.cc
index d7f8b93..5028cb2 100644
--- a/chrome/browser/net/url_fixer_upper_unittest.cc
+++ b/chrome/browser/net/url_fixer_upper_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -210,8 +210,8 @@ struct fixup_case {
   {" foo.com/asdf  bar", "", "http://foo.com/asdf  bar"},
   {"..www.google.com..", "", "http://www.google.com./"},
   {"http://......", "", "http://....../"},
-  {"http://host.com:ninety-two/", "", "http://host.com/"},
-  {"http://host.com:ninety-two?foo", "", "http://host.com/?foo"},
+  {"http://host.com:ninety-two/", "", "http://host.com:ninety-two/"},
+  {"http://host.com:ninety-two?foo", "", "http://host.com:ninety-two/?foo"},
   {"google.com:123", "", "http://google.com:123/"},
   {"about:", "", "about:"},
   {"about:version", "", "about:version"},
diff --git a/chrome/browser/tab_contents/tab_contents.cc b/chrome/browser/tab_contents/tab_contents.cc
index 3a34459..4e45553 100644
--- a/chrome/browser/tab_contents/tab_contents.cc
+++ b/chrome/browser/tab_contents/tab_contents.cc
@@ -2517,9 +2517,9 @@ void TabContents::LoadStateChanged(const GURL& url,
   upload_size_ = upload_size;
   std::wstring languages =
       profile()->GetPrefs()->GetString(prefs::kAcceptLanguages);
-  load_state_host_.clear();
   std::string host = url.host();
-  net::IDNToUnicode(host.c_str(), host.size(), languages, &load_state_host_);
+  load_state_host_ =
+      net::IDNToUnicode(host.c_str(), host.size(), languages, NULL);
   if (load_state_ == net::LOAD_STATE_READING_RESPONSE)
     SetNotWaitingForResponse();
   if (is_loading())
diff --git a/chrome/browser/toolbar_model.cc b/chrome/browser/toolbar_model.cc
index 1169c42..42977f6 100644
--- a/chrome/browser/toolbar_model.cc
+++ b/chrome/browser/toolbar_model.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -41,7 +41,8 @@ std::wstring ToolbarModel::GetText() {
       url = entry->virtual_url();
     }
   }
-  return net::FormatUrl(url, languages, true, UnescapeRule::NORMAL, NULL, NULL);
+  return net::FormatUrl(url, languages, true, UnescapeRule::NORMAL, NULL, NULL,
+                        NULL);
 }
 
 ToolbarModel::SecurityLevel ToolbarModel::GetSecurityLevel() {
diff --git a/chrome/browser/views/bookmark_editor_view.cc b/chrome/browser/views/bookmark_editor_view.cc
index 5443f81..f40e25f 100644
--- a/chrome/browser/views/bookmark_editor_view.cc
+++ b/chrome/browser/views/bookmark_editor_view.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -278,9 +278,8 @@ void BookmarkEditorView::Init() {
         : std::wstring();
     // The following URL is user-editable.  We specify omit_username_password=
     // false and unescape=false to show the original URL except IDN.
-    url_text =
-        net::FormatUrl(details_.existing_node->GetURL(), languages, false,
-                       UnescapeRule::NONE, NULL, NULL);
+    url_text = net::FormatUrl(details_.existing_node->GetURL(), languages,
+                              false, UnescapeRule::NONE, NULL, NULL, NULL);
   }
   url_tf_.SetText(url_text);
   url_tf_.SetController(this);
diff --git a/chrome/browser/views/url_picker.cc b/chrome/browser/views/url_picker.cc
index 5232676..0133dbd 100644
--- a/chrome/browser/views/url_picker.cc
+++ b/chrome/browser/views/url_picker.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -293,9 +293,8 @@ void UrlPicker::OnSelectionChanged() {
         profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
     // Because the url_field_ is user-editable, we set the URL with
     // username:password and escaped path and query.
-    std::wstring formatted = net::FormatUrl(
-        url_table_model_->GetURL(selection), languages,
-        false, UnescapeRule::NONE, NULL, NULL);
+    std::wstring formatted = net::FormatUrl(url_table_model_->GetURL(selection),
+        languages, false, UnescapeRule::NONE, NULL, NULL, NULL);
     url_field_->SetText(formatted);
     if (title_field_)
       title_field_->SetText(url_table_model_->GetTitle(selection));
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 3d2aca2..5196eb6 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -107,7 +107,14 @@ const char kUrlUnescape[128] = {
 };
 
 std::string UnescapeURLImpl(const std::string& escaped_text,
-                            UnescapeRule::Type rules) {
+                            UnescapeRule::Type rules,
+                            size_t* offset_for_adjustment) {
+  size_t offset_temp = std::wstring::npos;
+  if (!offset_for_adjustment)
+    offset_for_adjustment = &offset_temp;
+  else if (*offset_for_adjustment >= escaped_text.length())
+    *offset_for_adjustment = std::wstring::npos;
+
   // Do not unescape anything, return the |escaped_text| text.
   if (rules == UnescapeRule::NONE)
     return escaped_text;
@@ -136,8 +143,17 @@ std::string UnescapeURLImpl(const std::string& escaped_text,
              // Additionally allow control characters if requested.
              (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
           // Use the unescaped version of the character.
+          size_t length_before_append = result.length();
           result.push_back(value);
           i += 2;
+
+          // Adjust offset to match length change.
+          if (*offset_for_adjustment != std::string::npos) {
+            if (*offset_for_adjustment > (length_before_append + 2))
+              *offset_for_adjustment -= 2;
+            else if (*offset_for_adjustment > length_before_append)
+              *offset_for_adjustment = std::string::npos;
+          }
         } else {
           // Keep escaped. Append a percent and we'll get the following two
           // digits on the next loops through.
@@ -231,19 +247,27 @@ bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
   return true;
 }
 
-std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
-                                           const char* codepage,
-                                           UnescapeRule::Type rules) {
+std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+                                               UnescapeRule::Type rules,
+                                               size_t* offset_for_adjustment) {
   std::wstring result;
-  if (base::CodepageToWide(UnescapeURLImpl(text, rules), codepage,
-                           base::OnStringConversionError::FAIL, &result))
+  size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+  if (base::CodepageToWideAndAdjustOffset(
+          UnescapeURLImpl(text, rules, offset_for_adjustment),
+          "UTF-8", base::OnStringConversionError::FAIL, &result,
+          offset_for_adjustment))
     return result;          // Character set looks like it's valid.
-  return UTF8ToWide(text);  // Return the escaped version when it's not.
+
+  // Not valid.  Return the escaped version.  Undo our changes to
+  // |offset_for_adjustment| since we haven't changed the string after all.
+  if (offset_for_adjustment)
+    *offset_for_adjustment = original_offset;
+  return UTF8ToWideAndAdjustOffset(text, offset_for_adjustment);
 }
 
 std::string UnescapeURLComponent(const std::string& escaped_text,
                                  UnescapeRule::Type rules) {
-  return UnescapeURLImpl(escaped_text, rules);
+  return UnescapeURLImpl(escaped_text, rules, NULL);
 }
 
 template <class str>
diff --git a/net/base/escape.h b/net/base/escape.h
index 8761d4d..9ff17b6 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -93,17 +93,17 @@ std::string UnescapeURLComponent(const std::string& escaped_text,
                                  UnescapeRule::Type rules);
 
 // Unescapes the given substring as a URL, and then tries to interpret the
-// result as being encoded in the given code page. If the result is convertable
-// into the code page, it will be returned as converted. If it is not, the
-// original escaped string will be converted into a wide string and returned.
-std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
-                                           const char* codepage,
-                                           UnescapeRule::Type rules);
-inline std::wstring UnescapeAndDecodeUTF8URLComponent(
-    const std::string& text,
-    UnescapeRule::Type rules) {
-  return UnescapeAndDecodeURLComponent(text, "UTF-8", rules);
-}
+// result as being encoded as UTF-8. If the result is valid UTF-8, it will be
+// returned converted to a wide string. If it is not, the original escaped
+// string will be converted into a wide string and returned.
+//
+// |offset_for_adjustment| may be NULL; if not, it is an offset into |text| that
+// will be adjusted to point at the same logical place in the result string.  If
+// it points into the middle of an escape sequence or multi-byte character, or
+// at or past the end of |text|, it will be set to std::wstring::npos instead.
+std::wstring UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+                                               UnescapeRule::Type rules,
+                                               size_t* offset_for_adjustment);
 
 // Deprecated ------------------------------------------------------------------
 
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 44bb9972..8e5e7dc 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -24,8 +24,7 @@ struct UnescapeURLCase {
   const char* output;
 };
 
-struct UnescapeAndDecodeURLCase {
-  const char* encoding;
+struct UnescapeAndDecodeCase {
   const char* input;
 
   // The expected output when run through UnescapeURL.
@@ -38,6 +37,12 @@ struct UnescapeAndDecodeURLCase {
   const wchar_t* decoded;
 };
 
+struct AdjustOffsetCase {
+  const char* input;
+  size_t input_offset;
+  size_t output_offset;
+};
+
 struct EscapeForHTMLCase {
   const char* input;
   const char* expected_output;
@@ -45,7 +50,7 @@ struct EscapeForHTMLCase {
 
 }  // namespace
 
-TEST(Escape, EscapeTextForFormSubmission) {
+TEST(EscapeTest, EscapeTextForFormSubmission) {
   const EscapeCase escape_cases[] = {
     {L"foo", L"foo"},
     {L"foo bar", L"foo+bar"},
@@ -93,7 +98,7 @@ TEST(Escape, EscapeTextForFormSubmission) {
   EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str));
 }
 
-TEST(Escape, EscapePath) {
+TEST(EscapeTest, EscapePath) {
   ASSERT_EQ(
     // Most of the character space we care about, un-escaped
     EscapePath(
@@ -108,7 +113,7 @@ TEST(Escape, EscapePath) {
     "%7B%7C%7D~%7F%80%FF");
 }
 
-TEST(Escape, EscapeUrlEncodedData) {
+TEST(EscapeTest, EscapeUrlEncodedData) {
   ASSERT_EQ(
     // Most of the character space we care about, un-escaped
     EscapeUrlEncodedData(
@@ -123,7 +128,7 @@ TEST(Escape, EscapeUrlEncodedData) {
     "%7B%7C%7D~%7F%80%FF");
 }
 
-TEST(Escape, UnescapeURLComponent) {
+TEST(EscapeTest, UnescapeURLComponent) {
   const UnescapeURLCase unescape_cases[] = {
     {"", UnescapeRule::NORMAL, ""},
     {"%2", UnescapeRule::NORMAL, "%2"},
@@ -184,40 +189,48 @@ TEST(Escape, UnescapeURLComponent) {
   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
 }
 
-TEST(Escape, UnescapeAndDecodeURLComponent) {
-  const UnescapeAndDecodeURLCase unescape_cases[] = {
-    {"UTF8", "%", "%", "%", L"%"},
-    {"UTF8", "+", "+", " ", L"+"},
-    {"UTF8", "%2+", "%2+", "%2 ", L"%2+"},
-    {"UTF8", "+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
-    {"UTF8", "Don't escape anything",
-             "Don't escape anything",
-             "Don't escape anything",
-             L"Don't escape anything"},
-    {"UTF8", "+Invalid %escape %2+",
-             "+Invalid %escape %2+",
-             " Invalid %escape %2 ",
-             L"+Invalid %escape %2+"},
-    {"UTF8", "Some random text %25%3bOK",
-             "Some random text %25;OK",
-             "Some random text %25;OK",
-             L"Some random text %25;OK"},
-    {"UTF8", "%01%02%03%04%05%06%07%08%09",
-             "%01%02%03%04%05%06%07%08%09",
-             "%01%02%03%04%05%06%07%08%09",
-             L"%01%02%03%04%05%06%07%08%09"},
-    {"UTF8", "%E4%BD%A0+%E5%A5%BD",
-             "\xE4\xBD\xA0+\xE5\xA5\xBD",
-             "\xE4\xBD\xA0 \xE5\xA5\xBD",
-             L"\x4f60+\x597d"},
-    {"BIG5", "%A7A%A6n",
-             "\xA7\x41\xA6n",
-             "\xA7\x41\xA6n",
-             L"\x4f60\x597d"},
-    {"UTF8", "%ED%ED",  // Invalid UTF-8.
-             "\xED\xED",
-             "\xED\xED",
-             L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
+TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
+  const UnescapeAndDecodeCase unescape_cases[] = {
+    { "%",
+      "%",
+      "%",
+     L"%"},
+    { "+",
+      "+",
+      " ",
+     L"+"},
+    { "%2+",
+      "%2+",
+      "%2 ",
+     L"%2+"},
+    { "+%%%+%%%",
+      "+%%%+%%%",
+      " %%% %%%",
+     L"+%%%+%%%"},
+    { "Don't escape anything",
+      "Don't escape anything",
+      "Don't escape anything",
+     L"Don't escape anything"},
+    { "+Invalid %escape %2+",
+      "+Invalid %escape %2+",
+      " Invalid %escape %2 ",
+     L"+Invalid %escape %2+"},
+    { "Some random text %25%3BOK",
+      "Some random text %25;OK",
+      "Some random text %25;OK",
+     L"Some random text %25;OK"},
+    { "%01%02%03%04%05%06%07%08%09",
+      "%01%02%03%04%05%06%07%08%09",
+      "%01%02%03%04%05%06%07%08%09",
+     L"%01%02%03%04%05%06%07%08%09"},
+    { "%E4%BD%A0+%E5%A5%BD",
+      "\xE4\xBD\xA0+\xE5\xA5\xBD",
+      "\xE4\xBD\xA0 \xE5\xA5\xBD",
+     L"\x4f60+\x597d"},
+    { "%ED%ED",  // Invalid UTF-8.
+      "\xED\xED",
+      "\xED\xED",
+     L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
   };
 
   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
@@ -230,14 +243,36 @@ TEST(Escape, UnescapeAndDecodeURLComponent) {
     EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
 
     // TODO: Need to test unescape_spaces and unescape_percent.
-    std::wstring decoded = UnescapeAndDecodeURLComponent(
-        unescape_cases[i].input, unescape_cases[i].encoding,
-        UnescapeRule::NORMAL);
+    std::wstring decoded = UnescapeAndDecodeUTF8URLComponent(
+        unescape_cases[i].input, UnescapeRule::NORMAL, NULL);
     EXPECT_EQ(std::wstring(unescape_cases[i].decoded), decoded);
   }
 }
 
-TEST(Escape, EscapeForHTML) {
+TEST(EscapeTest, AdjustOffset) {
+  const AdjustOffsetCase adjust_cases[] = {
+    {"", 0, std::wstring::npos},
+    {"test", 0, 0},
+    {"test", 2, 2},
+    {"test", 4, std::wstring::npos},
+    {"test", std::wstring::npos, std::wstring::npos},
+    {"%3Btest", 6, 4},
+    {"%3Btest", 2, std::wstring::npos},
+    {"test%3B", 2, 2},
+    {"%E4%BD%A0+%E5%A5%BD", 9, 1},
+    {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos},
+    {"%ED%B0%80+%E5%A5%BD", 6, 6},
+  };
+
+  for (size_t i = 0; i < arraysize(adjust_cases); i++) {
+    size_t offset = adjust_cases[i].input_offset;
+    UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
+                                      UnescapeRule::NORMAL, &offset);
+    EXPECT_EQ(adjust_cases[i].output_offset, offset);
+  }
+}
+
+TEST(EscapeTest, EscapeForHTML) {
   const EscapeForHTMLCase tests[] = {
     { "hello", "hello" },
     { "<hello>", "&lt;hello&gt;" },
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 85151e9..9171e54 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -650,60 +650,51 @@ bool IsIDNComponentSafe(const char16* str,
 }
 
 // Converts one component of a host (between dots) to IDN if safe. The result
-// will be APPENDED to the given output string and  will be the same as the
-// input if it is not IDN or the IDN is unsafe to display.
-void IDNToUnicodeOneComponent(const char16* comp,
-                              int comp_len,
+// will be APPENDED to the given output string and will be the same as the input
+// if it is not IDN or the IDN is unsafe to display.  Returns whether any
+// conversion was performed.
+bool IDNToUnicodeOneComponent(const char16* comp,
+                              size_t comp_len,
                               const std::wstring& languages,
                               string16* out) {
-  DCHECK(comp_len >= 0);
+  DCHECK(out);
   if (comp_len == 0)
-    return;
+    return false;
 
-  // Expand the output string to make room for a possibly longer string
-  // (we'll expand if it's still not big enough below).
-  int extra_space = 64;
-  size_t host_begin_in_output = out->size();
-
-  // Just copy the input if it can't be an IDN component.
-  if (comp_len < 4 ||
-      comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') {
-    out->resize(host_begin_in_output + comp_len);
-    for (int i = 0; i < comp_len; i++)
-      (*out)[host_begin_in_output + i] = comp[i];
-    return;
-  }
+  // Only transform if the input can be an IDN component.
+  static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
+  if ((comp_len > arraysize(kIdnPrefix)) &&
+      !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {
+    // Repeatedly expand the output string until it's big enough.  It looks like
+    // ICU will return the required size of the buffer, but that's not
+    // documented, so we'll just grow by 2x. This should be rare and is not on a
+    // critical path.
+    size_t original_length = out->length();
+    for (int extra_space = 64; ; extra_space *= 2) {
+      UErrorCode status = U_ZERO_ERROR;
+      out->resize(out->length() + extra_space);
+      int output_chars = uidna_IDNToUnicode(comp,
+          static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
+          UIDNA_DEFAULT, NULL, &status);
+      if (status == U_ZERO_ERROR) {
+        // Converted successfully.
+        out->resize(original_length + output_chars);
+        if (IsIDNComponentSafe(out->data() + original_length, output_chars,
+                               languages))
+          return true;
+      }
 
-  while (true) {
-    UErrorCode status = U_ZERO_ERROR;
-    out->resize(out->size() + extra_space);
-    int output_chars =
-        uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output],
-                           extra_space, UIDNA_DEFAULT, NULL, &status);
-    if (status == U_ZERO_ERROR) {
-      // Converted successfully.
-      out->resize(host_begin_in_output + output_chars);
-      if (!IsIDNComponentSafe(&out->data()[host_begin_in_output],
-                              output_chars,
-                              languages))
-        break;  // The error handling below will undo the IDN.
-      return;
+      if (status != U_BUFFER_OVERFLOW_ERROR)
+        break;
     }
-    if (status != U_BUFFER_OVERFLOW_ERROR)
-      break;
-
-    // Need to loop again with a bigger buffer. It looks like ICU will
-    // return the required size of the buffer, but that's not documented,
-    // so we'll just grow by 2x. This should be rare and is not on a
-    // critical path.
-    extra_space *= 2;
+    // Failed, revert back to original string.
+    out->resize(original_length);
   }
 
-  // We get here on error, in which case we replace anything that was added
-  // with the literal input.
-  out->resize(host_begin_in_output + comp_len);
-  for (int i = 0; i < comp_len; i++)
-    (*out)[host_begin_in_output + i] = comp[i];
+  // We get here with no IDN or on error, in which case we just append the
+  // literal input.
+  out->append(comp, comp_len);
+  return false;
 }
 
 // Helper for FormatUrl().
@@ -712,19 +703,23 @@ std::wstring FormatViewSourceUrl(const GURL& url,
                                  bool omit_username_password,
                                  UnescapeRule::Type unescape_rules,
                                  url_parse::Parsed* new_parsed,
-                                 size_t* prefix_end) {
+                                 size_t* prefix_end,
+                                 size_t* offset_for_adjustment) {
   DCHECK(new_parsed);
   const wchar_t* const kWideViewSource = L"view-source:";
   const size_t kViewSourceLengthPlus1 = 12;
 
   GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
+  size_t temp_offset = (*offset_for_adjustment == std::wstring::npos) ?
+      std::wstring::npos : (*offset_for_adjustment - kViewSourceLengthPlus1);
+  size_t* temp_offset_ptr = (*offset_for_adjustment < kViewSourceLengthPlus1) ?
+      NULL : &temp_offset;
   std::wstring result = net::FormatUrl(real_url, languages,
-      omit_username_password, unescape_rules, new_parsed, prefix_end);
+      omit_username_password, unescape_rules, new_parsed, prefix_end,
+      temp_offset_ptr);
   result.insert(0, kWideViewSource);
 
   // Adjust position values.
-  if (prefix_end)
-    *prefix_end += kViewSourceLengthPlus1;
   if (new_parsed->scheme.is_nonempty()) {
     // Assume "view-source:real-scheme" as a scheme.
     new_parsed->scheme.len += kViewSourceLengthPlus1;
@@ -746,6 +741,12 @@ std::wstring FormatViewSourceUrl(const GURL& url,
     new_parsed->query.begin += kViewSourceLengthPlus1;
   if (new_parsed->ref.is_nonempty())
     new_parsed->ref.begin += kViewSourceLengthPlus1;
+  if (prefix_end)
+    *prefix_end += kViewSourceLengthPlus1;
+  if (temp_offset_ptr) {
+    *offset_for_adjustment = (temp_offset == std::wstring::npos) ?
+        std::wstring::npos : (temp_offset + kViewSourceLengthPlus1);
+  }
   return result;
 }
 
@@ -769,12 +770,20 @@ std::set<int> explicitly_allowed_ports;
 
 // Appends the substring |in_component| inside of the URL |spec| to |output|,
 // and the resulting range will be filled into |out_component|. |unescape_rules|
-// defines how to clean the URL for human readability.
+// defines how to clean the URL for human readability.  |offset_for_adjustment|
+// is an offset into |output| which will be adjusted based on how it maps to the
+// component being converted; if it is less than output->length(), it will be
+// untouched, and if it is at least output->length() + in_component.len it
+// will be shortened by the difference in lengths between the input and output
+// components.  Otherwise it points into the component being converted, and is
+// adjusted to point to the same logical place in |output|.
+// |offset_for_adjustment| must not be NULL.
 static void AppendFormattedComponent(const std::string& spec,
                                      const url_parse::Component& in_component,
                                      UnescapeRule::Type unescape_rules,
                                      std::wstring* output,
-                                     url_parse::Component* out_component);
+                                     url_parse::Component* out_component,
+                                     size_t* offset_for_adjustment);
 
 GURL FilePathToFileURL(const FilePath& path) {
   // Produce a URL like "file:///C:/foo" for a regular file, or
@@ -849,58 +858,56 @@ std::string GetHeaderParamValue(const std::string& field,
 //
 // We may want to skip this step in the case of file URLs to allow unicode
 // UNC hostnames regardless of encodings.
-void IDNToUnicode(const char* host,
-                  int host_len,
-                  const std::wstring& languages,
-                  std::wstring* out) {
+std::wstring IDNToUnicode(const char* host,
+                          size_t host_len,
+                          const std::wstring& languages,
+                          size_t* offset_for_adjustment) {
   // Convert the ASCII input to a wide string for ICU.
   string16 input16;
   input16.reserve(host_len);
-  for (int i = 0; i < host_len; i++)
-    input16.push_back(host[i]);
+  std::copy(host, host + host_len, std::back_inserter(input16));
 
   string16 out16;
-  // The output string is appended to, so convert what's already there if
-  // needed.
-#if defined(WCHAR_T_IS_UTF32)
-  WideToUTF16(out->data(), out->length(), &out16);
-  out->clear();  // for equivalence with the swap below
-#elif defined(WCHAR_T_IS_UTF16)
-  out->swap(out16);
-#endif
+  size_t output_offset = offset_for_adjustment ?
+      *offset_for_adjustment : std::wstring::npos;
 
   // Do each component of the host separately, since we enforce script matching
   // on a per-component basis.
-  size_t cur_begin = 0;  // Beginning of the current component (inclusive).
-  while (cur_begin < input16.size()) {
-    // Find the next dot or the end of the string.
-    size_t next_dot = input16.find_first_of('.', cur_begin);
-    if (next_dot == std::wstring::npos)
-      next_dot = input16.size();  // For getting the last component.
-
-    if (next_dot > cur_begin) {
+  for (size_t component_start = 0, component_end;
+       component_start < input16.length();
+       component_start = component_end + 1) {
+    // Find the end of the component.
+    component_end = input16.find('.', component_start);
+    if (component_end == string16::npos)
+      component_end = input16.length();  // For getting the last component.
+    size_t component_length = component_end - component_start;
+
+    size_t output_component_start = out16.length();
+    bool converted_idn = false;
+    if (component_end > component_start) {
       // Add the substring that we just found.
-      IDNToUnicodeOneComponent(&input16[cur_begin],
-                               static_cast<int>(next_dot - cur_begin),
-                               languages,
-                               &out16);
+      converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
+          component_length, languages, &out16);
+    }
+    size_t output_component_length = out16.length() - output_component_start;
+
+    if ((output_offset != std::wstring::npos) &&
+        (*offset_for_adjustment > component_start)) {
+      if ((*offset_for_adjustment < component_end) && converted_idn)
+        output_offset = std::wstring::npos;
+      else
+        output_offset += output_component_length - component_length;
     }
 
-    // Need to add the dot we just found (if we found one). This needs to be
-    // done before we break out below in case the URL ends in a dot.
-    if (next_dot < input16.size())
+    // Need to add the dot we just found (if we found one).
+    if (component_end < input16.length())
       out16.push_back('.');
-    else
-      break;  // No more components left.
-
-    cur_begin = next_dot + 1;
   }
 
-#if defined(WCHAR_T_IS_UTF32)
-  UTF16ToWide(out16.data(), out16.length(), out);
-#elif defined(WCHAR_T_IS_UTF16)
-  out->swap(out16);
-#endif
+  if (offset_for_adjustment)
+    *offset_for_adjustment = output_offset;
+
+  return UTF16ToWideAndAdjustOffset(out16, offset_for_adjustment);
 }
 
 std::string CanonicalizeHost(const std::string& host,
@@ -1262,31 +1269,48 @@ void GetIdentityFromURL(const GURL& url,
                         std::wstring* username,
                         std::wstring* password) {
   UnescapeRule::Type flags = UnescapeRule::SPACES;
-  *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags);
-  *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags);
+  *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
+  *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
 }
 
 void AppendFormattedHost(const GURL& url,
                          const std::wstring& languages,
                          std::wstring* output,
-                         url_parse::Parsed* new_parsed) {
+                         url_parse::Parsed* new_parsed,
+                         size_t* offset_for_adjustment) {
+  DCHECK(output);
   const url_parse::Component& host =
       url.parsed_for_possibly_invalid_spec().host;
 
   if (host.is_nonempty()) {
     // Handle possible IDN in the host name.
+    int new_host_begin = static_cast<int>(output->length());
     if (new_parsed)
-      new_parsed->host.begin = static_cast<int>(output->length());
+      new_parsed->host.begin = new_host_begin;
+    size_t offset_past_current_output =
+        (!offset_for_adjustment ||
+         (*offset_for_adjustment == std::wstring::npos) ||
+         (*offset_for_adjustment < output->length())) ?
+            std::wstring::npos : (*offset_for_adjustment - output->length());
+    size_t* offset_into_host =
+        (offset_past_current_output >= static_cast<size_t>(host.len)) ?
+            NULL : &offset_past_current_output;
 
     const std::string& spec = url.possibly_invalid_spec();
     DCHECK(host.begin >= 0 &&
            ((spec.length() == 0 && host.begin == 0) ||
             host.begin < static_cast<int>(spec.length())));
-    net::IDNToUnicode(&spec[host.begin], host.len, languages, output);
+    output->append(net::IDNToUnicode(&spec[host.begin],
+                   static_cast<size_t>(host.len), languages, offset_into_host));
 
-    if (new_parsed) {
-      new_parsed->host.len =
-          static_cast<int>(output->length()) - new_parsed->host.begin;
+    int new_host_len = static_cast<int>(output->length()) - new_host_begin;
+    if (new_parsed)
+      new_parsed->host.len = new_host_len;
+    if (offset_into_host) {
+      *offset_for_adjustment = (*offset_into_host == std::wstring::npos) ?
+          std::wstring::npos : (new_host_begin + *offset_into_host);
+    } else if (offset_past_current_output != std::wstring::npos) {
+      *offset_for_adjustment += new_host_len - host.len;
     }
   } else if (new_parsed) {
     new_parsed->host.reset();
@@ -1298,19 +1322,36 @@ void AppendFormattedComponent(const std::string& spec,
                               const url_parse::Component& in_component,
                               UnescapeRule::Type unescape_rules,
                               std::wstring* output,
-                              url_parse::Component* out_component) {
+                              url_parse::Component* out_component,
+                              size_t* offset_for_adjustment) {
+  DCHECK(output);
+  DCHECK(offset_for_adjustment);
   if (in_component.is_nonempty()) {
     out_component->begin = static_cast<int>(output->length());
+    size_t offset_past_current_output =
+        ((*offset_for_adjustment == std::wstring::npos) ||
+         (*offset_for_adjustment < output->length())) ?
+            std::wstring::npos : (*offset_for_adjustment - output->length());
+    size_t* offset_into_component =
+        (offset_past_current_output >= static_cast<size_t>(in_component.len)) ?
+            NULL : &offset_past_current_output;
     if (unescape_rules == UnescapeRule::NONE) {
-      output->append(UTF8ToWide(spec.substr(
-          in_component.begin, in_component.len)));
+      output->append(UTF8ToWideAndAdjustOffset(
+          spec.substr(in_component.begin, in_component.len),
+          offset_into_component));
     } else {
       output->append(UnescapeAndDecodeUTF8URLComponent(
-          spec.substr(in_component.begin, in_component.len),
-          unescape_rules));
+          spec.substr(in_component.begin, in_component.len), unescape_rules,
+          offset_into_component));
     }
     out_component->len =
         static_cast<int>(output->length()) - out_component->begin;
+    if (offset_into_component) {
+      *offset_for_adjustment = (*offset_into_component == std::wstring::npos) ?
+          std::wstring::npos : (out_component->begin + *offset_into_component);
+    } else if (offset_past_current_output != std::wstring::npos) {
+      *offset_for_adjustment += out_component->len - in_component.len;
+    }
   } else {
     out_component->reset();
   }
@@ -1321,10 +1362,14 @@ std::wstring FormatUrl(const GURL& url,
                        bool omit_username_password,
                        UnescapeRule::Type unescape_rules,
                        url_parse::Parsed* new_parsed,
-                       size_t* prefix_end) {
+                       size_t* prefix_end,
+                       size_t* offset_for_adjustment) {
   url_parse::Parsed parsed_temp;
   if (!new_parsed)
     new_parsed = &parsed_temp;
+  size_t offset_temp = std::wstring::npos;
+  if (!offset_for_adjustment)
+    offset_for_adjustment = &offset_temp;
 
   std::wstring url_string;
 
@@ -1332,6 +1377,7 @@ std::wstring FormatUrl(const GURL& url,
   if (url.is_empty()) {
     if (prefix_end)
       *prefix_end = 0;
+    *offset_for_adjustment = std::wstring::npos;
     return url_string;
   }
 
@@ -1343,19 +1389,22 @@ std::wstring FormatUrl(const GURL& url,
   if (url.SchemeIs(kViewSource) &&
       !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
     return FormatViewSourceUrl(url, languages, omit_username_password,
-        unescape_rules, new_parsed, prefix_end);
+        unescape_rules, new_parsed, prefix_end, offset_for_adjustment);
   }
 
   // We handle both valid and invalid URLs (this will give us the spec
   // regardless of validity).
   const std::string& spec = url.possibly_invalid_spec();
   const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
+  if (*offset_for_adjustment >= spec.length())
+    *offset_for_adjustment = std::wstring::npos;
 
   // Copy everything before the username (the scheme and the separators.)
   // These are ASCII.
-  int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true);
-  for (int i = 0; i < pre_end; ++i)
-    url_string.push_back(spec[i]);
+  std::copy(spec.begin(),
+      spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
+                                                  true),
+      std::back_inserter(url_string));
   new_parsed->scheme = parsed.scheme;
 
   if (omit_username_password) {
@@ -1364,16 +1413,41 @@ std::wstring FormatUrl(const GURL& url,
     // e.g. "http://google.com:search@evil.ru/"
     new_parsed->username.reset();
     new_parsed->password.reset();
+    if ((*offset_for_adjustment != std::wstring::npos) &&
+        (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+      if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
+        // The seeming off-by-one and off-by-two in these first two lines are to
+        // account for the ':' after the username and '@' after the password.
+        if (*offset_for_adjustment >
+            static_cast<size_t>(parsed.password.end())) {
+          *offset_for_adjustment -=
+              (parsed.username.len + parsed.password.len + 2);
+        } else if (*offset_for_adjustment >
+                   static_cast<size_t>(parsed.username.begin)) {
+          *offset_for_adjustment = std::wstring::npos;
+        }
+      } else {
+        const url_parse::Component* nonempty_component =
+            parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
+        // The seeming off-by-one in these first two lines is to account for the
+        // '@' after the username/password.
+        if (*offset_for_adjustment >
+            static_cast<size_t>(nonempty_component->end())) {
+          *offset_for_adjustment -= (nonempty_component->len + 1);
+        } else if (*offset_for_adjustment >
+                   static_cast<size_t>(nonempty_component->begin)) {
+          *offset_for_adjustment = std::wstring::npos;
+        }
+      }
+    }
   } else {
-    AppendFormattedComponent(
-        spec, parsed.username, unescape_rules,
-        &url_string, &new_parsed->username);
+    AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
+                             &new_parsed->username, offset_for_adjustment);
     if (parsed.password.is_valid()) {
       url_string.push_back(':');
     }
-    AppendFormattedComponent(
-        spec, parsed.password, unescape_rules,
-        &url_string, &new_parsed->password);
+    AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
+                             &new_parsed->password, offset_for_adjustment);
     if (parsed.username.is_valid() || parsed.password.is_valid()) {
       url_string.push_back('@');
     }
@@ -1381,39 +1455,56 @@ std::wstring FormatUrl(const GURL& url,
   if (prefix_end)
     *prefix_end = static_cast<size_t>(url_string.length());
 
-  AppendFormattedHost(url, languages, &url_string, new_parsed);
+  AppendFormattedHost(url, languages, &url_string, new_parsed,
+                      offset_for_adjustment);
 
   // Port.
   if (parsed.port.is_nonempty()) {
     url_string.push_back(':');
-    int begin = url_string.length();
-    for (int i = parsed.port.begin; i < parsed.port.end(); ++i)
-      url_string.push_back(spec[i]);
-    new_parsed->port.begin = begin;
-    new_parsed->port.len = url_string.length() - begin;
+    new_parsed->port.begin = url_string.length();
+    std::copy(spec.begin() + parsed.port.begin,
+              spec.begin() + parsed.port.end(), std::back_inserter(url_string));
+    new_parsed->port.len = url_string.length() - new_parsed->port.begin;
   } else {
     new_parsed->port.reset();
   }
 
   // Path and query both get the same general unescape & convert treatment.
-  AppendFormattedComponent(
-      spec, parsed.path, unescape_rules, &url_string,
-      &new_parsed->path);
+  AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
+                           &new_parsed->path, offset_for_adjustment);
   if (parsed.query.is_valid())
     url_string.push_back('?');
-  AppendFormattedComponent(
-      spec, parsed.query, unescape_rules, &url_string,
-      &new_parsed->query);
+  AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
+                           &new_parsed->query, offset_for_adjustment);
 
   // Reference is stored in valid, unescaped UTF-8, so we can just convert.
   if (parsed.ref.is_valid()) {
     url_string.push_back('#');
-    int begin = url_string.length();
-    if (parsed.ref.len > 0)
-      url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin],
-                                               parsed.ref.len)));
-    new_parsed->ref.begin = begin;
-    new_parsed->ref.len = url_string.length() - begin;
+    new_parsed->ref.begin = url_string.length();
+    size_t offset_past_current_output =
+        ((*offset_for_adjustment == std::wstring::npos) ||
+         (*offset_for_adjustment < url_string.length())) ?
+            std::wstring::npos : (*offset_for_adjustment - url_string.length());
+    size_t* offset_into_ref =
+        (offset_past_current_output >= static_cast<size_t>(parsed.ref.len)) ?
+            NULL : &offset_past_current_output;
+    if (parsed.ref.len > 0) {
+      url_string.append(UTF8ToWideAndAdjustOffset(spec.substr(parsed.ref.begin,
+                                                              parsed.ref.len),
+                                                  offset_into_ref));
+    }
+    new_parsed->ref.len = url_string.length() - new_parsed->ref.begin;
+    if (offset_into_ref) {
+      *offset_for_adjustment = (*offset_into_ref == std::wstring::npos) ?
+          std::wstring::npos : (new_parsed->ref.begin + *offset_into_ref);
+    } else if (offset_past_current_output != std::wstring::npos) {
+      // We clamped the offset near the beginning of this function to ensure it
+      // was within the input URL.  If we reach here, the input was something
+      // invalid and non-parseable such that the offset was past any component
+      // we could figure out.  In this case it won't be represented in the
+      // output string, so reset it.
+      *offset_for_adjustment = std::wstring::npos;
+    }
   }
 
   return url_string;
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 1f1516f..d9affe6 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -129,10 +129,9 @@ std::string GetHeaderParamValue(const std::string& field,
 std::string GetFileNameFromCD(const std::string& header,
                               const std::string& referrer_charset);
 
-// Converts the given host name to unicode characters, APPENDING them to the
-// the given output string. This can be called for any host name, if the
-// input is not IDN or is invalid in some way, we'll just append the ASCII
-// source to the output so it is still usable.
+// Converts the given host name to Unicode characters. This can be called for
+// any host name; if the input is not IDN or is invalid in some way, we'll just
+// return the ASCII source so it is still usable.
 //
 // The input should be the canonicalized ASCII host name from GURL. This
 // function does NOT accept UTF-8! Its length must also be given (this is
@@ -146,10 +145,16 @@ std::string GetFileNameFromCD(const std::string& header,
 // Latin letters in the ASCII range can be mixed with a limited set of
 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
 // When |languages| is empty, even that mixing is not allowed.
-void IDNToUnicode(const char* host,
-                  int host_len,
-                  const std::wstring& languages,
-                  std::wstring* out);
+//
+// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
+// point at the same logical place in the output string. If this isn't possible
+// because it points past the end of |host| or into the middle of a punycode
+// sequence, it will be set to std::wstring::npos.  |offset_for_adjustment| may
+// be NULL.
+std::wstring IDNToUnicode(const char* host,
+                          size_t host_len,
+                          const std::wstring& languages,
+                          size_t* offset_for_adjustment);
 
 // Canonicalizes |host| and returns it.  Also fills |host_info| with
 // IP address information.  |host_info| must not be NULL.
@@ -228,31 +233,47 @@ int SetNonBlocking(int fd);
 // the user. The given parsed structure will be updated. The host name formatter
 // also takes the same accept languages component as ElideURL. |new_parsed| may
 // be null.
-void AppendFormattedHost(const GURL& url, const std::wstring& languages,
-                         std::wstring* output, url_parse::Parsed* new_parsed);
-
-// Creates a string representation of |url|. The IDN host name may
-// be in Unicode if |languages| accepts the Unicode representation.
-// If |omit_username_password| is true, the username and the password are
-// omitted. |unescape_rules| defines how to clean the URL for human readability.
+void AppendFormattedHost(const GURL& url,
+                         const std::wstring& languages,
+                         std::wstring* output,
+                         url_parse::Parsed* new_parsed,
+                         size_t* offset_for_adjustment);
+
+// Creates a string representation of |url|. The IDN host name may be in Unicode
+// if |languages| accepts the Unicode representation. If
+// |omit_username_password| is true, any username and password are removed.
+// |unescape_rules| defines how to clean the URL for human readability.
 // You will generally want |UnescapeRule::SPACES| for display to the user if you
 // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the
 // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and
-// UTF-8. |new_parsed| will have parsing parameters of the resultant URL.
+// UTF-8.
+//
+// The last three parameters may be NULL.
+// |new_parsed| will be set to the parsing parameters of the resultant URL.
 // |prefix_end| will be the length before the hostname of the resultant URL.
-// |new_parsed| and |prefix_end| may be NULL.
+// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
+// will be modified to reflect changes this function makes to the output string;
+// for example, if |url| is "http://a:b@c.com/", |omit_username_password| is
+// true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return
+// the output string will be "http://c.com/" and |offset_for_adjustment| will be
+// 8.  If the offset cannot be successfully adjusted (e.g. because it points
+// into the middle of a component that was entirely removed, past the end of the
+// string, or into the middle of an encoding sequence), it will be set to
+// std::wstring::npos.
 std::wstring FormatUrl(const GURL& url,
                        const std::wstring& languages,
                        bool omit_username_password,
                        UnescapeRule::Type unescape_rules,
                        url_parse::Parsed* new_parsed,
-                       size_t* prefix_end);
+                       size_t* prefix_end,
+                       size_t* offset_for_adjustment);
 
 // Creates a string representation of |url| for display to the user.
 // This is a shorthand of the above function with omit_username_password=true,
 // unescape=SPACES, new_parsed=NULL, and prefix_end=NULL.
 inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
-  return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL);
+  return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL,
+                   NULL);
 }
 
 // Strip the portions of |url| that aren't core to the network request.
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 07ec17c..308ef80 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -345,6 +345,11 @@ const IDNTestCase idn_cases[] = {
 #endif
 };
 
+struct AdjustOffsetCase {
+  size_t input_offset;
+  size_t output_offset;
+};
+
 struct CompliantHostCase {
   const char* host;
   bool expected_output;
@@ -782,14 +787,10 @@ TEST(NetUtilTest, IDNToUnicodeFast) {
       // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
       if (j == 3 || j == 17 || j == 18)
         continue;
-      std::wstring output;
-      net::IDNToUnicode(idn_cases[i].input,
-                        static_cast<int>(strlen(idn_cases[i].input)),
-                        kLanguages[j],
-                        &output);
+      std::wstring output(net::IDNToUnicode(idn_cases[i].input,
+          strlen(idn_cases[i].input), kLanguages[j], NULL));
       std::wstring expected(idn_cases[i].unicode_allowed[j] ?
-                            idn_cases[i].unicode_output :
-                            ASCIIToWide(idn_cases[i].input));
+          idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
       EXPECT_EQ(expected, output);
     }
@@ -802,20 +803,43 @@ TEST(NetUtilTest, IDNToUnicodeSlow) {
       // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
       if (!(j == 3 || j == 17 || j == 18))
         continue;
-      std::wstring output;
-      net::IDNToUnicode(idn_cases[i].input,
-                        static_cast<int>(strlen(idn_cases[i].input)),
-                        kLanguages[j],
-                        &output);
+      std::wstring output(net::IDNToUnicode(idn_cases[i].input,
+          strlen(idn_cases[i].input), kLanguages[j], NULL));
       std::wstring expected(idn_cases[i].unicode_allowed[j] ?
-                            idn_cases[i].unicode_output :
-                            ASCIIToWide(idn_cases[i].input));
+          idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
       EXPECT_EQ(expected, output);
     }
   }
 }
 
+TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
+  const AdjustOffsetCase adjust_cases[] = {
+    {0, 0},                    // Leading "test." keeps its offsets.
+    {2, 2},
+    {4, 4},
+    {5, 5},                    // First character of the first xn-- label.
+    {6, std::wstring::npos},   // Inside the first xn-- label.
+    {16, std::wstring::npos},
+    {17, 7},                   // '.' following the first label.
+    {18, 8},                   // First character of the second xn-- label.
+    {19, std::wstring::npos},  // Inside the second xn-- label.
+    {25, std::wstring::npos},
+    {34, 12},                  // '.' before the trailing "test".
+    {35, 13},
+    {38, 16},                  // Last character of the input.
+    {39, std::wstring::npos},  // One past the end of the input.
+    {std::wstring::npos, std::wstring::npos},
+  };
+  // Unicode form: "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test"
+  const char kHost[] = "test.xn--cy2a840a.xn--1lq90ic7f1rc.test";
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
+    size_t offset = adjust_cases[i].input_offset;
+    net::IDNToUnicode(kHost, strlen(kHost), L"zh-CN", &offset);
+    EXPECT_EQ(adjust_cases[i].output_offset, offset) << "case " << i;
+  }
+}
+
 TEST(NetUtilTest, CompliantHost) {
   const CompliantHostCase compliant_host_cases[] = {
     {"", false},
@@ -1328,7 +1352,7 @@ TEST(NetUtilTest, FormatUrl) {
     size_t prefix_len;
     std::wstring formatted = net::FormatUrl(
         GURL(tests[i].input), tests[i].languages, tests[i].omit,
-        tests[i].escape_rules, NULL, &prefix_len);
+        tests[i].escape_rules, NULL, &prefix_len, NULL);
     EXPECT_EQ(tests[i].output, formatted) << tests[i].description;
     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
   }
@@ -1340,7 +1364,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   std::wstring formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", false, UnescapeRule::NONE, &parsed, NULL);
+      L"ja", false, UnescapeRule::NONE, &parsed, NULL, NULL);
   EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted);
   EXPECT_EQ(L"%E3%82%B0",
@@ -1360,7 +1384,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", false, UnescapeRule::NORMAL, &parsed, NULL);
+      L"ja", false, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/\x30B0/?q=\x30B0#\x30B0", formatted);
   EXPECT_EQ(L"\x30B0",
@@ -1379,7 +1403,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   formatted = net::FormatUrl(
       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
-      L"ja", true, UnescapeRule::NORMAL, &parsed, NULL);
+      L"ja", true, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
       L"/\x30B0/?q=\x30B0#\x30B0", formatted);
   EXPECT_FALSE(parsed.username.is_valid());
@@ -1395,7 +1419,7 @@ TEST(NetUtilTest, FormatUrlParsed) {
   // View-source case.
   formatted = net::FormatUrl(
       GURL("view-source:http://user:passwd@host:81/path?query#ref"),
-      L"", true, UnescapeRule::NORMAL, &parsed, NULL);
+      L"", true, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   EXPECT_EQ(L"view-source:http://host:81/path?query#ref", formatted);
   EXPECT_EQ(L"view-source:http",
       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
@@ -1408,6 +1432,124 @@ TEST(NetUtilTest, FormatUrlParsed) {
   EXPECT_EQ(L"ref", formatted.substr(parsed.ref.begin, parsed.ref.len));
 }
 
+TEST(NetUtilTest, FormatUrlAdjustOffset) {
+  const AdjustOffsetCase basic_cases[] = {
+    {0, 0},  // In-range offsets are expected to come back unchanged.
+    {3, 3},
+    {5, 5},
+    {6, 6},
+    {13, 13},
+    {21, 21},
+    {22, 22},
+    {23, 23},
+    {25, 25},  // Last character of the URL.
+    {26, std::wstring::npos},  // One past the end.
+    {500000, std::wstring::npos},
+    {std::wstring::npos, std::wstring::npos},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
+    size_t offset = basic_cases[i].input_offset;
+    net::FormatUrl(GURL("http://www.google.com/foo/"), L"en", true,
+                   UnescapeRule::NORMAL, NULL, NULL, &offset);
+    EXPECT_EQ(basic_cases[i].output_offset, offset) << "case " << i;
+  }
+
+  const struct {
+    const char* input_url;
+    size_t input_offset;
+    size_t output_offset;
+  } omit_auth_cases[] = {
+    {"http://foo:bar@www.google.com/", 6, 6},
+    {"http://foo:bar@www.google.com/", 7, 7},    // First char of "foo".
+    {"http://foo:bar@www.google.com/", 8, std::wstring::npos},
+    {"http://foo:bar@www.google.com/", 10, std::wstring::npos},
+    {"http://foo:bar@www.google.com/", 11, std::wstring::npos},
+    {"http://foo:bar@www.google.com/", 14, std::wstring::npos},  // '@'.
+    {"http://foo:bar@www.google.com/", 15, 7},   // First char of the host.
+    {"http://foo:bar@www.google.com/", 25, 17},
+    {"http://foo@www.google.com/", 9, std::wstring::npos},
+    {"http://foo@www.google.com/", 11, 7},       // First char of the host.
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
+    size_t offset = omit_auth_cases[i].input_offset;
+    net::FormatUrl(GURL(omit_auth_cases[i].input_url), L"en", true,
+                   UnescapeRule::NORMAL, NULL, NULL, &offset);
+    EXPECT_EQ(omit_auth_cases[i].output_offset, offset) << "case " << i;
+  }
+
+  const AdjustOffsetCase view_source_cases[] = {
+    {0, 0},
+    {3, 3},
+    {11, 11},  // ':' after "view-source".
+    {12, 12},
+    {13, 13},
+    {19, 19},  // First char of the omitted "foo@".
+    {20, std::wstring::npos},
+    {23, 19},  // First char of the host.
+    {26, 22},
+    {std::wstring::npos, std::wstring::npos},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
+    size_t offset = view_source_cases[i].input_offset;
+    net::FormatUrl(GURL("view-source:http://foo@www.google.com/"), L"en", true,
+                   UnescapeRule::NORMAL, NULL, NULL, &offset);
+    EXPECT_EQ(view_source_cases[i].output_offset, offset) << "case " << i;
+  }
+
+  const AdjustOffsetCase idn_hostname_cases[] = {
+    {8, std::wstring::npos},   // Inside the xn-- label.
+    {16, std::wstring::npos},
+    {24, std::wstring::npos},  // Last char of the xn-- label.
+    {25, 12},                  // '.' before "jp".
+    {30, 17},
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
+    size_t offset = idn_hostname_cases[i].input_offset;
+    // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
+    net::FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), L"ja", true,
+                   UnescapeRule::NORMAL, NULL, NULL, &offset);
+    EXPECT_EQ(idn_hostname_cases[i].output_offset, offset) << "case " << i;
+  }
+
+  const AdjustOffsetCase unescape_cases[] = {
+    {25, 25},  // '%' of "%20".
+    {26, std::wstring::npos},
+    {27, std::wstring::npos},
+    {28, 26},  // 'b' of "bar".
+    {35, std::wstring::npos},  // Inside the first "%E3..." sequence.
+    {41, 31},  // Start of the second escaped character.
+    {59, 33},  // Start of the fourth escaped character.
+    {60, std::wstring::npos},
+    {67, std::wstring::npos},
+    {68, std::wstring::npos},  // One past the end.
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
+    size_t offset = unescape_cases[i].input_offset;
+    // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
+    net::FormatUrl(GURL(
+        "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
+        L"en", true, UnescapeRule::SPACES, NULL, NULL, &offset);
+    EXPECT_EQ(unescape_cases[i].output_offset, offset) << "case " << i;
+  }
+
+  const AdjustOffsetCase ref_cases[] = {
+    {30, 30},  // '#'.
+    {31, 31},  // First byte of the first UTF-8 character.
+    {32, std::wstring::npos},
+    {34, 32},  // First byte of the second UTF-8 character.
+    {37, 33},  // 'z'.
+    {38, std::wstring::npos},  // One past the end.
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
+    size_t offset = ref_cases[i].input_offset;
+    // "http://www.google.com/foo.html#\x30B0\x30B0z"
+    net::FormatUrl(GURL(
+        "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), L"en",
+        true, UnescapeRule::NORMAL, NULL, NULL, &offset);
+    EXPECT_EQ(ref_cases[i].output_offset, offset) << "case " << i;
+  }
+}
+
 TEST(NetUtilTest, SimplifyUrlForRequest) {
   struct {
     const char* input_url;
@@ -1466,4 +1608,3 @@ TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
     EXPECT_EQ(i, net::explicitly_allowed_ports.size());
   }
 }
-