Copy the relevant parts of ICU to a new file base/third_party/icu/icu_utf.*

so we can do basic UTF8/16/32 conversions without linking all of ICU. Change callers who used to call SysUTF8ToWide/SysWideToUTF8 in base to using these new functions. I will remove the Sys versions of these functions in a later patch. TEST=none BUG=none Review URL: http://codereview.chromium.org/243102 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28219 0039d316-1c4b-4281-b951-d872f2087c98
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-07 02:10:20 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-07 02:10:20 +0000
commit: 047a03f4cefa75a67070f08b3f6b727f7ea702d5 (patch)
tree: d00ccbd9e59106de8fd904b06720be59219d61fe
parent: 0511c153260e5d402d7552ff7b47a2acb17bdf2b (diff)
download: chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.zip
chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.tar.gz
chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.tar.bz2
26 files changed, 1144 insertions, 435 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 0ad3f16..c1d7a8b 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -34,6 +34,8 @@
         'third_party/dmg_fp/dmg_fp.h',
         'third_party/dmg_fp/dtoa.cc',
         'third_party/dmg_fp/g_fmt.cc',
+        'third_party/icu/icu_utf.cc',
+        'third_party/icu/icu_utf.h',
         'third_party/nspr/prcpucfg.h',
         'third_party/nspr/prcpucfg_win.h',
         'third_party/nspr/prtime.cc',
@@ -138,8 +140,8 @@
         'hmac_mac.cc',
         'hmac_nss.cc',
         'hmac_win.cc',
-        'i18n/string_conversions.cc',
-        'i18n/string_conversions.h',
+        'i18n/icu_string_conversions.cc',
+        'i18n/icu_string_conversions.h',
         'iat_patch.cc',
         'iat_patch.h',
         'icu_util.cc',
@@ -151,8 +153,8 @@
         'json_reader.h',
         'json_writer.cc',
         'json_writer.h',
-	'keyboard_code_conversion_gtk.cc',
-	'keyboard_code_conversion_gtk.h',
+        'keyboard_code_conversion_gtk.cc',
+        'keyboard_code_conversion_gtk.h',
         'keyboard_codes.h',
         'keyboard_codes_win.h',
         'keyboard_codes_posix.h',
@@ -323,6 +325,8 @@
         'tracked_objects.cc',
         'tracked_objects.h',
         'tuple.h',
+        'utf_string_conversions.cc',
+        'utf_string_conversions.h',
         'unix_domain_socket_posix.cc',
         'values.cc',
         'values.h',
diff --git a/base/file_util.cc b/base/file_util.cc
index 1ee7abc..d3a989b5 100644
--- a/base/file_util.cc
+++ b/base/file_util.cc
@@ -13,10 +13,9 @@
 
 #include "base/file_path.h"
 #include "base/logging.h"
-#include "base/string_util.h"
-
 #include "base/string_piece.h"
-#include "base/sys_string_conversions.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 
 namespace {
 
diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc
index 7274d76..27adbfa 100644
--- a/base/file_util_posix.cc
+++ b/base/file_util_posix.cc
@@ -34,6 +34,7 @@
 #include "base/string_util.h"
 #include "base/sys_string_conversions.h"
 #include "base/time.h"
+#include "base/utf_string_conversions.h"
 #include "unicode/coll.h"
 
 
diff --git a/base/file_util_unittest.cc b/base/file_util_unittest.cc
index 57190c5..5b606c9 100644
--- a/base/file_util_unittest.cc
+++ b/base/file_util_unittest.cc
@@ -20,8 +20,8 @@
 #include "base/logging.h"
 #include "base/path_service.h"
 #include "base/platform_thread.h"
-#include "base/string_util.h"
 #include "base/time.h"
+#include "base/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "testing/platform_test.h"
 
diff --git a/base/file_version_info_mac.mm b/base/file_version_info_mac.mm
index ae6603f..f177bca 100644
--- a/base/file_version_info_mac.mm
+++ b/base/file_version_info_mac.mm
@@ -9,6 +9,7 @@
 #include "base/file_path.h"
 #include "base/logging.h"
 #include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 
 FileVersionInfo::FileVersionInfo(NSBundle *bundle) : bundle_(bundle) {
   [bundle_ retain];
@@ -43,15 +44,15 @@ FileVersionInfo* FileVersionInfo::CreateFileVersionInfo(
 }
 
 std::wstring FileVersionInfo::company_name() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::company_short_name() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::internal_name() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::product_name() {
@@ -63,7 +64,7 @@ std::wstring FileVersionInfo::product_short_name() {
 }
 
 std::wstring FileVersionInfo::comments() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::legal_copyright() {
@@ -75,22 +76,22 @@ std::wstring FileVersionInfo::product_version() {
 }
 
 std::wstring FileVersionInfo::file_description() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::legal_trademarks() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::private_build() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::file_version() {
   // CFBundleVersion has limitations that may not be honored by a
   // proper Chromium version number, so try KSVersion first.
   std::wstring version = GetStringValue(L"KSVersion");
-  if (version == L"")
+  if (version.empty())
     version = GetStringValue(L"CFBundleVersion");
   return version;
 }
@@ -100,7 +101,7 @@ std::wstring FileVersionInfo::original_filename() {
 }
 
 std::wstring FileVersionInfo::special_build() {
-  return L"";
+  return std::wstring();
 }
 
 std::wstring FileVersionInfo::last_change() {
@@ -132,5 +133,5 @@ std::wstring FileVersionInfo::GetStringValue(const wchar_t* name) {
   std::wstring str;
   if (GetValue(name, &str))
     return str;
-  return L"";
+  return std::wstring();
 }
diff --git a/base/i18n/string_conversions.cc b/base/i18n/icu_string_conversions.cc
index 35c9d6d..225fe0b 100644
--- a/base/i18n/string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "base/i18n/string_conversions.h"
+#include "base/i18n/icu_string_conversions.h"
 
 #include <vector>
 
@@ -82,166 +82,6 @@ void ToUnicodeCallbackSubstitute(const void* context,
   // else ignore the reset, close and clone calls.
 }
 
-// ReadUnicodeCharacter --------------------------------------------------------
-
-// Reads a UTF-8 stream, placing the next code point into the given output
-// |*code_point|. |src| represents the entire string to read, and |*char_index|
-// is the character offset within the string to start reading at. |*char_index|
-// will be updated to index the last character read, such that incrementing it
-// (as in a for loop) will take the reader to the next character.
-//
-// Returns true on success. On false, |*code_point| will be invalid.
-bool ReadUnicodeCharacter(const char* src, int32 src_len,
-                          int32* char_index, uint32* code_point_out) {
-  // U8_NEXT expects to be able to use -1 to signal an error, so we must
-  // use a signed type for code_point.  But this function returns false
-  // on error anyway, so code_point_out is unsigned.
-  int32 code_point;
-  U8_NEXT(src, *char_index, src_len, code_point);
-  *code_point_out = static_cast<uint32>(code_point);
-
-  // The ICU macro above moves to the next char, we want to point to the last
-  // char consumed.
-  (*char_index)--;
-
-  // Validate the decoded value.
-  return IsValidCodepoint(code_point);
-}
-
-// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
-bool ReadUnicodeCharacter(const char16* src, int32 src_len,
-                          int32* char_index, uint32* code_point) {
-  if (U16_IS_SURROGATE(src[*char_index])) {
-    if (!U16_IS_SURROGATE_LEAD(src[*char_index]) ||
-        *char_index + 1 >= src_len ||
-        !U16_IS_TRAIL(src[*char_index + 1])) {
-      // Invalid surrogate pair.
-      return false;
-    }
-
-    // Valid surrogate pair.
-    *code_point = U16_GET_SUPPLEMENTARY(src[*char_index],
-                                        src[*char_index + 1]);
-    (*char_index)++;
-  } else {
-    // Not a surrogate, just one 16-bit word.
-    *code_point = src[*char_index];
-  }
-
-  return IsValidCodepoint(*code_point);
-}
-
-#if defined(WCHAR_T_IS_UTF32)
-// Reads UTF-32 character. The usage is the same as the 8-bit version above.
-bool ReadUnicodeCharacter(const wchar_t* src, int32 src_len,
-                          int32* char_index, uint32* code_point) {
-  // Conversion is easy since the source is 32-bit.
-  *code_point = src[*char_index];
-
-  // Validate the value.
-  return IsValidCodepoint(*code_point);
-}
-#endif  // defined(WCHAR_T_IS_UTF32)
-
-// WriteUnicodeCharacter -------------------------------------------------------
-
-// Appends a UTF-8 character to the given 8-bit string.
-void WriteUnicodeCharacter(uint32 code_point, std::string* output) {
-  if (code_point <= 0x7f) {
-    // Fast path the common case of one byte.
-    output->push_back(code_point);
-    return;
-  }
-
-  // U8_APPEND_UNSAFE can append up to 4 bytes.
-  int32 char_offset = static_cast<int32>(output->length());
-  output->resize(char_offset + U8_MAX_LENGTH);
-
-  U8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
-
-  // U8_APPEND_UNSAFE will advance our pointer past the inserted character, so
-  // it will represent the new length of the string.
-  output->resize(char_offset);
-}
-
-// Appends the given code point as a UTF-16 character to the STL string.
-void WriteUnicodeCharacter(uint32 code_point, string16* output) {
-  if (U16_LENGTH(code_point) == 1) {
-    // Thie code point is in the Basic Multilingual Plane (BMP).
-    output->push_back(static_cast<char16>(code_point));
-  } else {
-    // Non-BMP characters use a double-character encoding.
-    int32 char_offset = static_cast<int32>(output->length());
-    output->resize(char_offset + U16_MAX_LENGTH);
-    U16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
-  }
-}
-
-#if defined(WCHAR_T_IS_UTF32)
-// Appends the given UTF-32 character to the given 32-bit string.
-inline void WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
-  // This is the easy case, just append the character.
-  output->push_back(code_point);
-}
-#endif  // defined(WCHAR_T_IS_UTF32)
-
-// Generalized Unicode converter -----------------------------------------------
-
-// Converts the given source Unicode character type to the given destination
-// Unicode character type as a STL string. The given input buffer and size
-// determine the source, and the given output STL string will be replaced by
-// the result.
-template<typename SRC_CHAR, typename DEST_STRING>
-bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
-  output->clear();
-
-  // ICU requires 32-bit numbers.
-  bool success = true;
-  int32 src_len32 = static_cast<int32>(src_len);
-  for (int32 i = 0; i < src_len32; i++) {
-    uint32 code_point;
-    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
-      WriteUnicodeCharacter(code_point, output);
-    } else {
-      // TODO(jungshik): consider adding 'Replacement character' (U+FFFD)
-      // in place of an invalid codepoint.
-      success = false;
-    }
-  }
-  return success;
-}
-
-
-// Guesses the length of the output in UTF-8 in bytes, and reserves that amount
-// of space in the given string. We also assume that the input character types
-// are unsigned, which will be true for UTF-16 and -32 on our systems. We assume
-// the string length is greater than zero.
-template<typename CHAR>
-void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
-  if (src[0] < 0x80) {
-    // Assume that the entire input will be ASCII.
-    output->reserve(src_len);
-  } else {
-    // Assume that the entire input is non-ASCII and will have 3 bytes per char.
-    output->reserve(src_len * 3);
-  }
-}
-
-// Guesses the size of the output buffer (containing either UTF-16 or -32 data)
-// given some UTF-8 input that will be converted to it. See ReserveUTF8Output.
-// We assume the source length is > 0.
-template<typename STRING>
-void ReserveUTF16Or32Output(const char* src, size_t src_len, STRING* output) {
-  if (static_cast<unsigned char>(src[0]) < 0x80) {
-    // Assume the input is all ASCII, which means 1:1 correspondence.
-    output->reserve(src_len);
-  } else {
-    // Otherwise assume that the UTF-8 sequences will have 2 bytes for each
-    // character.
-    output->reserve(src_len / 2);
-  }
-}
-
 bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src,
                       int uchar_len, OnStringUtilConversionError::Type on_error,
                       std::string* encoded) {
@@ -308,184 +148,6 @@ inline UConverterType utf32_platform_endian() {
 
 }  // namespace
 
-// UTF-8 <-> Wide --------------------------------------------------------------
-
-std::string WideToUTF8(const std::wstring& wide) {
-  std::string ret;
-  if (wide.empty())
-    return ret;
-
-  // Ignore the success flag of this call, it will do the best it can for
-  // invalid input, which is what we want here.
-  WideToUTF8(wide.data(), wide.length(), &ret);
-  return ret;
-}
-
-bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF8Output(src, src_len, output);
-  return ConvertUnicode<wchar_t, std::string>(src, src_len, output);
-}
-
-std::wstring UTF8ToWide(const base::StringPiece& utf8) {
-  std::wstring ret;
-  if (utf8.empty())
-    return ret;
-
-  UTF8ToWide(utf8.data(), utf8.length(), &ret);
-  return ret;
-}
-
-bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode<char, std::wstring>(src, src_len, output);
-}
-
-// UTF-16 <-> Wide -------------------------------------------------------------
-
-#if defined(WCHAR_T_IS_UTF16)
-
-// When wide == UTF-16, then conversions are a NOP.
-string16 WideToUTF16(const std::wstring& wide) {
-  return wide;
-}
-
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
-  output->assign(src, src_len);
-  return true;
-}
-
-std::wstring UTF16ToWide(const string16& utf16) {
-  return utf16;
-}
-
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
-  output->assign(src, src_len);
-  return true;
-}
-
-#elif defined(WCHAR_T_IS_UTF32)
-
-string16 WideToUTF16(const std::wstring& wide) {
-  string16 ret;
-  if (wide.empty())
-    return ret;
-
-  WideToUTF16(wide.data(), wide.length(), &ret);
-  return ret;
-}
-
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  // Assume that normally we won't have any non-BMP characters so the counts
-  // will be the same.
-  output->reserve(src_len);
-  return ConvertUnicode<wchar_t, string16>(src, src_len, output);
-}
-
-std::wstring UTF16ToWide(const string16& utf16) {
-  std::wstring ret;
-  if (utf16.empty())
-    return ret;
-
-  UTF16ToWide(utf16.data(), utf16.length(), &ret);
-  return ret;
-}
-
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  // Assume that normally we won't have any non-BMP characters so the counts
-  // will be the same.
-  output->reserve(src_len);
-  return ConvertUnicode<char16, std::wstring>(src, src_len, output);
-}
-
-#endif  // defined(WCHAR_T_IS_UTF32)
-
-// UTF16 <-> UTF8 --------------------------------------------------------------
-
-#if defined(WCHAR_T_IS_UTF32)
-
-bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF16Or32Output(src, src_len, output);
-  return ConvertUnicode<char, string16>(src, src_len, output);
-}
-
-string16 UTF8ToUTF16(const std::string& utf8) {
-  string16 ret;
-  if (utf8.empty())
-    return ret;
-
-  // Ignore the success flag of this call, it will do the best it can for
-  // invalid input, which is what we want here.
-  UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
-  return ret;
-}
-
-bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
-  if (src_len == 0) {
-    output->clear();
-    return true;
-  }
-
-  ReserveUTF8Output(src, src_len, output);
-  return ConvertUnicode<char16, std::string>(src, src_len, output);
-}
-
-std::string UTF16ToUTF8(const string16& utf16) {
-  std::string ret;
-  if (utf16.empty())
-    return ret;
-
-  // Ignore the success flag of this call, it will do the best it can for
-  // invalid input, which is what we want here.
-  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
-  return ret;
-}
-
-#elif defined(WCHAR_T_IS_UTF16)
-// Easy case since we can use the "wide" versions we already wrote above.
-
-bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
-  return UTF8ToWide(src, src_len, output);
-}
-
-string16 UTF8ToUTF16(const std::string& utf8) {
-  return UTF8ToWide(utf8);
-}
-
-bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
-  return WideToUTF8(src, src_len, output);
-}
-
-std::string UTF16ToUTF8(const string16& utf16) {
-  return WideToUTF8(utf16);
-}
-
-#endif
-
 // Codepage <-> Wide/UTF-16  ---------------------------------------------------
 
 // Convert a wstring into the specified codepage_name.  If the codepage
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
new file mode 100644
index 0000000..d849c71
--- /dev/null
+++ b/base/i18n/icu_string_conversions.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_ICU_STRING_CONVERSIONS_H_
+#define BASE_I18N_ICU_STRING_CONVERSIONS_H_
+
+#include <string>
+
+#include "base/string16.h"
+#include "base/string_piece.h"
+
+// Defines the error handling modes of UTF16ToCodepage, CodepageToUTF16,
+// WideToCodepage and CodepageToWide.
+class OnStringUtilConversionError {
+ public:
+  enum Type {
+    // The function will return failure. The output buffer will be empty.
+    FAIL,
+
+    // The offending characters are skipped and the conversion will proceed as
+    // if they did not exist.
+    SKIP,
+
+    // When converting to Unicode, the offending byte sequences are substituted
+    // by Unicode replacement character (U+FFFD). When converting from Unicode,
+    // this is the same as SKIP.
+    SUBSTITUTE,
+  };
+
+ private:
+  OnStringUtilConversionError();
+};
+
+// Converts between UTF-16 strings and the encoding specified.  If the
+// encoding doesn't exist or the encoding fails (when on_error is FAIL),
+// returns false.
+bool UTF16ToCodepage(const string16& utf16,
+                     const char* codepage_name,
+                     OnStringUtilConversionError::Type on_error,
+                     std::string* encoded);
+
+bool CodepageToUTF16(const std::string& encoded,
+                     const char* codepage_name,
+                     OnStringUtilConversionError::Type on_error,
+                     string16* utf16);
+
+// Converts between wide strings and the encoding specified.  If the
+// encoding doesn't exist or the encoding fails (when on_error is FAIL),
+// returns false.
+bool WideToCodepage(const std::wstring& wide,
+                    const char* codepage_name,
+                    OnStringUtilConversionError::Type on_error,
+                    std::string* encoded);
+bool CodepageToWide(const std::string& encoded,
+                    const char* codepage_name,
+                    OnStringUtilConversionError::Type on_error,
+                    std::wstring* wide);
+
+#endif  // BASE_I18N_ICU_STRING_CONVERSIONS_H_
diff --git a/base/json_reader.cc b/base/json_reader.cc
index 2c3ab0b..ca33cb2 100644
--- a/base/json_reader.cc
+++ b/base/json_reader.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -8,6 +8,7 @@
 #include "base/logging.h"
 #include "base/scoped_ptr.h"
 #include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 #include "base/values.h"
 
 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,
@@ -118,8 +119,8 @@ std::string JSONReader::FormatErrorMessage(int line, int column,
 }
 
 JSONReader::JSONReader()
-  : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),
-    allow_trailing_comma_(false) {}
+    : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),
+      allow_trailing_comma_(false) {}
 
 Value* JSONReader::JsonToValue(const std::string& json, bool check_root,
                                bool allow_trailing_comma) {
diff --git a/base/json_writer.cc b/base/json_writer.cc
index 1a9f1b6..25df120 100644
--- a/base/json_writer.cc
+++ b/base/json_writer.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -8,6 +8,7 @@
 #include "base/string_util.h"
 #include "base/values.h"
 #include "base/string_escape.h"
+#include "base/utf_string_conversions.h"
 
 #if defined(OS_WIN)
 static const char kPrettyPrintLineEnding[] = "\r\n";
diff --git a/base/logging.cc b/base/logging.cc
index d35cfdb..7981310 100644
--- a/base/logging.cc
+++ b/base/logging.cc
@@ -39,7 +39,7 @@ typedef pthread_mutex_t* MutexHandle;
 #include "base/lock_impl.h"
 #include "base/string_piece.h"
 #include "base/string_util.h"
-#include "base/sys_string_conversions.h"
+#include "base/utf_string_conversions.h"
 
 namespace logging {
 
@@ -328,7 +328,7 @@ void DisplayDebugMessage(const std::string& str) {
     backslash[1] = 0;
   wcscat_s(prog_name, MAX_PATH, L"debug_message.exe");
 
-  std::wstring cmdline = base::SysUTF8ToWide(str);
+  std::wstring cmdline = UTF8ToWide(str);
   if (cmdline.empty())
     return;
 
@@ -578,5 +578,5 @@ void CloseLogFile() {
 }  // namespace logging
 
 std::ostream& operator<<(std::ostream& out, const wchar_t* wstr) {
-  return out << base::SysWideToUTF8(std::wstring(wstr));
+  return out << WideToUTF8(std::wstring(wstr));
 }
diff --git a/base/platform_file_posix.cc b/base/platform_file_posix.cc
index f964c62..623223c 100644
--- a/base/platform_file_posix.cc
+++ b/base/platform_file_posix.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -9,7 +9,7 @@
 #include <sys/stat.h>
 
 #include "base/logging.h"
-#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 
 namespace base {
 
diff --git a/base/process_util_mac.mm b/base/process_util_mac.mm
index 183fe31..61029c0 100644
--- a/base/process_util_mac.mm
+++ b/base/process_util_mac.mm
@@ -19,6 +19,7 @@
 #include "base/eintr_wrapper.h"
 #include "base/logging.h"
 #include "base/string_util.h"
+#include "base/sys_string_conversions.h"
 #include "base/time.h"
 
 namespace base {
@@ -42,9 +43,9 @@ void RestoreDefaultExceptionHandler() {
 
 NamedProcessIterator::NamedProcessIterator(const std::wstring& executable_name,
                                            const ProcessFilter* filter)
-  : executable_name_(executable_name),
-    index_of_kinfo_proc_(0),
-    filter_(filter) {
+    : executable_name_(executable_name),
+      index_of_kinfo_proc_(0),
+      filter_(filter) {
   // Get a snapshot of all of my processes (yes, as we loop it can go stale, but
   // but trying to find where we were in a constantly changing list is basically
   // impossible.
@@ -111,7 +112,7 @@ const ProcessEntry* NamedProcessIterator::NextProcessEntry() {
 }
 
 bool NamedProcessIterator::CheckForNextProcess() {
-  std::string executable_name_utf8(WideToUTF8(executable_name_));
+  std::string executable_name_utf8(base::SysWideToUTF8(executable_name_));
 
   std::string data;
   std::string exec_name;
diff --git a/base/stats_table.cc b/base/stats_table.cc
index c175551..522db5a 100644
--- a/base/stats_table.cc
+++ b/base/stats_table.cc
@@ -11,8 +11,8 @@
 #include "base/shared_memory.h"
 #include "base/string_piece.h"
 #include "base/string_util.h"
-#include "base/sys_string_conversions.h"
 #include "base/thread_local_storage.h"
+#include "base/utf_string_conversions.h"
 
 #if defined(OS_POSIX)
 #include "errno.h"
@@ -170,8 +170,7 @@ StatsTablePrivate* StatsTablePrivate::New(const std::string& name,
                                           int max_threads,
                                           int max_counters) {
   scoped_ptr<StatsTablePrivate> priv(new StatsTablePrivate());
-  if (!priv->shared_memory_.Create(base::SysUTF8ToWide(name), false, true,
-                                   size))
+  if (!priv->shared_memory_.Create(UTF8ToWide(name), false, true, size))
     return NULL;
   if (!priv->shared_memory_.Map(size))
     return NULL;
diff --git a/base/string16.cc b/base/string16.cc
index ca45fba..d1d0908 100644
--- a/base/string16.cc
+++ b/base/string16.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -14,6 +14,7 @@
 #elif defined(WCHAR_T_IS_UTF32)
 
 #include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 
 namespace base {
 
diff --git a/base/string16_unittest.cc b/base/string16_unittest.cc
new file mode 100644
index 0000000..69eed4b
--- /dev/null
+++ b/base/string16_unittest.cc
@@ -0,0 +1,52 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <sstream>
+
+#include "base/string16.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+#if defined(WCHAR_T_IS_UTF32)
+
+// string16 has a custom operator<< so it can be used with logging streams.
+// This tests that conversion, including malformed surrogate input.
+TEST(String16Test, OutputStream) {
+  // Basic stream test.
+  {
+    std::ostringstream stream;
+    stream << "Empty '" << string16() << "' standard '"
+           << string16(ASCIIToUTF16("Hello, world")) << "'";
+    EXPECT_STREQ("Empty '' standard 'Hello, world'",
+                 stream.str().c_str());
+  }
+
+  // Interesting edge cases.
+  {
+    // These should each get converted to the invalid character: EF BF BD.
+    string16 initial_surrogate;
+    initial_surrogate.push_back(0xd800);
+    string16 final_surrogate;
+    final_surrogate.push_back(0xdc00);
+
+    // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
+    string16 surrogate_pair;
+    surrogate_pair.push_back(0xd800);
+    surrogate_pair.push_back(0xdf00);
+    surrogate_pair.push_back('z');
+
+    // Will get converted to the invalid char + 's': EF BF BD 's'.
+    string16 unterminated_surrogate;
+    unterminated_surrogate.push_back(0xd800);
+    unterminated_surrogate.push_back('s');
+
+    // Stream all four cases, comma-separated, then compare the output bytes.
+    std::ostringstream stream;
+    stream << initial_surrogate << "," << final_surrogate << ","
+           << surrogate_pair << "," << unterminated_surrogate;
+
+    EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
+                 stream.str().c_str());
+  }
+}
+
+#endif
diff --git a/base/string_util.h b/base/string_util.h
index 52c2a84..254e18f 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -18,7 +18,8 @@
 
 // TODO(brettw) this dependency should be removed and callers that need
 // these functions should include this file directly.
-#include "base/i18n/string_conversions.h"
+#include "base/utf_string_conversions.h"
+#include "base/i18n/icu_string_conversions.h"
 
 // Safe standard library wrappers for all platforms.
 
diff --git a/base/sys_info_posix.cc b/base/sys_info_posix.cc
index 06f7526..74a10ac 100644
--- a/base/sys_info_posix.cc
+++ b/base/sys_info_posix.cc
@@ -2,9 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "base/file_util.h"
 #include "base/sys_info.h"
-#include "base/basictypes.h"
 
 #include <errno.h>
 #include <string.h>
@@ -23,8 +21,10 @@
 #include <sys/sysctl.h>
 #endif
 
+#include "base/basictypes.h"
+#include "base/file_util.h"
 #include "base/logging.h"
-#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 
 namespace base {
 
@@ -105,7 +105,7 @@ std::wstring SysInfo::GetEnvVar(const wchar_t* var) {
   std::string var_utf8 = WideToUTF8(std::wstring(var));
   char* value = getenv(var_utf8.c_str());
   if (!value) {
-    return L"";
+    return std::wstring();
   } else {
     return UTF8ToWide(value);
   }
diff --git a/base/system_monitor_unittest.cc b/base/system_monitor_unittest.cc
index ff39d87..7ba3a6b 100644
--- a/base/system_monitor_unittest.cc
+++ b/base/system_monitor_unittest.cc
@@ -8,10 +8,10 @@
 class PowerTest : public base::SystemMonitor::PowerObserver {
  public:
   PowerTest()
-    : battery_(false),
-      power_state_changes_(0),
-      suspends_(0),
-      resumes_(0) {};
+      : battery_(false),
+        power_state_changes_(0),
+        suspends_(0),
+        resumes_(0) {};
 
   // PowerObserver callbacks.
   void OnPowerStateChange(bool on_battery_power) {
diff --git a/base/third_party/icu/README b/base/third_party/icu/README
new file mode 100644
index 0000000..faeb5ef
--- /dev/null
+++ b/base/third_party/icu/README
@@ -0,0 +1,8 @@
+This directory has the relevant components from ICU copied to handle basic
+UTF8/16/32 conversions. Components are copied from utf.h, utf8.h, utf16.h and
+utf_impl.c.
+
+The main change is that U_/U8_/U16_ prefixes have been replaced with
+CBU_/CBU8_/CBU16_ (for "Chrome Base") to avoid confusion with the "real" ICU
+macros should ICU be in use on the system. For the same reason, the functions
+and types have been put in the "base_icu" namespace.
diff --git a/base/third_party/icu/icu_utf.cc b/base/third_party/icu/icu_utf.cc
new file mode 100644
index 0000000..b47c8ac
--- /dev/null
+++ b/base/third_party/icu/icu_utf.cc
@@ -0,0 +1,228 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  utf_impl.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*
+*   This file provides implementation functions for macros in the utfXX.h
+*   that would otherwise be too long as macros.
+*/
+
+#include "base/third_party/icu/icu_utf.h"
+
+namespace base_icu {
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF8_ERROR_VALUE_2 0x9f
+
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF_ERROR_VALUE 0xffff
+
+/*
+ * This table could be replaced on many machines by
+ * a few lines of assembler code using an
+ * "index of first 0-bit from msb" instruction and
+ * one or two more integer instructions.
+ *
+ * For example, on an i386, do something like
+ * - MOV AL, leadByte
+ * - NOT AL         (8-bit, leave b15..b8==0..0, reverse only b7..b0)
+ * - MOV AH, 0
+ * - BSR BX, AX     (16-bit)
+ * - MOV AX, 6      (result)
+ * - JZ finish      (ZF==1 if leadByte==0xff)
+ * - SUB AX, BX (result)
+ * -finish:
+ * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
+ *
+ * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal;
+ * lead bytes above 0xf4 are illegal.
+ * We keep them in this table for skipping long ISO 10646-UTF-8 sequences.
+ */
+const uint8
+utf8_countTrailBytes[256]={
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3,
+    3, 3, 3,    /* illegal in Unicode */
+    4, 4, 4, 4, /* illegal in Unicode */
+    5, 5,       /* illegal in Unicode */
+    0, 0        /* illegal bytes 0xfe and 0xff */
+};
+
+static const UChar32
+utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
+
+static const UChar32
+utf8_errorValue[6]={
+    CBUTF8_ERROR_VALUE_1, CBUTF8_ERROR_VALUE_2, CBUTF_ERROR_VALUE, 0x10ffff,
+    0x3ffffff, 0x7fffffff
+};
+
+/*
+ * Handle the non-inline part of the U8_NEXT() macro and its obsolete sibling
+ * UTF8_NEXT_CHAR_SAFE().
+ *
+ * The "strict" parameter controls the error behavior:
+ * <0  "Safe" behavior of U8_NEXT(): All illegal byte sequences yield a negative
+ *     code point result.
+ *  0  Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE):
+ *     All illegal byte sequences yield a positive code point such that this
+ *     result code point would be encoded with the same number of bytes as
+ *     the illegal sequence.
+ * >0  Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE):
+ *     Same as the obsolete "safe" behavior, but non-characters are also treated
+ *     like illegal sequences.
+ *
+ * The special negative (<0) value -2 is used for lenient treatment of surrogate
+ * code points as legal. Some implementations use this for roundtripping of
+ * Unicode 16-bit strings that are not well-formed UTF-16, that is, they
+ * contain unpaired surrogates.
+ *
+ * Note that a UBool is the same as an int8_t.
+ */
+UChar32
+utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict) {
+    int32 i=*pi;
+    uint8 count=CBU8_COUNT_TRAIL_BYTES(c);
+    if((i)+count<=(length)) {
+        uint8 trail, illegal=0;
+
+        CBU8_MASK_LEAD_BYTE((c), count);
+        /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
+        switch(count) {
+        /* each branch falls through to the next one */
+        case 5:
+        case 4:
+            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
+            illegal=1;
+            break;
+        case 3:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            if(c<0x110) {
+                illegal|=(trail&0xc0)^0x80;
+            } else {
+                /* code point>0x10ffff, outside Unicode */
+                illegal=1;
+                break;
+            }
+        case 2:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            illegal|=(trail&0xc0)^0x80;
+        case 1:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            illegal|=(trail&0xc0)^0x80;
+            break;
+        case 0:
+            if(strict>=0) {
+                return CBUTF8_ERROR_VALUE_1;
+            } else {
+                return CBU_SENTINEL;
+            }
+        /* no default branch to optimize switch()  - all values are covered */
+        }
+
+        /*
+         * All the error handling should return a value
+         * that needs count bytes so that UTF8_GET_CHAR_SAFE() works right.
+         *
+         * Starting with Unicode 3.0.1, non-shortest forms are illegal.
+         * Starting with Unicode 3.2, surrogate code points must not be
+         * encoded in UTF-8, and there are no irregular sequences any more.
+         *
+         * U8_ macros (new in ICU 2.4) return negative values for error conditions.
+         */
+
+        /* correct sequence - all trail bytes have (b7..b6)==(10)? */
+        /* illegal is also set if count>=4 */
+        if(illegal || (c)<utf8_minLegal[count] || (CBU_IS_SURROGATE(c) && strict!=-2)) {
+            /* error handling */
+            uint8 errorCount=count;
+            /* don't go beyond this sequence */
+            i=*pi;
+            while(count>0 && CBU8_IS_TRAIL(s[i])) {
+                ++(i);
+                --count;
+            }
+            if(strict>=0) {
+                c=utf8_errorValue[errorCount-count];
+            } else {
+                c=CBU_SENTINEL;
+            }
+        } else if((strict)>0 && CBU_IS_UNICODE_NONCHAR(c)) {
+            /* strict: forbid non-characters like U+fffe */
+            c=utf8_errorValue[count];
+        }
+    } else /* too few bytes left */ {
+        /* error handling */
+        int32 i0=i;
+        /* don't just set (i)=(length) in case there is an illegal sequence */
+        while((i)<(length) && CBU8_IS_TRAIL(s[i])) {
+            ++(i);
+        }
+        if(strict>=0) {
+            c=utf8_errorValue[i-i0];
+        } else {
+            c=CBU_SENTINEL;
+        }
+    }
+    *pi=i;
+    return c;
+}
+
+}  // namespace base_icu
diff --git a/base/third_party/icu/icu_utf.h b/base/third_party/icu/icu_utf.h
new file mode 100644
index 0000000..050a84b
--- /dev/null
+++ b/base/third_party/icu/icu_utf.h
@@ -0,0 +1,358 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2004, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+#ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_
+#define BASE_THIRD_PARTY_ICU_ICU_UTF_H_
+
+#include "base/basictypes.h"
+
+namespace base_icu {
+
+typedef uint32 UChar32;
+typedef int8 UBool;
+
+// General ---------------------------------------------------------------------
+// from utf.h
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with CBU_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define CBU_SENTINEL (-1)
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_UNICODE_CHAR(c) \
+    ((uint32)(c)<0xd800 || \
+        ((uint32)(c)>0xdfff && \
+         (uint32)(c)<=0x10ffff && \
+         !CBU_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (CBU_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+
+// UTF-8 macros ----------------------------------------------------------------
+// from utf8.h
+
+extern const uint8 utf8_countTrailBytes[256];
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @internal
+ */
+#define CBU8_COUNT_TRAIL_BYTES(leadByte) (base_icu::utf8_countTrailBytes[(uint8)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @internal
+ */
+#define CBU8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_LEAD(c) ((uint8)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define CBU8_LENGTH(c) \
+    ((uint32)(c)<=0x7f ? 1 : \
+        ((uint32)(c)<=0x7ff ? 2 : \
+            ((uint32)(c)<=0xd7ff ? 3 : \
+                ((uint32)(c)<=0xdfff || (uint32)(c)>0x10ffff ? 0 : \
+                    ((uint32)(c)<=0xffff ? 3 : 4)\
+                ) \
+            ) \
+        ) \
+    )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define CBU8_MAX_LENGTH 4
+
+/**
+ * Function for handling "next code point" with error-checking.
+ * @internal
+ */
+UChar32 utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict);
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8 * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see CBU8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define CBU8_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(((uint8)(c))>=0x80) { \
+        if(CBU8_IS_LEAD(c)) { \
+            (c)=base_icu::utf8_nextCharSafeBody((const uint8 *)s, &(i), (int32)(length), c, -1); \
+        } else { \
+            (c)=CBU_SENTINEL; \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8 * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see CBU8_APPEND
+ * @stable ICU 2.4
+ */
+#define CBU8_APPEND_UNSAFE(s, i, c) { \
+    if((uint32)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8)(c); \
+    } else { \
+        if((uint32)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
+    } \
+}
+
+// UTF-16 macros ---------------------------------------------------------------
+// from utf16.h
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SINGLE(c) !CBU_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SURROGATE(c) CBU_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (CBU16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for CBU16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define CBU16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define CBU16_GET_SUPPLEMENTARY(lead, trail) \
+    (((base_icu::UChar32)(lead)<<10UL)+(base_icu::UChar32)(trail)-CBU16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define CBU16_LEAD(supplementary) (uint16)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define CBU16_TRAIL(supplementary) (uint16)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define CBU16_LENGTH(c) ((uint32)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define CBU16_MAX_LENGTH 2
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see CBU16_APPEND
+ * @stable ICU 2.4
+ */
+#define CBU16_APPEND_UNSAFE(s, i, c) { \
+    if((uint32)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16)(c); \
+    } else { \
+        (s)[(i)++]=(uint16)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+}  // namespace base_icu
+
+#endif  // BASE_THIRD_PARTY_ICU_ICU_UTF_H_
diff --git a/base/trace_event.cc b/base/trace_event.cc
index 6c79825..be2fbaa 100644
--- a/base/trace_event.cc
+++ b/base/trace_event.cc
@@ -11,6 +11,7 @@
 #include "base/platform_thread.h"
 #include "base/process_util.h"
 #include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 #include "base/time.h"
 
 #define USE_UNRELIABLE_NOW
diff --git a/base/utf_string_conversions.cc b/base/utf_string_conversions.cc
new file mode 100644
index 0000000..6b25cd8
--- /dev/null
+++ b/base/utf_string_conversions.cc
@@ -0,0 +1,361 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/utf_string_conversions.h"
+
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "base/third_party/icu/icu_utf.h"
+
+namespace {
+
+inline bool IsValidCodepoint(uint32 code_point) {
+  // Excludes the surrogate code points ([0xD800, 0xDFFF]) and
+  // codepoints larger than 0x10FFFF (the highest codepoint allowed).
+  // Non-characters and unassigned codepoints are allowed.
+  return code_point < 0xD800u ||
+         (code_point >= 0xE000u && code_point <= 0x10FFFFu);
+}
+
+// ReadUnicodeCharacter --------------------------------------------------------
+
+// Reads a UTF-8 stream, placing the next code point into the given output
+// |*code_point|. |src| represents the entire string to read, and |*char_index|
+// is the character offset within the string to start reading at. |*char_index|
+// will be updated to index the last character read, such that incrementing it
+// (as in a for loop) will take the reader to the next character.
+//
+// Returns true on success. On false, |*code_point| will be invalid.
+bool ReadUnicodeCharacter(const char* src, int32 src_len,
+                          int32* char_index, uint32* code_point_out) {
+  // CBU8_NEXT expects to be able to use -1 to signal an error, so we must
+  // use a signed type for code_point.  But this function returns false
+  // on error anyway, so code_point_out is unsigned.
+  int32 code_point;
+  CBU8_NEXT(src, *char_index, src_len, code_point);
+  *code_point_out = static_cast<uint32>(code_point);
+
+  // The ICU macro above moves to the next char, we want to point to the last
+  // char consumed.
+  (*char_index)--;
+
+  // Validate the decoded value.
+  return IsValidCodepoint(code_point);
+}
+
+// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
+bool ReadUnicodeCharacter(const char16* src, int32 src_len,
+                          int32* char_index, uint32* code_point) {
+  if (CBU16_IS_SURROGATE(src[*char_index])) {
+    if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
+        *char_index + 1 >= src_len ||
+        !CBU16_IS_TRAIL(src[*char_index + 1])) {
+      // Invalid surrogate pair.
+      return false;
+    }
+
+    // Valid surrogate pair.
+    *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index],
+                                          src[*char_index + 1]);
+    (*char_index)++;
+  } else {
+    // Not a surrogate, just one 16-bit word.
+    *code_point = src[*char_index];
+  }
+
+  return IsValidCodepoint(*code_point);
+}
+
+#if defined(WCHAR_T_IS_UTF32)
+// Reads UTF-32 character. The usage is the same as the 8-bit version above.
+bool ReadUnicodeCharacter(const wchar_t* src, int32 src_len,
+                          int32* char_index, uint32* code_point) {
+  // Conversion is easy since the source is 32-bit.
+  *code_point = src[*char_index];
+
+  // Validate the value.
+  return IsValidCodepoint(*code_point);
+}
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// WriteUnicodeCharacter -------------------------------------------------------
+
+// Appends a UTF-8 character to the given 8-bit string.
+void WriteUnicodeCharacter(uint32 code_point, std::string* output) {
+  if (code_point <= 0x7f) {
+    // Fast path the common case of one byte.
+    output->push_back(code_point);
+    return;
+  }
+
+  // CBU8_APPEND_UNSAFE can append up to 4 bytes.
+  int32 char_offset = static_cast<int32>(output->length());
+  output->resize(char_offset + CBU8_MAX_LENGTH);
+
+  CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+
+  // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so
+  // it will represent the new length of the string.
+  output->resize(char_offset);
+}
+
+// Appends the given code point as a UTF-16 character to the STL string.
+void WriteUnicodeCharacter(uint32 code_point, string16* output) {
+  if (CBU16_LENGTH(code_point) == 1) {
+    // This code point is in the Basic Multilingual Plane (BMP).
+    output->push_back(static_cast<char16>(code_point));
+  } else {
+    // Non-BMP characters use a double-character encoding.
+    int32 char_offset = static_cast<int32>(output->length());
+    output->resize(char_offset + CBU16_MAX_LENGTH);
+    CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+  }
+}
+
+#if defined(WCHAR_T_IS_UTF32)
+// Appends the given UTF-32 character to the given 32-bit string.
+inline void WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
+  // This is the easy case, just append the character.
+  output->push_back(code_point);
+}
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// Generalized Unicode converter -----------------------------------------------
+
+// Converts the given source Unicode character type to the given destination
+// Unicode character type as a STL string. The given input buffer and size
+// determine the source, and the given output STL string will be replaced by
+// the result.
+template<typename SRC_CHAR, typename DEST_STRING>
+bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
+  output->clear();
+
+  // ICU requires 32-bit numbers.
+  bool success = true;
+  int32 src_len32 = static_cast<int32>(src_len);
+  for (int32 i = 0; i < src_len32; i++) {
+    uint32 code_point;
+    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
+      WriteUnicodeCharacter(code_point, output);
+    } else {
+      // TODO(jungshik): consider adding 'Replacement character' (U+FFFD)
+      // in place of an invalid codepoint.
+      success = false;
+    }
+  }
+  return success;
+}
+
+// Guesses the length of the output in UTF-8 in bytes, and reserves that amount
+// of space in the given string. We also assume that the input character types
+// are unsigned, which will be true for UTF-16 and -32 on our systems. We assume
+// the string length is greater than zero.
+template<typename CHAR>
+void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
+  if (src[0] < 0x80) {
+    // Assume that the entire input will be ASCII.
+    output->reserve(src_len);
+  } else {
+    // Assume that the entire input is non-ASCII and will have 3 bytes per char.
+    output->reserve(src_len * 3);
+  }
+}
+
+// Guesses the size of the output buffer (containing either UTF-16 or -32 data)
+// given some UTF-8 input that will be converted to it. See ReserveUTF8Output.
+// We assume the source length is > 0.
+template<typename STRING>
+void ReserveUTF16Or32Output(const char* src, size_t src_len, STRING* output) {
+  if (static_cast<unsigned char>(src[0]) < 0x80) {
+    // Assume the input is all ASCII, which means 1:1 correspondence.
+    output->reserve(src_len);
+  } else {
+    // Otherwise assume that the UTF-8 sequences will have 2 bytes for each
+    // character.
+    output->reserve(src_len / 2);
+  }
+}
+
+}  // namespace
+
+// UTF-8 <-> Wide --------------------------------------------------------------
+
+std::string WideToUTF8(const std::wstring& wide) {
+  std::string ret;
+  if (wide.empty())
+    return ret;
+
+  // Ignore the success flag of this call, it will do the best it can for
+  // invalid input, which is what we want here.
+  WideToUTF8(wide.data(), wide.length(), &ret);
+  return ret;
+}
+
+bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  ReserveUTF8Output(src, src_len, output);
+  return ConvertUnicode<wchar_t, std::string>(src, src_len, output);
+}
+
+std::wstring UTF8ToWide(const base::StringPiece& utf8) {
+  std::wstring ret;
+  if (utf8.empty())
+    return ret;
+
+  UTF8ToWide(utf8.data(), utf8.length(), &ret);
+  return ret;
+}
+
+bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  ReserveUTF16Or32Output(src, src_len, output);
+  return ConvertUnicode<char, std::wstring>(src, src_len, output);
+}
+
+// UTF-16 <-> Wide -------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF16)
+
+// When wide == UTF-16, then conversions are a NOP.
+string16 WideToUTF16(const std::wstring& wide) {
+  return wide;
+}
+
+bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+  output->assign(src, src_len);
+  return true;
+}
+
+std::wstring UTF16ToWide(const string16& utf16) {
+  return utf16;
+}
+
+bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
+  output->assign(src, src_len);
+  return true;
+}
+
+#elif defined(WCHAR_T_IS_UTF32)
+
+string16 WideToUTF16(const std::wstring& wide) {
+  string16 ret;
+  if (wide.empty())
+    return ret;
+
+  WideToUTF16(wide.data(), wide.length(), &ret);
+  return ret;
+}
+
+bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  // Assume that normally we won't have any non-BMP characters so the counts
+  // will be the same.
+  output->reserve(src_len);
+  return ConvertUnicode<wchar_t, string16>(src, src_len, output);
+}
+
+std::wstring UTF16ToWide(const string16& utf16) {
+  std::wstring ret;
+  if (utf16.empty())
+    return ret;
+
+  UTF16ToWide(utf16.data(), utf16.length(), &ret);
+  return ret;
+}
+
+bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  // Assume that normally we won't have any non-BMP characters so the counts
+  // will be the same.
+  output->reserve(src_len);
+  return ConvertUnicode<char16, std::wstring>(src, src_len, output);
+}
+
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// UTF16 <-> UTF8 --------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF32)
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  ReserveUTF16Or32Output(src, src_len, output);
+  return ConvertUnicode<char, string16>(src, src_len, output);
+}
+
+string16 UTF8ToUTF16(const std::string& utf8) {
+  string16 ret;
+  if (utf8.empty())
+    return ret;
+
+  // Ignore the success flag of this call, it will do the best it can for
+  // invalid input, which is what we want here.
+  UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
+  return ret;
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  ReserveUTF8Output(src, src_len, output);
+  return ConvertUnicode<char16, std::string>(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(const string16& utf16) {
+  std::string ret;
+  if (utf16.empty())
+    return ret;
+
+  // Ignore the success flag of this call, it will do the best it can for
+  // invalid input, which is what we want here.
+  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
+  return ret;
+}
+
+#elif defined(WCHAR_T_IS_UTF16)
+// Easy case since we can use the "wide" versions we already wrote above.
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+  return UTF8ToWide(src, src_len, output);
+}
+
+string16 UTF8ToUTF16(const std::string& utf8) {
+  return UTF8ToWide(utf8);
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  return WideToUTF8(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(const string16& utf16) {
+  return WideToUTF8(utf16);
+}
+
+#endif
diff --git a/base/i18n/string_conversions.h b/base/utf_string_conversions.h
index c055bb1..89846ed 100644
--- a/base/i18n/string_conversions.h
+++ b/base/utf_string_conversions.h
@@ -2,8 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#ifndef BASE_I18N_STRING_CONVERSIONS_H_
-#define BASE_I18N_STRING_CONVERSIONS_H_
+#ifndef BASE_UTF_STRING_CONVERSIONS_H_
+#define BASE_UTF_STRING_CONVERSIONS_H_
 
 #include <string>
 
@@ -51,51 +51,4 @@ std::string UTF16ToUTF8(const string16& utf16);
 # define UTF16ToWideHack UTF16ToWide
 #endif
 
-// Defines the error handling modes of UTF16ToCodepage, CodepageToUTF16,
-// WideToCodepage and CodepageToWide.
-class OnStringUtilConversionError {
- public:
-  enum Type {
-    // The function will return failure. The output buffer will be empty.
-    FAIL,
-
-    // The offending characters are skipped and the conversion will proceed as
-    // if they did not exist.
-    SKIP,
-
-    // When converting to Unicode, the offending byte sequences are substituted
-    // by Unicode replacement character (U+FFFD). When converting from Unicode,
-    // this is the same as SKIP.
-    SUBSTITUTE,
-  };
-
- private:
-  OnStringUtilConversionError();
-};
-
-// Converts between UTF-16 strings and the encoding specified.  If the
-// encoding doesn't exist or the encoding fails (when on_error is FAIL),
-// returns false.
-bool UTF16ToCodepage(const string16& utf16,
-                     const char* codepage_name,
-                     OnStringUtilConversionError::Type on_error,
-                     std::string* encoded);
-
-bool CodepageToUTF16(const std::string& encoded,
-                     const char* codepage_name,
-                     OnStringUtilConversionError::Type on_error,
-                     string16* utf16);
-
-// Converts between wide strings and the encoding specified.  If the
-// encoding doesn't exist or the encoding fails (when on_error is FAIL),
-// returns false.
-bool WideToCodepage(const std::wstring& wide,
-                    const char* codepage_name,
-                    OnStringUtilConversionError::Type on_error,
-                    std::string* encoded);
-bool CodepageToWide(const std::string& encoded,
-                    const char* codepage_name,
-                    OnStringUtilConversionError::Type on_error,
-                    std::wstring* wide);
-
-#endif  // BASE_I18N_STRING_CONVERSIONS_H_
+#endif  // BASE_UTF_STRING_CONVERSIONS_H_
diff --git a/base/values.cc b/base/values.cc
index 51e68a7..305f1cb 100644
--- a/base/values.cc
+++ b/base/values.cc
@@ -1,9 +1,10 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "base/logging.h"
 #include "base/string_util.h"
+#include "base/utf_string_conversions.h"
 #include "base/values.h"
 
 ///////////////////// Value ////////////////////
diff --git a/chrome/browser/download/download_manager_unittest.cc b/chrome/browser/download/download_manager_unittest.cc
index 164c92b..a5058cf 100644
--- a/chrome/browser/download/download_manager_unittest.cc
+++ b/chrome/browser/download/download_manager_unittest.cc
@@ -1,13 +1,19 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include <string>
 
+#include "base/string_util.h"
+#include "build/build_config.h"
 #include "chrome/browser/download/download_manager.h"
 #include "chrome/browser/download/download_util.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
+#if defined(OS_LINUX)
+#include <locale.h>
+#endif
+
 class DownloadManagerTest : public testing::Test {
  public:
   DownloadManagerTest() {
@@ -455,6 +461,16 @@ const struct {
 // Tests to ensure that the file names we generate from hints from the server
 // (content-disposition, URL name, etc) don't cause security holes.
 TEST_F(DownloadManagerTest, TestDownloadFilename) {
+#if defined(OS_LINUX)
+  // This test doesn't run when the locale is not UTF-8 because some of the
+  // string conversions fail. This is OK (we have the default value) but they
+  // don't match our expectations.
+  std::string locale = setlocale(LC_CTYPE, NULL);
+  StringToLowerASCII(&locale);
+  ASSERT_NE(std::string::npos, locale.find("utf-8"))
+      << "Your locale must be set to UTF-8 for this test to pass!";
+#endif
+
   std::wstring file_name;
   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kGeneratedFiles); ++i) {
     GetGeneratedFilename(kGeneratedFiles[i].disposition,
author	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-07 02:10:20 +0000
committer	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-07 02:10:20 +0000
commit	047a03f4cefa75a67070f08b3f6b727f7ea702d5 (patch)
tree	d00ccbd9e59106de8fd904b06720be59219d61fe
parent	0511c153260e5d402d7552ff7b47a2acb17bdf2b (diff)
download	chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.zip chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.tar.gz chromium_src-047a03f4cefa75a67070f08b3f6b727f7ea702d5.tar.bz2