4 files changed, 131 insertions, 31 deletions
diff --git a/base/string_util.h b/base/string_util.h
index 7245dc9..bd95589 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -180,6 +180,11 @@ std::string16 WideToUTF16(const std::wstring& wide);
 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);
 std::wstring UTF16ToWide(const std::string16& utf8);
 
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output);
+std::string16 UTF8ToUTF16(const std::string& utf8);
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output);
+std::string UTF16ToUTF8(const std::string16& utf16);
+
 // Defines the error handling modes of WideToCodepage and CodepageToWide.
 class OnStringUtilConversionError {
  public:
diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc
index baa4674..6ad6ca2 100644
--- a/base/string_util_icu.cc
+++ b/base/string_util_icu.cc
@@ -145,6 +145,38 @@ bool ConvertUnicode(const SRC_CHAR* src, size_t src_len,
   return success;
 }
 
+
+// Guesses the length of the output in UTF-8 in bytes, and reserves that amount
+// of space in the given string. The first code unit is compared as unsigned,
+// because wchar_t is a signed type on some platforms. |src| must hold at least
+// one code unit (src_len > 0), since src[0] is read unconditionally.
+template<typename CHAR>
+void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
+  if (static_cast<uint32>(src[0]) < 0x80) {
+    // Assume that the entire input will be ASCII.
+    output->reserve(src_len);
+  } else {
+    // Assume that the entire input is non-ASCII and will have 3 bytes per char.
+    output->reserve(src_len * 3);
+  }
+}
+
+// Pre-sizes |output| (UTF-16 or UTF-32 code units) for the UTF-8 input that
+// is about to be converted into it. This mirrors ReserveUTF8Output, and it
+// likewise requires src_len > 0 because src[0] is read unconditionally.
+template<typename CHAR>
+void ReserveUTF16Or32Output(const char* src, size_t src_len,
+                            std::basic_string<CHAR>* output) {
+  // An ASCII lead byte is taken to mean that the whole input is ASCII, which
+  // converts 1:1. Any other lead byte is taken to mean 2 UTF-8 bytes per
+  // output character.
+  const bool starts_with_ascii = static_cast<unsigned char>(src[0]) < 0x80;
+  if (starts_with_ascii)
+    output->reserve(src_len);
+  else
+    output->reserve(src_len / 2);
+}
+
 }  // namespace
 
 // UTF-8 <-> Wide --------------------------------------------------------------
@@ -166,14 +198,7 @@ bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
     return true;
   }
 
-  // Intelligently guess the size of the output string. When it's an ASCII
-  // character, assume the rest will be ASCII and use a buffer size the same as
-  // the input. When it's not ASCII, assume 3-bytes per character as the
-  // starting point. This will be resized internally later if it's too small.
-  if (static_cast<uint32>(src[0]) < 0x80)
-    output->reserve(src_len);
-  else
-    output->reserve(src_len * 3);
+  ReserveUTF8Output(src, src_len, output);
   return ConvertUnicode<wchar_t, char>(src, src_len, output);
 }
 
@@ -192,15 +217,7 @@ bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
     return true;
   }
 
-  // Intelligently guess the size of the output string. When it's an ASCII
-  // character, assume the rest will be ASCII and use a buffer size the same as
-  // the input. When it's not ASCII, assume the UTF-8 takes 2 bytes per
-  // character (this is more conservative than 3 which we use above when
-  // converting the other way).
-  if (static_cast<unsigned char>(src[0]) < 0x80)
-    output->reserve(src_len);
-  else
-    output->reserve(src_len / 2);
+  ReserveUTF16Or32Output(src, src_len, output);
   return ConvertUnicode<char, wchar_t>(src, src_len, output);
 }
 
@@ -273,6 +290,73 @@ bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 
 #endif  // defined(WCHAR_T_IS_UTF32)
 
+// UTF16 <-> UTF8 --------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF32)
+
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output) {
+  // Empty input: nothing to convert, and the reserve helper needs src[0].
+  if (src_len != 0) {
+    ReserveUTF16Or32Output(src, src_len, output);
+    return ConvertUnicode<char, char16>(src, src_len, output);
+  }
+  output->clear();
+  return true;
+}
+
+std::string16 UTF8ToUTF16(const std::string& utf8) {
+  std::string16 converted;
+  if (!utf8.empty()) {
+    // The success flag is deliberately dropped: for invalid input the callee
+    // does the best it can, and that partial result is what we want to
+    // return here.
+    UTF8ToUTF16(utf8.data(), utf8.length(), &converted);
+  }
+  return converted;
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  if (src_len == 0) {  // Nothing to convert; the reserve helper reads src[0].
+    output->clear();
+    return true;
+  }
+  // Template arguments are <source unit, destination unit>: char16 -> char.
+  ReserveUTF8Output(src, src_len, output);
+  return ConvertUnicode<char16, char>(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(const std::string16& utf16) {
+  std::string converted;
+  if (!utf16.empty()) {
+    // The success flag is deliberately dropped: for invalid input the callee
+    // does the best it can, and that partial result is what we want to
+    // return here.
+    UTF16ToUTF8(utf16.data(), utf16.length(), &converted);
+  }
+  return converted;
+}
+
+#elif defined(WCHAR_T_IS_UTF16)
+// Easy case: wchar_t is UTF-16 here, so forward to the "wide" versions above.
+
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output) {
+  return UTF8ToWide(src, src_len, output);
+}
+
+std::string16 UTF8ToUTF16(const std::string& utf8) {
+  return UTF8ToWide(utf8);
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  return WideToUTF8(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(const std::string16& utf16) {
+  return WideToUTF8(utf16);
+}
+
+#endif
+
 // Codepage <-> Wide -----------------------------------------------------------
 
 // Convert a unicode string into the specified codepage_name.  If the codepage
diff --git a/webkit/glue/glue_util.cc b/webkit/glue/glue_util.cc
index 5d2f147..da61bd2 100644
--- a/webkit/glue/glue_util.cc
+++ b/webkit/glue/glue_util.cc
@@ -45,6 +45,14 @@ std::string16 StringToStdString16(const WebCore::String& str) {
   return std::string16(chars ? chars : (UChar *)L"", str.length());
 }
 
+std::string StringToStdString(const WebCore::String& str) {
+  std::string utf8;
+  // Only non-empty input is converted; the success flag is not checked.
+  if (str.length() != 0)
+    UTF16ToUTF8(str.characters(), str.length(), &utf8);
+  return utf8;
+}
+
 WebCore::String StdWStringToString(const std::wstring& str) {
 #if defined(WCHAR_T_IS_UTF16)
   return WebCore::String(str.data(), static_cast<unsigned>(str.length()));
diff --git a/webkit/glue/glue_util.h b/webkit/glue/glue_util.h
index 1ad8d59..55f8ea3 100644
--- a/webkit/glue/glue_util.h
+++ b/webkit/glue/glue_util.h
@@ -18,20 +18,23 @@ namespace WebCore {
 }
 
 namespace webkit_glue {
-  std::string CStringToStdString(const WebCore::CString& str);
-  WebCore::CString StdStringToCString(const std::string& str);
-  std::wstring StringToStdWString(const WebCore::String& str);
-  std::string16 StringToStdString16(const WebCore::String& str);
-
-  WebCore::String StdWStringToString(const std::wstring& str);
-  WebCore::String StdStringToString(const std::string& str);
-  
-  WebCore::DeprecatedString StdWStringToDeprecatedString(const std::wstring& str);
-  std::wstring DeprecatedStringToStdWString(const WebCore::DeprecatedString& dep);
-
-  GURL KURLToGURL(const WebCore::KURL& url);
-  WebCore::KURL GURLToKURL(const GURL& url);
-}
+
+std::string CStringToStdString(const WebCore::CString& str);
+WebCore::CString StdStringToCString(const std::string& str);
+std::wstring StringToStdWString(const WebCore::String& str);
+std::string16 StringToStdString16(const WebCore::String& str);
+std::string StringToStdString(const WebCore::String& str);
+
+WebCore::String StdWStringToString(const std::wstring& str);
+WebCore::String StdStringToString(const std::string& str);
+
+WebCore::DeprecatedString StdWStringToDeprecatedString(const std::wstring& str);
+std::wstring DeprecatedStringToStdWString(const WebCore::DeprecatedString& dep);
+
+GURL KURLToGURL(const WebCore::KURL& url);
+WebCore::KURL GURLToKURL(const GURL& url);
+
+}  // namespace webkit_glue
 
 #endif  // #ifndef WEBKIT_GLUE_GLUE_UTIL_H_