summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--base/string_util.h5
-rw-r--r--base/string_util_icu.cc118
-rw-r--r--webkit/glue/glue_util.cc8
-rw-r--r--webkit/glue/glue_util.h31
4 files changed, 131 insertions, 31 deletions
diff --git a/base/string_util.h b/base/string_util.h
index 7245dc9..bd95589 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -180,6 +180,11 @@ std::string16 WideToUTF16(const std::wstring& wide);
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output);
std::wstring UTF16ToWide(const std::string16& utf8);
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output);
+std::string16 UTF8ToUTF16(const std::string& utf8);
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output);
+std::string UTF16ToUTF8(const std::string16& utf16);
+
// Defines the error handling modes of WideToCodepage and CodepageToWide.
class OnStringUtilConversionError {
public:
diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc
index baa4674..6ad6ca2 100644
--- a/base/string_util_icu.cc
+++ b/base/string_util_icu.cc
@@ -145,6 +145,38 @@ bool ConvertUnicode(const SRC_CHAR* src, size_t src_len,
return success;
}
+
+// Pre-sizes |output| for a conversion to UTF-8 by guessing the encoded length
+// from the first source code unit: an ASCII lead suggests an all-ASCII input
+// (one byte per unit); anything else is budgeted at three bytes per unit. This
+// is only a reserve() hint, so a wrong guess costs a reallocation, not
+// correctness.
+//
+// |src| must point to at least one code unit (src_len > 0), because src[0] is
+// read unconditionally. CHAR may be a signed type (wchar_t is on some
+// platforms), so the ASCII test checks that no bit above the low seven is set
+// instead of comparing with 0x80; a negative unit therefore counts as
+// non-ASCII.
+template<typename CHAR>
+void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
+  if ((src[0] & ~0x7F) == 0) {
+    // Assume that the entire input will be ASCII.
+    output->reserve(src_len);
+  } else {
+    // Assume that the entire input is non-ASCII and will have 3 bytes per char.
+    output->reserve(src_len * 3);
+  }
+}
+
+// Reserves room in |output| (UTF-16 or -32) for converting UTF-8 input, guessing
+// from the first byte alone. src[0] is read unconditionally, so src_len must be
+// > 0. The reservation is only a hint; see ReserveUTF8Output.
+template<typename CHAR>
+void ReserveUTF16Or32Output(const char* src, size_t src_len,
+ std::basic_string<CHAR>* output) {
+ if (static_cast<unsigned char>(src[0]) < 0x80) {
+ // ASCII lead byte: assume all-ASCII input, i.e. one output unit per byte.
+ output->reserve(src_len);
+ } else {
+ // Non-ASCII lead byte (compared as unsigned char so a signed char cannot
+ // look negative): assume two UTF-8 bytes per output character.
+ output->reserve(src_len / 2);
+ }
+}
+
} // namespace
// UTF-8 <-> Wide --------------------------------------------------------------
@@ -166,14 +198,7 @@ bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
return true;
}
- // Intelligently guess the size of the output string. When it's an ASCII
- // character, assume the rest will be ASCII and use a buffer size the same as
- // the input. When it's not ASCII, assume 3-bytes per character as the
- // starting point. This will be resized internally later if it's too small.
- if (static_cast<uint32>(src[0]) < 0x80)
- output->reserve(src_len);
- else
- output->reserve(src_len * 3);
+ ReserveUTF8Output(src, src_len, output);
return ConvertUnicode<wchar_t, char>(src, src_len, output);
}
@@ -192,15 +217,7 @@ bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
return true;
}
- // Intelligently guess the size of the output string. When it's an ASCII
- // character, assume the rest will be ASCII and use a buffer size the same as
- // the input. When it's not ASCII, assume the UTF-8 takes 2 bytes per
- // character (this is more conservative than 3 which we use above when
- // converting the other way).
- if (static_cast<unsigned char>(src[0]) < 0x80)
- output->reserve(src_len);
- else
- output->reserve(src_len / 2);
+ ReserveUTF16Or32Output(src, src_len, output);
return ConvertUnicode<char, wchar_t>(src, src_len, output);
}
@@ -273,6 +290,73 @@ bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
#endif // defined(WCHAR_T_IS_UTF32)
+// UTF16 <-> UTF8 --------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF32)
+// Converts |src_len| bytes of UTF-8 at |src| into UTF-16 stored in |output|.
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output) {
+ if (src_len == 0) {
+ output->clear();
+ return true;
+ }
+ // Non-empty from here on (the reserve helper reads src[0]).
+ ReserveUTF16Or32Output(src, src_len, output);
+ return ConvertUnicode<char, char16>(src, src_len, output);
+}
+// Returns the UTF-16 conversion of |utf8| by value; empty in, empty out.
+std::string16 UTF8ToUTF16(const std::string& utf8) {
+ std::string16 ret;
+ if (utf8.empty())
+ return ret;
+
+ // Ignore the success flag of this call, it will do the best it can for
+ // invalid input, which is what we want here.
+ UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
+ return ret;
+}
+
+// Converts |src_len| UTF-16 code units at |src| to UTF-8 in |output|. An empty
+// input clears |output| and returns true; otherwise the return value is
+// whatever ConvertUnicode reports.
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  if (src_len == 0) {
+    output->clear();
+    return true;
+  }
+
+  ReserveUTF8Output(src, src_len, output);
+  // ConvertUnicode is parameterized as <source char type, destination char
+  // type>; here the source is char16 and the destination is char.
+  return ConvertUnicode<char16, char>(src, src_len, output);
+}
+// Returns the UTF-8 conversion of |utf16| by value; empty in, empty out.
+std::string UTF16ToUTF8(const std::string16& utf16) {
+ std::string ret;
+ if (utf16.empty())
+ return ret;
+
+ // Ignore the success flag of this call, it will do the best it can for
+ // invalid input, which is what we want here.
+ UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
+ return ret;
+}
+
+#elif defined(WCHAR_T_IS_UTF16)
+// Easy case since we can use the "wide" versions we already wrote above.
+// Built only when WCHAR_T_IS_UTF16 is defined: forwards to UTF8ToWide.
+bool UTF8ToUTF16(const char* src, size_t src_len, std::string16* output) {
+ return UTF8ToWide(src, src_len, output);
+}
+// By-value overload for the WCHAR_T_IS_UTF16 build: forwards to UTF8ToWide.
+std::string16 UTF8ToUTF16(const std::string& utf8) {
+ return UTF8ToWide(utf8);
+}
+// Built only when WCHAR_T_IS_UTF16 is defined: forwards to WideToUTF8.
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+ return WideToUTF8(src, src_len, output);
+}
+// By-value overload for the WCHAR_T_IS_UTF16 build: forwards to WideToUTF8.
+std::string UTF16ToUTF8(const std::string16& utf16) {
+ return WideToUTF8(utf16);
+}
+
+#endif
+
// Codepage <-> Wide -----------------------------------------------------------
// Convert a unicode string into the specified codepage_name. If the codepage
diff --git a/webkit/glue/glue_util.cc b/webkit/glue/glue_util.cc
index 5d2f147..da61bd2 100644
--- a/webkit/glue/glue_util.cc
+++ b/webkit/glue/glue_util.cc
@@ -45,6 +45,14 @@ std::string16 StringToStdString16(const WebCore::String& str) {
return std::string16(chars ? chars : (UChar *)L"", str.length());
}
+std::string StringToStdString(const WebCore::String& str) {
+ if (str.length() == 0)  // Empty input: skip the conversion entirely.
+ return std::string();
+ std::string ret;  // Filled by UTF16ToUTF8 from str's characters.
+ UTF16ToUTF8(str.characters(), str.length(), &ret);  // Success flag unchecked.
+ return ret;
+}
+
WebCore::String StdWStringToString(const std::wstring& str) {
#if defined(WCHAR_T_IS_UTF16)
return WebCore::String(str.data(), static_cast<unsigned>(str.length()));
diff --git a/webkit/glue/glue_util.h b/webkit/glue/glue_util.h
index 1ad8d59..55f8ea3 100644
--- a/webkit/glue/glue_util.h
+++ b/webkit/glue/glue_util.h
@@ -18,20 +18,23 @@ namespace WebCore {
}
namespace webkit_glue {
- std::string CStringToStdString(const WebCore::CString& str);
- WebCore::CString StdStringToCString(const std::string& str);
- std::wstring StringToStdWString(const WebCore::String& str);
- std::string16 StringToStdString16(const WebCore::String& str);
-
- WebCore::String StdWStringToString(const std::wstring& str);
- WebCore::String StdStringToString(const std::string& str);
-
- WebCore::DeprecatedString StdWStringToDeprecatedString(const std::wstring& str);
- std::wstring DeprecatedStringToStdWString(const WebCore::DeprecatedString& dep);
-
- GURL KURLToGURL(const WebCore::KURL& url);
- WebCore::KURL GURLToKURL(const GURL& url);
-}
+
+std::string CStringToStdString(const WebCore::CString& str);
+WebCore::CString StdStringToCString(const std::string& str);
+std::wstring StringToStdWString(const WebCore::String& str);
+std::string16 StringToStdString16(const WebCore::String& str);
+std::string StringToStdString(const WebCore::String& str);
+
+WebCore::String StdWStringToString(const std::wstring& str);
+WebCore::String StdStringToString(const std::string& str);
+
+WebCore::DeprecatedString StdWStringToDeprecatedString(const std::wstring& str);
+std::wstring DeprecatedStringToStdWString(const WebCore::DeprecatedString& dep);
+
+GURL KURLToGURL(const WebCore::KURL& url);
+WebCore::KURL GURLToKURL(const GURL& url);
+
+} // namespace webkit_glue
#endif // #ifndef WEBKIT_GLUE_GLUE_UTIL_H_