diff options
author | deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-26 09:53:41 +0000 |
---|---|---|
committer | deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-06-26 09:53:41 +0000 |
commit | 5ae918ab1e21c4647739c586299dd9a5f5ae0890 (patch) | |
tree | fd25baf8b5b4f1590a05e6a2c2f3d2c9743c296e /base/sys_string_conversions_linux.cc | |
parent | defbbfec366940e3aa16e6b9070712a23c8c3b76 (diff) | |
download | chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.zip chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.tar.gz chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.tar.bz2 |
Implement Linux sys_string_conversions using the system APIs.
BUG=14826
Review URL: http://codereview.chromium.org/149065
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@19351 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/sys_string_conversions_linux.cc')
-rw-r--r-- | base/sys_string_conversions_linux.cc | 117 |
1 files changed, 113 insertions, 4 deletions
diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc index 7661d87..118f0ac 100644 --- a/base/sys_string_conversions_linux.cc +++ b/base/sys_string_conversions_linux.cc @@ -4,6 +4,8 @@ #include "base/sys_string_conversions.h" +#include <wchar.h> + #include "base/string_piece.h" #include "base/string_util.h" @@ -23,13 +25,120 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) { } std::string SysWideToNativeMB(const std::wstring& wide) { - // TODO(evanm): we can't assume Linux is UTF-8. - return SysWideToUTF8(wide); + mbstate_t ps; + + // Calculate the number of multi-byte characters. We walk through the string + // without writing the output, counting the number of multi-byte characters. + size_t num_out_chars = 0; + memset(&ps, 0, sizeof(ps)); + for (size_t i = 0; i < wide.size(); ++i) { + const wchar_t src = wide[i]; + // Use a temp buffer since calling wcrtomb with an output of NULL does not + // calculate the output length. + char buf[16]; + // Skip NULLs to avoid wcrtomb's special handling of them. + size_t res = src ? wcrtomb(buf, src, &ps) : 0; + switch (res) { + // Handle any errors and return an empty string. + case -1: + return std::string(); + break; + case 0: + // We hit an embedded null byte, keep going. + ++num_out_chars; + break; + default: + num_out_chars += res; + break; + } + } + + if (num_out_chars == 0) + return std::string(); + + std::string out; + out.resize(num_out_chars); + + // We walk the input string again, with |i| tracking the index of the + // wide input, and |j| tracking the multi-byte output. + memset(&ps, 0, sizeof(ps)); + for (size_t i = 0, j = 0; i < wide.size(); ++i) { + const wchar_t src = wide[i]; + // We don't want wcrtomb to do it's funkiness for embedded NULLs. + size_t res = src ? wcrtomb(&out[j], src, &ps) : 0; + switch (res) { + // Handle any errors and return an empty string. + case -1: + return std::string(); + break; + case 0: + // We hit an embedded null byte, keep going. + ++j; // Output is already zeroed. + break; + default: + j += res; + break; + } + } + + return out; } std::wstring SysNativeMBToWide(const StringPiece& native_mb) { - // TODO(evanm): we can't assume Linux is UTF-8. - return SysUTF8ToWide(native_mb); + mbstate_t ps; + + // Calculate the number of wide characters. We walk through the string + // without writing the output, counting the number of wide characters. + size_t num_out_chars = 0; + memset(&ps, 0, sizeof(ps)); + for (size_t i = 0; i < native_mb.size(); ) { + const char* src = native_mb.data() + i; + size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); + switch (res) { + // Handle any errors and return an empty string. + case -2: + case -1: + return std::wstring(); + break; + case 0: + // We hit an embedded null byte, keep going. + i += 1; // Fall through. + default: + i += res; + ++num_out_chars; + break; + } + } + + if (num_out_chars == 0) + return std::wstring(); + + std::wstring out; + out.resize(num_out_chars); + + memset(&ps, 0, sizeof(ps)); // Clear the shift state. + // We walk the input string again, with |i| tracking the index of the + // multi-byte input, and |j| tracking the wide output. + for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { + const char* src = native_mb.data() + i; + wchar_t* dst = &out[j]; + size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); + switch (res) { + // Handle any errors and return an empty string. + case -2: + case -1: + return std::wstring(); + break; + case 0: + i += 1; // Skip null byte. + break; + default: + i += res; + break; + } + } + + return out; } } // namespace base |