summary refs log tree commit diff stats
path: root/base/sys_string_conversions_linux.cc
diff options
context:
space:
mode:
author deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-26 09:53:41 +0000
committer deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-26 09:53:41 +0000
commit 5ae918ab1e21c4647739c586299dd9a5f5ae0890 (patch)
tree fd25baf8b5b4f1590a05e6a2c2f3d2c9743c296e /base/sys_string_conversions_linux.cc
parent defbbfec366940e3aa16e6b9070712a23c8c3b76 (diff)
download chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.zip
chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.tar.gz
chromium_src-5ae918ab1e21c4647739c586299dd9a5f5ae0890.tar.bz2
Implement Linux sys_string_conversions using the system APIs.
BUG=14826
Review URL: http://codereview.chromium.org/149065
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@19351 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/sys_string_conversions_linux.cc')
-rw-r--r-- base/sys_string_conversions_linux.cc 117
1 files changed, 113 insertions, 4 deletions
diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc
index 7661d87..118f0ac 100644
--- a/base/sys_string_conversions_linux.cc
+++ b/base/sys_string_conversions_linux.cc
@@ -4,6 +4,8 @@
#include "base/sys_string_conversions.h"
+#include <wchar.h>
+
#include "base/string_piece.h"
#include "base/string_util.h"
@@ -23,13 +25,120 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) {
}
std::string SysWideToNativeMB(const std::wstring& wide) {
- // TODO(evanm): we can't assume Linux is UTF-8.
- return SysWideToUTF8(wide);
+ mbstate_t ps;
+
+ // Calculate the number of output bytes. We walk through the string
+ // without writing the output, summing the encoded length of each character.
+ size_t num_out_chars = 0;
+ memset(&ps, 0, sizeof(ps));
+ for (size_t i = 0; i < wide.size(); ++i) {
+ const wchar_t src = wide[i];
+ // Use a temp buffer since calling wcrtomb with an output of NULL does not
+ // calculate the output length.
+ char buf[16];
+ // Skip NULLs to avoid wcrtomb's special handling of them.
+ size_t res = src ? wcrtomb(buf, src, &ps) : 0;
+ switch (res) {
+ // Handle any errors and return an empty string.
+ case -1:
+ return std::string();
+ break;
+ case 0:
+ // We hit an embedded null character; it takes one output byte, keep going.
+ ++num_out_chars;
+ break;
+ default:
+ num_out_chars += res;
+ break;
+ }
+ }
+
+ if (num_out_chars == 0)
+ return std::string();
+
+ std::string out;
+ out.resize(num_out_chars);
+
+ // We walk the input string again, with |i| tracking the index of the
+ // wide input, and |j| tracking the multi-byte output.
+ memset(&ps, 0, sizeof(ps));
+ for (size_t i = 0, j = 0; i < wide.size(); ++i) {
+ const wchar_t src = wide[i];
+ // Skip NULLs to avoid wcrtomb's special handling of embedded NULLs.
+ size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
+ switch (res) {
+ // Handle any errors and return an empty string.
+ case -1:
+ return std::string();
+ break;
+ case 0:
+ // We hit an embedded null character; it takes one output byte, keep going.
+ ++j; // Output is already zeroed.
+ break;
+ default:
+ j += res;
+ break;
+ }
+ }
+
+ return out;
}
std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
- // TODO(evanm): we can't assume Linux is UTF-8.
- return SysUTF8ToWide(native_mb);
+ mbstate_t ps;
+
+ // Calculate the number of wide characters. We walk through the string
+ // without writing the output, counting the number of wide characters.
+ size_t num_out_chars = 0;
+ memset(&ps, 0, sizeof(ps));
+ for (size_t i = 0; i < native_mb.size(); ) {
+ const char* src = native_mb.data() + i;
+ size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
+ switch (res) {
+ // Handle any errors and return an empty string.
+ case -2:
+ case -1:
+ return std::wstring();
+ break;
+ case 0:
+ // We hit an embedded null byte, keep going.
+ i += 1; // Fall through.
+ default:
+ i += res;
+ ++num_out_chars;
+ break;
+ }
+ }
+
+ if (num_out_chars == 0)
+ return std::wstring();
+
+ std::wstring out;
+ out.resize(num_out_chars);
+
+ memset(&ps, 0, sizeof(ps)); // Clear the shift state.
+ // We walk the input string again, with |i| tracking the index of the
+ // multi-byte input, and |j| tracking the wide output.
+ for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
+ const char* src = native_mb.data() + i;
+ wchar_t* dst = &out[j];
+ size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
+ switch (res) {
+ // Handle any errors and return an empty string.
+ case -2:
+ case -1:
+ return std::wstring();
+ break;
+ case 0:
+ i += 1; // Skip null byte.
+ break;
+ default:
+ i += res;
+ break;
+ }
+ }
+
+ return out;
}
} // namespace base