Implement Linux sys_string_conversions using the system APIs.

BUG=14826 Review URL: http://codereview.chromium.org/140062 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@19239 0039d316-1c4b-4281-b951-d872f2087c98
author: deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-25 11:12:29 +0000
committer: deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-06-25 11:12:29 +0000
commit: fb17d121e0b074f6f14335fbf6ddee10aec53425 (patch)
tree: 428dd23758ecd0c4c87091b1e92f83af3ddf18b8 /base/sys_string_conversions_linux.cc
parent: 688ccb5793b4b6a1160f8d71897d817ed9f39a57 (diff)
download: chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.zip
chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.tar.gz
chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.tar.bz2
1 files changed, 109 insertions, 4 deletions
diff --git a/base/sys_string_conversions_linux.cc b/base/sys_string_conversions_linux.cc
index 7661d87..ff1d3c3 100644
--- a/base/sys_string_conversions_linux.cc
+++ b/base/sys_string_conversions_linux.cc
@@ -4,6 +4,8 @@
 
 #include "base/sys_string_conversions.h"
 
+#include <wchar.h>
+
 #include "base/string_piece.h"
 #include "base/string_util.h"
 
@@ -23,13 +25,116 @@ std::wstring SysUTF8ToWide(const StringPiece& utf8) {
 }
 
 std::string SysWideToNativeMB(const std::wstring& wide) {
-  // TODO(evanm): we can't assume Linux is UTF-8.
-  return SysWideToUTF8(wide);
+  mbstate_t ps;
+
+  // First pass: compute the size of the multi-byte output in bytes.  Each wide
+  // character is converted into a scratch buffer and only the sizes are summed.
+  size_t num_out_chars = 0;
+  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
+  for (size_t i = 0; i < wide.size(); ++i) {
+    const wchar_t src = wide[i];
+    // Use a temp buf since an output of NULL does not do what we want.
+    char buf[16];
+    // We don't want wcrtomb to do its funkiness for embedded NULLs.
+    size_t res = src ? wcrtomb(buf, src, &ps) : 0;
+    switch (res) {
+      // Handle any errors and return an empty string.
+      case -1:
+        return std::string();
+        break;
+      case 0:
+        // We hit an embedded null character; it takes one output byte.
+        ++num_out_chars;  // Fall through; res is 0 here.
+      default:
+        num_out_chars += res;
+        break;
+    }
+  }
+
+  if (num_out_chars == 0)
+    return std::string();
+
+  std::string out;
+  out.resize(num_out_chars);
+
+  // Second pass: walk the input string again, with |i| tracking the index of
+  // the wide input, and |j| tracking the multi-byte output.
+  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
+  for (size_t i = 0, j = 0; i < wide.size(); ++i) {
+    const wchar_t src = wide[i];
+    // We don't want wcrtomb to do its funkiness for embedded NULLs.
+    size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
+    switch (res) {
+      // Handle any errors and return an empty string.
+      case -1:
+        return std::string();
+        break;
+      case 0:
+        // We hit an embedded null character, keep going.
+        ++j;  // Output is already 0.  Fall through; res is 0 here.
+      default:
+        j += res;
+        break;
+    }
+  }
+
+  return out;
 }
 
 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
-  // TODO(evanm): we can't assume Linux is UTF-8.
-  return SysUTF8ToWide(native_mb);
+  mbstate_t ps;
+
+  // First pass: calculate the number of wide characters.  We walk through the
+  // string without writing the output, counting one per decoded character.
+  size_t num_out_chars = 0;
+  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
+  for (size_t i = 0; i < native_mb.size(); ) {
+    const char* src = native_mb.data() + i;
+    size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
+    switch (res) {
+      // Invalid (-1) or incomplete (-2) sequence: return an empty string.
+      case -2:
+      case -1:
+        return std::wstring();
+        break;
+      case 0:
+        // We hit an embedded null byte, keep going.
+        i += 1;  // Fall through.
+      default:
+        i += res;
+        ++num_out_chars;
+        break;
+    }
+  }
+
+  if (num_out_chars == 0)
+    return std::wstring();
+
+  std::wstring out;
+  out.resize(num_out_chars);
+
+  memset(&ps, 0, sizeof(ps));  // Clear the shift state.
+  // Second pass: walk the input string again, with |i| tracking the index of
+  // the multi-byte input, and |j| tracking the wide output (one per iteration).
+  for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
+    const char* src = native_mb.data() + i;
+    wchar_t* dst = &out[j];
+    size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
+    switch (res) {
+      // Invalid (-1) or incomplete (-2) sequence: return an empty string.
+      case -2:
+      case -1:
+        return std::wstring();
+        break;
+      case 0:
+        i += 1;  // Skip null, fall through.
+      default:
+        i += res;
+        break;
+    }
+  }
+
+  return out;
 }
 
 }  // namespace base
author	deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-06-25 11:12:29 +0000
committer	deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-06-25 11:12:29 +0000
commit	fb17d121e0b074f6f14335fbf6ddee10aec53425 (patch)
tree	428dd23758ecd0c4c87091b1e92f83af3ddf18b8 /base/sys_string_conversions_linux.cc
parent	688ccb5793b4b6a1160f8d71897d817ed9f39a57 (diff)
download	chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.zip chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.tar.gz chromium_src-fb17d121e0b074f6f14335fbf6ddee10aec53425.tar.bz2