diff options
author | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-07 03:11:42 +0000 |
---|---|---|
committer | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-07 03:11:42 +0000 |
commit | 15af80eb0cb603cf5a365cb86061bad95f063d40 (patch) | |
tree | df2a4465e17cfe58b747bab9042df9134a571146 /base | |
parent | ac2800f20948f30c17ff41be93bcee5540c0952c (diff) | |
download | chromium_src-15af80eb0cb603cf5a365cb86061bad95f063d40.zip chromium_src-15af80eb0cb603cf5a365cb86061bad95f063d40.tar.gz chromium_src-15af80eb0cb603cf5a365cb86061bad95f063d40.tar.bz2 |
This is the addition of the system-dependent string conversions. This is just the new code and changing logging to use it. I will change other things to use this and delete the code in string_util in a subsequent pass.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@479 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/build/base.vcproj | 4 | ||||
-rw-r--r-- | base/build/base_unittests.vcproj | 4 | ||||
-rw-r--r-- | base/logging.cc | 31 | ||||
-rw-r--r-- | base/sys_string_conversions.h | 67 | ||||
-rw-r--r-- | base/sys_string_conversions_mac.cc | 156 | ||||
-rw-r--r-- | base/sys_string_conversions_win.cc | 93 | ||||
-rw-r--r-- | base/sys_string_conversions_win_unittest.cc | 80 |
7 files changed, 412 insertions, 23 deletions
diff --git a/base/build/base.vcproj b/base/build/base.vcproj index 5de82ba..60c759c 100644 --- a/base/build/base.vcproj +++ b/base/build/base.vcproj @@ -654,6 +654,10 @@ > </File> <File + RelativePath="..\sys_string_conversions_win.cc" + > + </File> + <File RelativePath="..\task.h" > </File> diff --git a/base/build/base_unittests.vcproj b/base/build/base_unittests.vcproj index bd22f7f..7ffc029 100644 --- a/base/build/base_unittests.vcproj +++ b/base/build/base_unittests.vcproj @@ -304,6 +304,10 @@ > </File> <File + RelativePath="..\sys_string_conversions_win_unittest.cc" + > + </File> + <File RelativePath="..\thread_local_storage_unittest.cc" > </File> diff --git a/base/logging.cc b/base/logging.cc index eba177e..ca9905d 100644 --- a/base/logging.cc +++ b/base/logging.cc @@ -36,6 +36,7 @@ #include "base/command_line.h" #include "base/lock_impl.h" #include "base/logging.h" +#include "base/sys_string_conversions.h" namespace logging { @@ -207,28 +208,24 @@ void DisplayDebugMessage(const std::string& str) { backslash[1] = 0; wcscat_s(prog_name, MAX_PATH, L"debug_message.exe"); - // stupid CreateProcess requires a non-const command line and may modify it. - // We also want to use the wide string - int charcount = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0); - if (!charcount) - return; - scoped_array<wchar_t> cmdline(new wchar_t[charcount]); - if (!MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, cmdline.get(), charcount)) - return; + // Stupid CreateProcess requires a non-const command line and may modify it. + // We also want to use the wide string. 
+ std::wstring cmdline_string = base::SysUTF8ToWide(str); + wchar_t* cmdline = const_cast<wchar_t*>(cmdline_string.c_str()); STARTUPINFO startup_info; memset(&startup_info, 0, sizeof(startup_info)); startup_info.cb = sizeof(startup_info); PROCESS_INFORMATION process_info; - if (CreateProcessW(prog_name, cmdline.get(), NULL, NULL, false, 0, NULL, + if (CreateProcessW(prog_name, cmdline, NULL, NULL, false, 0, NULL, NULL, &startup_info, &process_info)) { WaitForSingleObject(process_info.hProcess, INFINITE); CloseHandle(process_info.hThread); CloseHandle(process_info.hProcess); } else { // debug process broken, let's just do a message box - MessageBoxW(NULL, cmdline.get(), L"Fatal error", + MessageBoxW(NULL, cmdline, L"Fatal error", MB_OK | MB_ICONHAND | MB_TOPMOST); } } @@ -380,17 +377,5 @@ void CloseLogFile() { } // namespace logging std::ostream& operator<<(std::ostream& out, const wchar_t* wstr) { - if (!wstr || !wstr[0]) - return out; - - // compute the length of the buffer we'll need - int charcount = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, - NULL, 0, NULL, NULL); - if (charcount == 0) - return out; - - // convert - scoped_array<char> buf(new char[charcount]); - WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf.get(), charcount, NULL, NULL); - return out << buf.get(); + return out << base::SysWideToUTF8(std::wstring(wstr)); } diff --git a/base/sys_string_conversions.h b/base/sys_string_conversions.h new file mode 100644 index 0000000..52317ab --- /dev/null +++ b/base/sys_string_conversions.h @@ -0,0 +1,67 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef BASE_SYS_STRING_CONVERSIONS_H_ +#define BASE_SYS_STRING_CONVERSIONS_H_ + +// Provides system-dependent string type conversions for cases where it's +// necessary to not use ICU. Generally, you should not need this in Chrome, +// but it is used in some shared code. Dependencies should be minimal. + +#include <string> +#include "base/basictypes.h" + +namespace base { + +// Converts between wide and UTF-8 representations of a string. On error, the +// result is system-dependent. +std::string SysWideToUTF8(const std::wstring& wide); +std::wstring SysUTF8ToWide(const std::string& utf8); + +// Converts between wide and the system multi-byte representations of a string. 
+// DANGER: This will lose information and can change (on Windows, this can +// change between reboots). +std::string SysWideToNativeMB(const std::wstring& wide); +std::wstring SysNativeMBToWide(const std::string& native_mb); + +// Windows-specific ------------------------------------------------------------ + +#ifdef WIN32 + +// Converts between an 8-bit string and a wide string, using the given +// code page. The code page identifier is one accepted by the Windows function +// MultiByteToWideChar(). +std::wstring SysMultiByteToWide(const std::string& mb, uint32 code_page); +std::string SysWideToMultiByte(const std::wstring& wide, uint32 code_page); + +#endif // WIN32 + +} // namespace base + +#endif // BASE_SYS_STRING_CONVERSIONS_H_ diff --git a/base/sys_string_conversions_mac.cc b/base/sys_string_conversions_mac.cc new file mode 100644 index 0000000..e752a60 --- /dev/null +++ b/base/sys_string_conversions_mac.cc @@ -0,0 +1,156 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/sys_strings.h" + +#include <CoreFoundation/CoreFoundation.h> + +namespace base { + +namespace { + +// Convert the supplied cfsring into the specified encoding, and return it as +// an STL string of the template type. Returns an empty string on failure. +// +// Do not assert in this function since it is used by the asssertion code! +template<typename StringType> +static StringType CFStringToSTLStringWithEncodingT(CFStringRef cfstring, + CFStringEncoding encoding) { + CFIndex length = CFStringGetLength(cfstring); + if (length == 0) + return StringType(); + + CFRange whole_string = CFRangeMake(0, length); + CFIndex out_size; + CFIndex converted = CFStringGetBytes(cfstring, + whole_string, + encoding, + 0, // lossByte + false, // isExternalRepresentation + NULL, // buffer + 0, // maxBufLen + &out_size); + DCHECK(converted != 0 && out_size != 0); + if (converted == 0 || out_size == 0) + return StringType(); + + // out_size is the number of UInt8-sized units needed in the destination. + // A buffer allocated as UInt8 units might not be properly aligned to + // contain elements of StringType::value_type. Use a container for the + // proper value_type, and convert out_size by figuring the number of + // value_type elements per UInt8. Leave room for a NUL terminator. 
+ typename StringType::size_type elements = + out_size * sizeof(UInt8) / sizeof(typename StringType::value_type) + 1; + + std::vector<typename StringType::value_type> out_buffer(elements); + converted = CFStringGetBytes(cfstring, + whole_string, + encoding, + 0, // lossByte + false, // isExternalRepresentation + reinterpret_cast<UInt8*>(&out_buffer[0]), + out_size, + NULL); // usedBufLen + if (converted == 0) + return StringType(); + + out_buffer[elements - 1] = '\0'; + return StringType(&out_buffer[0]); +} + +// Given an STL string |in| with an encoding specified by |in_encoding|, +// convert it to |out_encoding| and return it as an STL string of the +// |OutStringType| template type. Returns an empty string on failure. +// +// Do not assert in this function since it is used by the asssertion code! +template<typename OutStringType, typename InStringType> +static OutStringType STLStringToSTLStringWithEncodingsT( + const InStringType& in, + CFStringEncoding in_encoding, + CFStringEncoding out_encoding) { + typename InStringType::size_type in_length = in.length(); + if (in_length == 0) + return OutStringType(); + + scoped_cftyperef<CFStringRef> cfstring( + CFStringCreateWithBytesNoCopy(NULL, + reinterpret_cast<const UInt8*>(in.c_str()), + in_length * + sizeof(typename InStringType::value_type), + in_encoding, + false, + kCFAllocatorNull)); + DCHECK(cfstring); + if (!cfstring) + return OutStringType(); + + return CFStringToSTLStringWithEncodingT<OutStringType>(cfstring, + out_encoding); +} + +// Specify the byte ordering explicitly, otherwise CFString will be confused +// when strings don't carry BOMs, as they typically won't. 
+static const CFStringEncoding kNarrowStringEncoding = kCFStringEncodingUTF8; +#ifdef __BIG_ENDIAN__ +#if defined(__WCHAR_MAX__) && __WCHAR_MAX__ == 0xffff +static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF16BE; +#else // __WCHAR_MAX__ +static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32BE; +#endif // __WCHAR_MAX__ +#else // __BIG_ENDIAN__ +#if defined(__WCHAR_MAX__) && __WCHAR_MAX__ == 0xffff +static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF16LE; +#else // __WCHAR_MAX__ +static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32LE; +#endif // __WCHAR_MAX__ +#endif // __BIG_ENDIAN__ + +} // namespace + +// Do not assert in this function since it is used by the asssertion code! +std::string SysWideToUTF8(const std::wstring& wide) { + return STLStringToSTLStringWithEncodingsT<std::string>( + wide, kWideStringEncoding, kNarrowStringEncoding); +} + +// Do not assert in this function since it is used by the asssertion code! +std::wstring SysUTF8ToWide(const std::string& utf8) { + return STLStringToSTLStringWithEncodingsT<std::wstring>( + utf8, kNarrowStringEncoding, kWideStringEncoding); +} + +std::string SysWideToNativeMB(const std::wstring& wide) { + return WideToUTF8(wide); +} + +std::wstring SysNativeMBToWide(const std::string& native_mb) { + return UTF8ToWide(native_mb); +} + +} // namespace base diff --git a/base/sys_string_conversions_win.cc b/base/sys_string_conversions_win.cc new file mode 100644 index 0000000..c736549 --- /dev/null +++ b/base/sys_string_conversions_win.cc @@ -0,0 +1,93 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/sys_string_conversions.h" + +#include <windows.h> + +namespace base { + +// Do not assert in this function since it is used by the asssertion code! +std::string SysWideToUTF8(const std::wstring& wide) { + return SysWideToMultiByte(wide, CP_UTF8); +} + +// Do not assert in this function since it is used by the asssertion code! +std::wstring SysUTF8ToWide(const std::string& utf8) { + return SysMultiByteToWide(utf8, CP_UTF8); +} + +std::string SysWideToNativeMB(const std::wstring& wide) { + return SysWideToMultiByte(wide, CP_ACP); +} + +std::wstring SysNativeMBToWide(const std::string& native_mb) { + return SysMultiByteToWide(native_mb, CP_ACP); +} + +// Do not assert in this function since it is used by the asssertion code! 
+std::wstring SysMultiByteToWide(const std::string& mb, uint32 code_page) { + int mb_length = static_cast<int>(mb.length()); + if (mb_length == 0) + return std::wstring(); + + // Compute the length of the buffer. + int charcount = MultiByteToWideChar(code_page, 0, + mb.data(), mb_length, NULL, 0); + if (charcount == 0) + return std::wstring(); + + std::wstring wide; + wide.resize(charcount); + MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount); + + return wide; +} + +// Do not assert in this function since it is used by the asssertion code! +std::string SysWideToMultiByte(const std::wstring& wide, uint32 code_page) { + int wide_length = static_cast<int>(wide.length()); + if (wide_length == 0) + return std::string(); + + // Compute the length of the buffer we'll need. + int charcount = WideCharToMultiByte(code_page, 0, wide.data(), wide_length, + NULL, 0, NULL, NULL); + if (charcount == 0) + return std::string(); + + std::string mb; + mb.resize(charcount); + WideCharToMultiByte(code_page, 0, wide.data(), wide_length, + &mb[0], charcount, NULL, NULL); + + return mb; +} + +} // namespace base diff --git a/base/sys_string_conversions_win_unittest.cc b/base/sys_string_conversions_win_unittest.cc new file mode 100644 index 0000000..3a5b9ad7 --- /dev/null +++ b/base/sys_string_conversions_win_unittest.cc @@ -0,0 +1,80 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/sys_string_conversions.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Apparently Windows doesn't have constants for these. +static const int kCpLatin1 = 850; +static const int kCpBig5 = 950; + +TEST(SysStringsWin, SysWideToUTF8) { + using base::SysWideToUTF8; + EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); + EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); + EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(L"\xd800\xdf00")); // >16 bits + + // Error case. When Windows finds a UTF-16 character going off the end of + // a string, it just converts that literal value to UTF-8, even though this + // is invalid. + EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", SysWideToUTF8(L"\x4f60\xd800zyxw")); + + // Test embedded NULLs. 
+ std::wstring wide_null(L"a"); + wide_null.push_back(0); + wide_null.push_back('b'); + + std::string expected_null("a"); + expected_null.push_back(0); + expected_null.push_back('b'); + + EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); +} + +TEST(SysStringsWin, SysUTF8ToWide) { + using base::SysUTF8ToWide; + EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); + EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); + EXPECT_EQ(L"\xd800\xdf00", SysUTF8ToWide("\xF0\x90\x8C\x80")); // >16 bits + + // Error case. When Windows finds an invalid UTF-8 character, it just skips + // it. This seems weird because it's inconsistent with the reverse conversion. + EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); + + // Test embedded NULLs. + std::string utf8_null("a"); + utf8_null.push_back(0); + utf8_null.push_back('b'); + + std::wstring expected_null(L"a"); + expected_null.push_back(0); + expected_null.push_back('b'); + + EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); +} |