From d7cae12696b96500c05dd2d430f6238922c20c96 Mon Sep 17 00:00:00 2001 From: "initial.commit" Date: Sat, 26 Jul 2008 21:49:38 +0000 Subject: Add base to the repository. git-svn-id: svn://svn.chromium.org/chrome/trunk/src@8 0039d316-1c4b-4281-b951-d872f2087c98 --- base/string_util_icu.cc | 201 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 base/string_util_icu.cc (limited to 'base/string_util_icu.cc') diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc new file mode 100644 index 0000000..797ccbd --- /dev/null +++ b/base/string_util_icu.cc @@ -0,0 +1,201 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "base/string_util.h" + +#include +#include + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/singleton.h" +#include "unicode/ucnv.h" +#include "unicode/numfmt.h" +#include "unicode/ustring.h" + +// Codepage <-> Wide ----------------------------------------------------------- + +// Convert a unicode string into the specified codepage_name. If the codepage +// isn't found, return false. +bool WideToCodepage(const std::wstring& wide, + const char* codepage_name, + OnStringUtilConversionError::Type on_error, + std::string* encoded) { + encoded->clear(); + + UErrorCode status = U_ZERO_ERROR; + UConverter* converter = ucnv_open(codepage_name, &status); + if (!U_SUCCESS(status)) + return false; + + const UChar* uchar_src; + int uchar_len; +#ifdef U_WCHAR_IS_UTF16 + uchar_src = wide.c_str(); + uchar_len = static_cast(wide.length()); +#else // U_WCHAR_IS_UTF16 + // When wchar_t is wider than UChar (16 bits), transform |wide| into a + // UChar* string. Size the UChar* buffer to be large enough to hold twice + // as many UTF-16 code points as there are UCS-4 characters, in case each + // character translates to a UTF-16 surrogate pair, and leave room for a NUL + // terminator. + std::vector wide_uchar(wide.length() * 2 + 1); + u_strFromWCS(&wide_uchar[0], wide_uchar.size(), &uchar_len, + wide.c_str(), wide.length(), &status); + uchar_src = &wide_uchar[0]; + DCHECK(U_SUCCESS(status)) << "failed to convert wstring to UChar*"; +#endif // U_WCHAR_IS_UTF16 + + int encoded_max_length = UCNV_GET_MAX_BYTES_FOR_STRING(uchar_len, + ucnv_getMaxCharSize(converter)); + encoded->resize(encoded_max_length); + + // Setup our error handler. + switch (on_error) { + case OnStringUtilConversionError::FAIL: + ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0, + NULL, NULL, &status); + break; + case OnStringUtilConversionError::SKIP: + ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0, + NULL, NULL, &status); + break; + default: + NOTREACHED(); + } + + // ucnv_fromUChars returns size not including terminating null + int actual_size = ucnv_fromUChars(converter, &(*encoded)[0], + encoded_max_length, uchar_src, uchar_len, &status); + encoded->resize(actual_size); + ucnv_close(converter); + if (U_SUCCESS(status)) + return true; + encoded->clear(); // Make sure the output is empty on error. + return false; +} + +// Converts a string of the given codepage into unicode. +// If the codepage isn't found, return false. +bool CodepageToWide(const std::string& encoded, + const char* codepage_name, + OnStringUtilConversionError::Type on_error, + std::wstring* wide) { + wide->clear(); + + UErrorCode status = U_ZERO_ERROR; + UConverter* converter = ucnv_open(codepage_name, &status); + if (!U_SUCCESS(status)) + return false; + + // The worst case is all the input characters are non-BMP (32-bit) ones. + size_t uchar_max_length = encoded.length() * 2 + 1; + + UChar* uchar_dst; +#ifdef U_WCHAR_IS_UTF16 + uchar_dst = WriteInto(wide, uchar_max_length); +#else + // When wchar_t is wider than UChar (16 bits), convert into a temporary + // UChar* buffer. + std::vector wide_uchar(uchar_max_length); + uchar_dst = &wide_uchar[0]; +#endif // U_WCHAR_IS_UTF16 + + // Setup our error handler. + switch (on_error) { + case OnStringUtilConversionError::FAIL: + ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, 0, + NULL, NULL, &status); + break; + case OnStringUtilConversionError::SKIP: + ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, 0, + NULL, NULL, &status); + break; + default: + NOTREACHED(); + } + + int actual_size = ucnv_toUChars(converter, + uchar_dst, + static_cast(uchar_max_length), + encoded.data(), + static_cast(encoded.length()), + &status); + ucnv_close(converter); + if (!U_SUCCESS(status)) { + wide->clear(); // Make sure the output is empty on error. + return false; + } + +#ifndef U_WCHAR_IS_UTF16 + // When wchar_t is wider than UChar (16 bits), it's not possible to wind up + // with any more wchar_t elements than UChar elements. ucnv_toUChars + // returns the number of UChar elements not including the NUL terminator, so + // leave extra room for that. + u_strToWCS(WriteInto(wide, actual_size + 1), actual_size + 1, &actual_size, + uchar_dst, actual_size, &status); + DCHECK(U_SUCCESS(status)) << "failed to convert UChar* to wstring"; +#endif // U_WCHAR_IS_UTF16 + + wide->resize(actual_size); + return true; +} + +// Number formatting ----------------------------------------------------------- + +// TODO: http://b/id=1092584 Come up with a portable pthread_once, and use +// that to keep a singleton instead of putting it in the platform-dependent +// file. +NumberFormat* NumberFormatSingleton(); + +std::wstring FormatNumber(int64 number) { + NumberFormat* number_format = NumberFormatSingleton(); + if (!number_format) { + // As a fallback, just return the raw number in a string. + return StringPrintf(L"%lld", number); + } + UnicodeString ustr; + number_format->format(number, ustr); + +#ifdef U_WCHAR_IS_UTF16 + return std::wstring(ustr.getBuffer(), + static_cast(ustr.length())); +#else // U_WCHAR_IS_UTF16 + wchar_t buffer[64]; // A int64 is less than 20 chars long, so 64 chars + // leaves plenty of room for formating stuff. + int length = 0; + UErrorCode error = U_ZERO_ERROR; + u_strToWCS(buffer, 64, &length, ustr.getBuffer(), ustr.length() , &error); + if (U_FAILURE(error)) { + NOTREACHED(); + // As a fallback, just return the raw number in a string. + return StringPrintf(L"%lld", number); + } + return std::wstring(buffer, static_cast(length)); +#endif // U_WCHAR_IS_UTF16 +} -- cgit v1.1