diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-13 19:27:48 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-13 19:27:48 +0000 |
commit | b9f9383602610ef1f94e9bee2d91d7c044e16051 (patch) | |
tree | 8f16f6ff2f2b40bd69cae63fb6ae517ad9d0e35b /base/utf_offset_string_conversions.cc | |
parent | 7bc3ca67f1395c1744cb09ac50ff9ddf92f66ab7 (diff) | |
download | chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.zip chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.tar.gz chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.tar.bz2 |
Split *AndAdjustOffset() functions into their own header, to restore utf_string_conversions.h to a simple, readable state.
BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/387012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31928 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/utf_offset_string_conversions.cc')
-rw-r--r-- | base/utf_offset_string_conversions.cc | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/base/utf_offset_string_conversions.cc b/base/utf_offset_string_conversions.cc new file mode 100644 index 0000000..69b572e --- /dev/null +++ b/base/utf_offset_string_conversions.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/utf_offset_string_conversions.h" + +#include "base/string_piece.h" +#include "base/utf_string_conversion_utils.h" + +using base::PrepareForUTF16Or32Output; +using base::ReadUnicodeCharacter; +using base::WriteUnicodeCharacter; + +// Generalized Unicode converter ----------------------------------------------- + +// Converts the given source Unicode character type to the given destination +// Unicode character type as a STL string. The given input buffer and size +// determine the source, and the given output STL string will be replaced by +// the result. +template<typename SRC_CHAR> +bool ConvertUnicode(const SRC_CHAR* src, + size_t src_len, + std::wstring* output, + size_t* offset_for_adjustment) { + size_t output_offset = + (offset_for_adjustment && *offset_for_adjustment < src_len) ? + *offset_for_adjustment : std::wstring::npos; + + // ICU requires 32-bit numbers. + bool success = true; + int32 src_len32 = static_cast<int32>(src_len); + for (int32 i = 0; i < src_len32; i++) { + uint32 code_point; + size_t original_i = i; + size_t chars_written = 0; + if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { + chars_written = WriteUnicodeCharacter(code_point, output); + } else { + // TODO(jungshik): consider adding 'Replacement character' (U+FFFD) + // in place of an invalid codepoint. + success = false; + } + if ((output_offset != std::wstring::npos) && + (*offset_for_adjustment > original_i)) { + // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last + // character read, not after it (so that incrementing it in the loop + // increment will place it at the right location), so we need to account + // for that in determining the amount that was read. + if (*offset_for_adjustment <= static_cast<size_t>(i)) + output_offset = std::wstring::npos; + else + output_offset += chars_written - (i - original_i + 1); + } + } + + if (offset_for_adjustment) + *offset_for_adjustment = output_offset; + return success; +} + +// UTF-8 <-> Wide -------------------------------------------------------------- + +bool UTF8ToWideAndAdjustOffset(const char* src, + size_t src_len, + std::wstring* output, + size_t* offset_for_adjustment) { + PrepareForUTF16Or32Output(src, src_len, output); + return ConvertUnicode(src, src_len, output, offset_for_adjustment); +} + +std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, + size_t* offset_for_adjustment) { + std::wstring ret; + UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret, + offset_for_adjustment); + return ret; +} + +// UTF-16 <-> Wide ------------------------------------------------------------- + +#if defined(WCHAR_T_IS_UTF16) + +// When wide == UTF-16, then conversions are a NOP. +bool UTF16ToWideAndAdjustOffset(const char16* src, + size_t src_len, + std::wstring* output, + size_t* offset_for_adjustment) { + output->assign(src, src_len); + if (offset_for_adjustment && (*offset_for_adjustment >= src_len)) + *offset_for_adjustment = std::wstring::npos; + return true; +} + +std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, + size_t* offset_for_adjustment) { + if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length())) + *offset_for_adjustment = std::wstring::npos; + return utf16; +} + +#elif defined(WCHAR_T_IS_UTF32) + +bool UTF16ToWideAndAdjustOffset(const char16* src, + size_t src_len, + std::wstring* output, + size_t* offset_for_adjustment) { + output->clear(); + // Assume that normally we won't have any non-BMP characters so the counts + // will be the same. + output->reserve(src_len); + return ConvertUnicode(src, src_len, output, offset_for_adjustment); +} + +std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, + size_t* offset_for_adjustment) { + std::wstring ret; + UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret, + offset_for_adjustment); + return ret; +} + +#endif // defined(WCHAR_T_IS_UTF32) |