diff options
author | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-13 19:27:48 +0000 |
---|---|---|
committer | pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-11-13 19:27:48 +0000 |
commit | b9f9383602610ef1f94e9bee2d91d7c044e16051 (patch) | |
tree | 8f16f6ff2f2b40bd69cae63fb6ae517ad9d0e35b /base/utf_string_conversion_utils.h | |
parent | 7bc3ca67f1395c1744cb09ac50ff9ddf92f66ab7 (diff) | |
download | chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.zip chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.tar.gz chromium_src-b9f9383602610ef1f94e9bee2d91d7c044e16051.tar.bz2 |
Split *AndAdjustOffset() functions into their own header, to restore utf_string_conversions.h to a simple, readable state.
BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/387012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31928 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/utf_string_conversion_utils.h')
-rw-r--r-- | base/utf_string_conversion_utils.h | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/base/utf_string_conversion_utils.h b/base/utf_string_conversion_utils.h new file mode 100644 index 0000000..a8a76c5 --- /dev/null +++ b/base/utf_string_conversion_utils.h @@ -0,0 +1,86 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_UTF_STRING_CONVERSION_UTILS_H_ +#define BASE_UTF_STRING_CONVERSION_UTILS_H_ + +// This should only be used by the various UTF string conversion files. + +#include "base/string16.h" + +namespace base { + +inline bool IsValidCodepoint(uint32 code_point) { + // Excludes the surrogate code points ([0xD800, 0xDFFF]) and + // codepoints larger than 0x10FFFF (the highest codepoint allowed). + // Non-characters and unassigned codepoints are allowed. + return code_point < 0xD800u || + (code_point >= 0xE000u && code_point <= 0x10FFFFu); +} + +// ReadUnicodeCharacter -------------------------------------------------------- + +// Reads a UTF-8 stream, placing the next code point into the given output +// |*code_point|. |src| represents the entire string to read, and |*char_index| +// is the character offset within the string to start reading at. |*char_index| +// will be updated to index the last character read, such that incrementing it +// (as in a for loop) will take the reader to the next character. +// +// Returns true on success. On false, |*code_point| will be invalid. +bool ReadUnicodeCharacter(const char* src, + int32 src_len, + int32* char_index, + uint32* code_point_out); + +// Reads a UTF-16 character. The usage is the same as the 8-bit version above. +bool ReadUnicodeCharacter(const char16* src, + int32 src_len, + int32* char_index, + uint32* code_point); + +#if defined(WCHAR_T_IS_UTF32) +// Reads UTF-32 character. The usage is the same as the 8-bit version above. +bool ReadUnicodeCharacter(const wchar_t* src, + int32 src_len, + int32* char_index, + uint32* code_point); +#endif // defined(WCHAR_T_IS_UTF32) + +// WriteUnicodeCharacter ------------------------------------------------------- + +// Appends a UTF-8 character to the given 8-bit string. Returns the number of +// bytes written. +size_t WriteUnicodeCharacter(uint32 code_point, std::string* output); + +// Appends the given code point as a UTF-16 character to the given 16-bit +// string. Returns the number of 16-bit values written. +size_t WriteUnicodeCharacter(uint32 code_point, string16* output); + +#if defined(WCHAR_T_IS_UTF32) +// Appends the given UTF-32 character to the given 32-bit string. Returns the +// number of 32-bit values written. +inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) { + // This is the easy case, just append the character. + output->push_back(code_point); + return 1; +} +#endif // defined(WCHAR_T_IS_UTF32) + +// Generalized Unicode converter ----------------------------------------------- + +// Guesses the length of the output in UTF-8 in bytes, clears that output +// string, and reserves that amount of space. We assume that the input +// character types are unsigned, which will be true for UTF-16 and -32 on our +// systems. +template<typename CHAR> +void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); + +// Prepares an output buffer (containing either UTF-16 or -32 data) given some +// UTF-8 input that will be converted to it. See PrepareForUTF8Output(). +template<typename STRING> +void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); + +} // namespace base + +#endif // BASE_UTF_STRING_CONVERSION_UTILS_H_ |