From d13701900a71240fe1b4074b778133ba9a3461e9 Mon Sep 17 00:00:00 2001 From: "mmentovai@google.com" Date: Wed, 27 Aug 2008 20:57:35 +0000 Subject: Move std::string16 to base::string16. Don't pollute the std namespace. Don't assume that all string types can be represented as std::basic_string. git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1464 0039d316-1c4b-4281-b951-d872f2087c98 --- base/string16.h | 156 ++++++++++++++++++++------------------------------------ 1 file changed, 55 insertions(+), 101 deletions(-) (limited to 'base/string16.h') diff --git a/base/string16.h b/base/string16.h index 25725af..f545286 100644 --- a/base/string16.h +++ b/base/string16.h @@ -6,112 +6,62 @@ #define BASE_STRING16_H_ // WHAT: -// A version of std::basic_string that works even on Linux when 2-byte wchar_t -// values (-fshort-wchar) are used. You can access this class as std::string16. -// We also define char16, which std::string16 is based upon. +// A version of std::basic_string that provides 2-byte characters even when +// wchar_t is not implemented as a 2-byte type. You can access this class as +// string16. We also define char16, which string16 is based upon. // // WHY: -// Firefox uses 2-byte wide characters (UTF-16). On Windows, this is -// mostly compatible with wchar_t, which is 2 bytes (UCS2). +// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 +// data. Plenty of existing code operates on strings encoded as UTF-16. // -// On Linux, sizeof(wchar_t) is 4 bytes by default. We can make it 2 bytes -// using the GCC flag -fshort-wchar. But then std::wstring fails at run time, -// because it calls some functions (like wcslen) that come from glibc -- which -// was built with a 4-byte wchar_t! +// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make +// it 2 bytes by using the GCC flag -fshort-wchar. 
But then std::wstring fails +// at run time, because it calls some functions (like wcslen) that come from +// the system's native C library -- which was built with a 4-byte wchar_t! +// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's +// entirely improper on those systems where the encoding of wchar_t is defined +// as UTF-32. // -// So we define std::string16, which is similar to std::wstring but replaces -// all glibc functions with custom, 2-byte-char compatible routines. Fortuntely -// for us, std::wstring uses mostly *inline* wchar_t-based functions (like -// wmemcmp) that are defined in .h files and do not need to be overridden. +// Here, we define string16, which is similar to std::wstring but replaces all +// libc functions with custom, 2-byte-char compatible routines. It is capable +// of carrying UTF-16-encoded data. #include <string> -#include "build/build_config.h" +#include "base/basictypes.h" -#ifdef WCHAR_T_IS_UTF16 +#if defined(WCHAR_T_IS_UTF16) typedef wchar_t char16; +typedef std::wstring string16; -namespace std { -typedef wstring string16; -} +#elif defined(WCHAR_T_IS_UTF32) -#else // !WCHAR_T_IS_UTF16 +typedef uint16 char16; -typedef unsigned short char16; +namespace base { -namespace std { -typedef basic_string<char16> string16; -} - - -// Define char16 versions of functions required below in char_traits -extern "C" { - -inline char16 *char16_wmemmove(char16 *s1, const char16 *s2, size_t n) { - return reinterpret_cast<char16*>(memmove(s1, s2, n * sizeof(char16))); -} - -inline char16 *char16_wmemcpy(char16 *s1, const char16 *s2, size_t n) { - return reinterpret_cast<char16*>(memcpy(s1, s2, n * sizeof(char16))); -} - -inline int char16_wmemcmp(const char16 *s1, const char16 *s2, size_t n) { - // We cannot call memcmp because that changes the semantics. - while (n > 0) { - if (*s1 != *s2) { - // We cannot use (*s1 - *s2) because char16 is unsigned. - return ((*s1 < *s2) ? 
-1 : 1); - } - ++s1; - ++s2; - --n; - } - return 0; -} - -inline const char16 *char16_wmemchr(const char16 *s, char16 c, size_t n) { - while (n > 0) { - if (*s == c) { - return s; - } - ++s; - --n; - } - return 0; -} - -inline char16 *char16_wmemset(char16 *s, char16 c, size_t n) { - char16 *s_orig = s; - while (n > 0) { - *s = c; - ++s; - --n; - } - return s_orig; -} - -inline size_t char16_wcslen(const char16 *s) { - const char16 *s_orig = s; - while (*s) - ++s; - return (s - s_orig); -} - -} // extern "C" +// char16 versions of the functions required by string16_char_traits; these +// are based on the wide character functions of similar names ("w" or "wcs" +// instead of "c16"). +int c16memcmp(const char16* s1, const char16* s2, size_t n); +size_t c16len(const char16* s); +const char16* c16memchr(const char16* s, char16 c, size_t n); +char16* c16memmove(char16* s1, const char16* s2, size_t n); +char16* c16memcpy(char16* s1, const char16* s2, size_t n); +char16* c16memset(char16* s, char16 c, size_t n); +struct string16_char_traits { + typedef char16 char_type; + typedef int int_type; -// Definition of char_traits, which enables basic_string -// -// This is a slightly modified version of char_traits from gcc 3.2.2 -namespace std { + // int_type needs to be able to hold each possible value of char_type, and in + // addition, the distinct value of eof(). 
+ COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width); - -template<> struct char_traits<char16> { - typedef char16 char_type; - typedef wint_t int_type; - typedef streamoff off_type; - typedef wstreampos pos_type; + typedef std::streamoff off_type; typedef mbstate_t state_type; + typedef std::fpos<state_type> pos_type; static void assign(char_type& c1, const char_type& c2) { c1 = c2; @@ -125,51 +75,55 @@ template<> struct char_traits<char16> { } static int compare(const char_type* s1, const char_type* s2, size_t n) { - return char16_wmemcmp(s1, s2, n); + return c16memcmp(s1, s2, n); } static size_t length(const char_type* s) { - return char16_wcslen(s); + return c16len(s); } static const char_type* find(const char_type* s, size_t n, const char_type& a) { - return char16_wmemchr(s, a, n); + return c16memchr(s, a, n); } static char_type* move(char_type* s1, const char_type* s2, int_type n) { - return char16_wmemmove(s1, s2, n); + return c16memmove(s1, s2, n); } static char_type* copy(char_type* s1, const char_type* s2, size_t n) { - return char16_wmemcpy(s1, s2, n); + return c16memcpy(s1, s2, n); } static char_type* assign(char_type* s, size_t n, char_type a) { - return char16_wmemset(s, a, n); + return c16memset(s, a, n); + } + + static int_type not_eof(const int_type& c) { + return eq_int_type(c, eof()) ? 0 : c; } static char_type to_char_type(const int_type& c) { return char_type(c); } + static int_type to_int_type(const char_type& c) { return int_type(c); } + static bool eq_int_type(const int_type& c1, const int_type& c2) { return c1 == c2; } static int_type eof() { - return static_cast<int_type>(WEOF); - } - static int_type not_eof(const int_type& c) { - return eq_int_type(c, eof()) ? 0 : c; + return static_cast<int_type>(EOF); } }; -} // namespace std +} // namespace base -#endif // !WCHAR_T_IS_UTF16 +typedef std::basic_string<char16, base::string16_char_traits> string16; -#endif // BASE_STRING16_H_ +#endif // WCHAR_T_IS_UTF32 +#endif // BASE_STRING16_H_ -- cgit v1.1