summary | refs | log | tree | commit | diff | stats
path: root/base/string16.h
diff options
context:
space:
mode:
author: mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-27 20:57:35 +0000
committer: mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-27 20:57:35 +0000
commit: d13701900a71240fe1b4074b778133ba9a3461e9 (patch)
tree: 3ca3fd78ec07a47dcc2cad83defb977b0ad61e81 /base/string16.h
parent: b8c87bb8ea9f514a8f09a0f2154f8469ba7faf00 (diff)
download: chromium_src-d13701900a71240fe1b4074b778133ba9a3461e9.zip
chromium_src-d13701900a71240fe1b4074b778133ba9a3461e9.tar.gz
chromium_src-d13701900a71240fe1b4074b778133ba9a3461e9.tar.bz2
Move std::string16 to base::string16. Don't pollute the std namespace. Don't
assume that all string types can be represented as std::basic_string<CHAR>.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1464 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/string16.h')
-rw-r--r-- base/string16.h 156
1 files changed, 55 insertions, 101 deletions
diff --git a/base/string16.h b/base/string16.h
index 25725af..f545286 100644
--- a/base/string16.h
+++ b/base/string16.h
@@ -6,112 +6,62 @@
#define BASE_STRING16_H_
// WHAT:
-// A version of std::basic_string that works even on Linux when 2-byte wchar_t
-// values (-fshort-wchar) are used. You can access this class as std::string16.
-// We also define char16, which std::string16 is based upon.
+// A version of std::basic_string that provides 2-byte characters even when
+// wchar_t is not implemented as a 2-byte type. You can access this class as
+// string16. We also define char16, which string16 is based upon.
//
// WHY:
-// Firefox uses 2-byte wide characters (UTF-16). On Windows, this is
-// mostly compatible with wchar_t, which is 2 bytes (UCS2).
+// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
+// data. Plenty of existing code operates on strings encoded as UTF-16.
//
-// On Linux, sizeof(wchar_t) is 4 bytes by default. We can make it 2 bytes
-// using the GCC flag -fshort-wchar. But then std::wstring fails at run time,
-// because it calls some functions (like wcslen) that come from glibc -- which
-// was built with a 4-byte wchar_t!
+// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
+// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
+// at run time, because it calls some functions (like wcslen) that come from
+// the system's native C library -- which was built with a 4-byte wchar_t!
+// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
+// entirely improper on those systems where the encoding of wchar_t is defined
+// as UTF-32.
//
-// So we define std::string16, which is similar to std::wstring but replaces
-// all glibc functions with custom, 2-byte-char compatible routines. Fortuntely
-// for us, std::wstring uses mostly *inline* wchar_t-based functions (like
-// wmemcmp) that are defined in .h files and do not need to be overridden.
+// Here, we define string16, which is similar to std::wstring but replaces all
+// libc functions with custom, 2-byte-char compatible routines. It is capable
+// of carrying UTF-16-encoded data.
#include <string>
-#include "build/build_config.h"
+#include "base/basictypes.h"
-#ifdef WCHAR_T_IS_UTF16
+#if defined(WCHAR_T_IS_UTF16)
typedef wchar_t char16;
+typedef std::wstring string16;
-namespace std {
-typedef wstring string16;
-}
+#elif defined(WCHAR_T_IS_UTF32)
-#else // !WCHAR_T_IS_UTF16
+typedef uint16 char16;
-typedef unsigned short char16;
+namespace base {
-namespace std {
-typedef basic_string<char16> string16;
-}
-
-
-// Define char16 versions of functions required below in char_traits<char16>
-extern "C" {
-
-inline char16 *char16_wmemmove(char16 *s1, const char16 *s2, size_t n) {
- return reinterpret_cast<char16*>(memmove(s1, s2, n * sizeof(char16)));
-}
-
-inline char16 *char16_wmemcpy(char16 *s1, const char16 *s2, size_t n) {
- return reinterpret_cast<char16*>(memcpy(s1, s2, n * sizeof(char16)));
-}
-
-inline int char16_wmemcmp(const char16 *s1, const char16 *s2, size_t n) {
- // We cannot call memcmp because that changes the semantics.
- while (n > 0) {
- if (*s1 != *s2) {
- // We cannot use (*s1 - *s2) because char16 is unsigned.
- return ((*s1 < *s2) ? -1 : 1);
- }
- ++s1;
- ++s2;
- --n;
- }
- return 0;
-}
-
-inline const char16 *char16_wmemchr(const char16 *s, char16 c, size_t n) {
- while (n > 0) {
- if (*s == c) {
- return s;
- }
- ++s;
- --n;
- }
- return 0;
-}
-
-inline char16 *char16_wmemset(char16 *s, char16 c, size_t n) {
- char16 *s_orig = s;
- while (n > 0) {
- *s = c;
- ++s;
- --n;
- }
- return s_orig;
-}
-
-inline size_t char16_wcslen(const char16 *s) {
- const char16 *s_orig = s;
- while (*s)
- ++s;
- return (s - s_orig);
-}
-
-} // extern "C"
+// char16 versions of the functions required by string16_char_traits; these
+// are based on the wide character functions of similar names ("w" or "wcs"
+// instead of "c16").
+int c16memcmp(const char16* s1, const char16* s2, size_t n);
+size_t c16len(const char16* s);
+const char16* c16memchr(const char16* s, char16 c, size_t n);
+char16* c16memmove(char16* s1, const char16* s2, size_t n);
+char16* c16memcpy(char16* s1, const char16* s2, size_t n);
+char16* c16memset(char16* s, char16 c, size_t n);
+struct string16_char_traits {
+ typedef char16 char_type;
+ typedef int int_type;
-// Definition of char_traits<char16>, which enables basic_string<char16>
-//
-// This is a slightly modified version of char_traits<wchar_t> from gcc 3.2.2
-namespace std {
+ // int_type needs to be able to hold each possible value of char_type, and in
+ // addition, the distinct value of eof().
+ COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width);
-template<> struct char_traits<char16> {
- typedef char16 char_type;
- typedef wint_t int_type;
- typedef streamoff off_type;
- typedef wstreampos pos_type;
+ typedef std::streamoff off_type;
typedef mbstate_t state_type;
+ typedef std::fpos<state_type> pos_type;
static void assign(char_type& c1, const char_type& c2) {
c1 = c2;
@@ -125,51 +75,55 @@ template<> struct char_traits<char16> {
}
static int compare(const char_type* s1, const char_type* s2, size_t n) {
- return char16_wmemcmp(s1, s2, n);
+ return c16memcmp(s1, s2, n);
}
static size_t length(const char_type* s) {
- return char16_wcslen(s);
+ return c16len(s);
}
static const char_type* find(const char_type* s, size_t n,
const char_type& a) {
- return char16_wmemchr(s, a, n);
+ return c16memchr(s, a, n);
}
static char_type* move(char_type* s1, const char_type* s2, int_type n) {
- return char16_wmemmove(s1, s2, n);
+ return c16memmove(s1, s2, n);
}
static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
- return char16_wmemcpy(s1, s2, n);
+ return c16memcpy(s1, s2, n);
}
static char_type* assign(char_type* s, size_t n, char_type a) {
- return char16_wmemset(s, a, n);
+ return c16memset(s, a, n);
+ }
+
+ static int_type not_eof(const int_type& c) {
+ return eq_int_type(c, eof()) ? 0 : c;
}
static char_type to_char_type(const int_type& c) {
return char_type(c);
}
+
static int_type to_int_type(const char_type& c) {
return int_type(c);
}
+
static bool eq_int_type(const int_type& c1, const int_type& c2) {
return c1 == c2;
}
static int_type eof() {
- return static_cast<int_type>(WEOF);
- }
- static int_type not_eof(const int_type& c) {
- return eq_int_type(c, eof()) ? 0 : c;
+ return static_cast<int_type>(EOF);
}
};
-} // namespace std
+} // namespace base
-#endif // !WCHAR_T_IS_UTF16
+typedef std::basic_string<char16, base::string16_char_traits> string16;
-#endif // BASE_STRING16_H_
+#endif // WCHAR_T_IS_UTF32
+#endif // BASE_STRING16_H_