diff options
Diffstat (limited to 'base')
-rw-r--r-- | base/base.gyp | 6 | ||||
-rw-r--r-- | base/base.gypi | 20 | ||||
-rw-r--r-- | base/base_untrusted.gyp | 2 | ||||
-rw-r--r-- | base/string16.h | 190 | ||||
-rw-r--r-- | base/string_util.h | 577 | ||||
-rw-r--r-- | base/string_util_posix.h | 54 | ||||
-rw-r--r-- | base/string_util_win.h | 62 | ||||
-rw-r--r-- | base/stringprintf.h | 63 | ||||
-rw-r--r-- | base/strings/string16.cc (renamed from base/string16.cc) | 4 | ||||
-rw-r--r-- | base/strings/string16.h | 189 | ||||
-rw-r--r-- | base/strings/string16_unittest.cc (renamed from base/string16_unittest.cc) | 5 | ||||
-rw-r--r-- | base/strings/string_util.cc (renamed from base/string_util.cc) | 7 | ||||
-rw-r--r-- | base/strings/string_util.h | 576 | ||||
-rw-r--r-- | base/strings/string_util_constants.cc (renamed from base/string_util_constants.cc) | 2 | ||||
-rw-r--r-- | base/strings/string_util_posix.h | 53 | ||||
-rw-r--r-- | base/strings/string_util_unittest.cc (renamed from base/string_util_unittest.cc) | 7 | ||||
-rw-r--r-- | base/strings/string_util_win.h | 61 | ||||
-rw-r--r-- | base/strings/stringprintf.cc (renamed from base/stringprintf.cc) | 6 | ||||
-rw-r--r-- | base/strings/stringprintf.h | 62 | ||||
-rw-r--r-- | base/strings/stringprintf_unittest.cc (renamed from base/stringprintf_unittest.cc) | 5 |
20 files changed, 994 insertions, 957 deletions
diff --git a/base/base.gyp b/base/base.gyp index 19e8af1..3ca7543 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -573,13 +573,13 @@ 'sequence_checker_impl_unittest.cc', 'sha1_unittest.cc', 'stl_util_unittest.cc', - 'string16_unittest.cc', - 'string_util_unittest.cc', - 'stringprintf_unittest.cc', + 'strings/string16_unittest.cc', + 'strings/stringprintf_unittest.cc', 'strings/string_number_conversions_unittest.cc', 'strings/string_piece_unittest.cc', 'strings/string_split_unittest.cc', 'strings/string_tokenizer_unittest.cc', + 'strings/string_util_unittest.cc', 'strings/stringize_macros_unittest.cc', 'strings/sys_string_conversions_mac_unittest.mm', 'strings/sys_string_conversions_unittest.cc', diff --git a/base/base.gypi b/base/base.gypi index 5a5e67f..a227e56 100644 --- a/base/base.gypi +++ b/base/base.gypi @@ -427,17 +427,10 @@ 'sha1_win.cc', 'single_thread_task_runner.h', 'stl_util.h', - 'string_util.cc', - 'string_util.h', - 'string_util_constants.cc', - 'string_util_posix.h', - 'string_util_win.h', - 'string16.cc', - 'string16.h', - 'stringprintf.cc', - 'stringprintf.h', 'strings/latin1_string_conversions.cc', 'strings/latin1_string_conversions.h', + 'strings/string16.cc', + 'strings/string16.h', 'strings/string_number_conversions.cc', 'strings/string_split.cc', 'strings/string_split.h', @@ -445,7 +438,14 @@ 'strings/string_piece.cc', 'strings/string_piece.h', 'strings/string_tokenizer.h', + 'strings/string_util.cc', + 'strings/string_util.h', + 'strings/string_util_constants.cc', + 'strings/string_util_posix.h', + 'strings/string_util_win.h', 'strings/stringize_macros.h', + 'strings/stringprintf.cc', + 'strings/stringprintf.h', 'strings/sys_string_conversions.h', 'strings/sys_string_conversions_mac.mm', 'strings/sys_string_conversions_posix.cc', @@ -790,7 +790,7 @@ # Not using sha1_win.cc because it may have caused a # regression to page cycler moz. 'sha1_win.cc', - 'string16.cc', + 'strings/string16.cc', ], },], ['<(use_ozone) == 1', { diff --git a/base/base_untrusted.gyp b/base/base_untrusted.gyp index a4c26b1..d4891dc 100644 --- a/base/base_untrusted.gyp +++ b/base/base_untrusted.gyp @@ -25,7 +25,7 @@ 'sources': [ 'base_switches.cc', 'base_switches.h', - 'string16.cc', + 'strings/string16.cc', 'sync_socket_nacl.cc', 'time_posix.cc', ], diff --git a/base/string16.h b/base/string16.h index f7a35d0..4fea704 100644 --- a/base/string16.h +++ b/base/string16.h @@ -1,189 +1,7 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef BASE_STRING16_H_ -#define BASE_STRING16_H_ - -// WHAT: -// A version of std::basic_string that provides 2-byte characters even when -// wchar_t is not implemented as a 2-byte type. You can access this class as -// string16. We also define char16, which string16 is based upon. -// -// WHY: -// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 -// data. Plenty of existing code operates on strings encoded as UTF-16. -// -// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make -// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails -// at run time, because it calls some functions (like wcslen) that come from -// the system's native C library -- which was built with a 4-byte wchar_t! -// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's -// entirely improper on those systems where the encoding of wchar_t is defined -// as UTF-32. -// -// Here, we define string16, which is similar to std::wstring but replaces all -// libc functions with custom, 2-byte-char compatible routines. It is capable -// of carrying UTF-16-encoded data. - -#include <stdio.h> -#include <string> - -#include "base/base_export.h" -#include "base/basictypes.h" - -#if defined(WCHAR_T_IS_UTF16) - -namespace base { - -typedef wchar_t char16; -typedef std::wstring string16; -typedef std::char_traits<wchar_t> string16_char_traits; - -} // namespace base - -#elif defined(WCHAR_T_IS_UTF32) - -namespace base { - -typedef uint16 char16; - -// char16 versions of the functions required by string16_char_traits; these -// are based on the wide character functions of similar names ("w" or "wcs" -// instead of "c16"). -BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); -BASE_EXPORT size_t c16len(const char16* s); -BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); -BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); -BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); -BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); - -struct string16_char_traits { - typedef char16 char_type; - typedef int int_type; - - // int_type needs to be able to hold each possible value of char_type, and in - // addition, the distinct value of eof(). - COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width); - - typedef std::streamoff off_type; - typedef mbstate_t state_type; - typedef std::fpos<state_type> pos_type; - - static void assign(char_type& c1, const char_type& c2) { - c1 = c2; - } - - static bool eq(const char_type& c1, const char_type& c2) { - return c1 == c2; - } - static bool lt(const char_type& c1, const char_type& c2) { - return c1 < c2; - } - - static int compare(const char_type* s1, const char_type* s2, size_t n) { - return c16memcmp(s1, s2, n); - } - - static size_t length(const char_type* s) { - return c16len(s); - } - - static const char_type* find(const char_type* s, size_t n, - const char_type& a) { - return c16memchr(s, a, n); - } - - static char_type* move(char_type* s1, const char_type* s2, int_type n) { - return c16memmove(s1, s2, n); - } - - static char_type* copy(char_type* s1, const char_type* s2, size_t n) { - return c16memcpy(s1, s2, n); - } - - static char_type* assign(char_type* s, size_t n, char_type a) { - return c16memset(s, a, n); - } - - static int_type not_eof(const int_type& c) { - return eq_int_type(c, eof()) ? 0 : c; - } - - static char_type to_char_type(const int_type& c) { - return char_type(c); - } - - static int_type to_int_type(const char_type& c) { - return int_type(c); - } - - static bool eq_int_type(const int_type& c1, const int_type& c2) { - return c1 == c2; - } - - static int_type eof() { - return static_cast<int_type>(EOF); - } -}; - -typedef std::basic_string<char16, base::string16_char_traits> string16; - -BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, - const string16& str); - -// This is required by googletest to print a readable output on test failures. -BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); - -} // namespace base - -// The string class will be explicitly instantiated only once, in string16.cc. -// -// std::basic_string<> in GNU libstdc++ contains a static data member, -// _S_empty_rep_storage, to represent empty strings. When an operation such -// as assignment or destruction is performed on a string, causing its existing -// data member to be invalidated, it must not be freed if this static data -// member is being used. Otherwise, it counts as an attempt to free static -// (and not allocated) data, which is a memory error. -// -// Generally, due to C++ template magic, _S_empty_rep_storage will be marked -// as a coalesced symbol, meaning that the linker will combine multiple -// instances into a single one when generating output. -// -// If a string class is used by multiple shared libraries, a problem occurs. -// Each library will get its own copy of _S_empty_rep_storage. When strings -// are passed across a library boundary for alteration or destruction, memory -// errors will result. GNU libstdc++ contains a configuration option, -// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which -// disables the static data member optimization, but it's a good optimization -// and non-STL code is generally at the mercy of the system's STL -// configuration. Fully-dynamic strings are not the default for GNU libstdc++ -// libstdc++ itself or for the libstdc++ installations on the systems we care -// about, such as Mac OS X and relevant flavors of Linux. -// -// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . -// -// To avoid problems, string classes need to be explicitly instantiated only -// once, in exactly one library. All other string users see it via an "extern" -// declaration. This is precisely how GNU libstdc++ handles -// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring). -// -// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), -// in which the linker does not fully coalesce symbols when dead code -// stripping is enabled. This bug causes the memory errors described above -// to occur even when a std::basic_string<> does not cross shared library -// boundaries, such as in statically-linked executables. -// -// TODO(mark): File this bug with Apple and update this note with a bug number. - -extern template -class BASE_EXPORT std::basic_string<base::char16, base::string16_char_traits>; - -#endif // WCHAR_T_IS_UTF32 - -// TODO(brettw) update users of string16 to use the namespace and remove -// this "using". -using base::char16; -using base::string16; - -#endif // BASE_STRING16_H_ +// This file has moved, please use the new location. +// TODO(avi) remove this file when all users have been updated. +#include "base/strings/string16.h" diff --git a/base/string_util.h b/base/string_util.h index 99ba091..400795e 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -1,576 +1,7 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// -// This file defines utility functions for working with strings. -#ifndef BASE_STRING_UTIL_H_ -#define BASE_STRING_UTIL_H_ - -#include <ctype.h> -#include <stdarg.h> // va_list - -#include <string> -#include <vector> - -#include "base/base_export.h" -#include "base/basictypes.h" -#include "base/compiler_specific.h" -#include "base/string16.h" -#include "base/strings/string_piece.h" // For implicit conversions. - -// Safe standard library wrappers for all platforms. - -namespace base { - -// C standard-library functions like "strncasecmp" and "snprintf" that aren't -// cross-platform are provided as "base::strncasecmp", and their prototypes -// are listed below. These functions are then implemented as inline calls -// to the platform-specific equivalents in the platform-specific headers. - -// Compares the two strings s1 and s2 without regard to case using -// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if -// s2 > s1 according to a lexicographic comparison. -int strcasecmp(const char* s1, const char* s2); - -// Compares up to count characters of s1 and s2 without regard to case using -// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if -// s2 > s1 according to a lexicographic comparison. -int strncasecmp(const char* s1, const char* s2, size_t count); - -// Same as strncmp but for char16 strings. -int strncmp16(const char16* s1, const char16* s2, size_t count); - -// Wrapper for vsnprintf that always null-terminates and always returns the -// number of characters that would be in an untruncated formatted -// string, even when truncation occurs. -int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) - PRINTF_FORMAT(3, 0); - -// vswprintf always null-terminates, but when truncation occurs, it will either -// return -1 or the number of characters that would be in an untruncated -// formatted string. The actual return value depends on the underlying -// C library's vswprintf implementation. -int vswprintf(wchar_t* buffer, size_t size, - const wchar_t* format, va_list arguments) - WPRINTF_FORMAT(3, 0); - -// Some of these implementations need to be inlined. - -// We separate the declaration from the implementation of this inline -// function just so the PRINTF_FORMAT works. -inline int snprintf(char* buffer, size_t size, const char* format, ...) - PRINTF_FORMAT(3, 4); -inline int snprintf(char* buffer, size_t size, const char* format, ...) { - va_list arguments; - va_start(arguments, format); - int result = vsnprintf(buffer, size, format, arguments); - va_end(arguments); - return result; -} - -// We separate the declaration from the implementation of this inline -// function just so the WPRINTF_FORMAT works. -inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) - WPRINTF_FORMAT(3, 4); -inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { - va_list arguments; - va_start(arguments, format); - int result = vswprintf(buffer, size, format, arguments); - va_end(arguments); - return result; -} - -// BSD-style safe and consistent string copy functions. -// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. -// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as -// long as |dst_size| is not 0. Returns the length of |src| in characters. -// If the return value is >= dst_size, then the output was truncated. -// NOTE: All sizes are in number of characters, NOT in bytes. -BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); -BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); - -// Scan a wprintf format string to determine whether it's portable across a -// variety of systems. This function only checks that the conversion -// specifiers used by the format string are supported and have the same meaning -// on a variety of systems. It doesn't check for other errors that might occur -// within a format string. -// -// Nonportable conversion specifiers for wprintf are: -// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char -// data on all systems except Windows, which treat them as wchar_t data. -// Use %ls and %lc for wchar_t data instead. -// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, -// which treat them as char data. Use %ls and %lc for wchar_t data -// instead. -// - 'F', which is not identified by Windows wprintf documentation. -// - 'D', 'O', and 'U', which are deprecated and not available on all systems. -// Use %ld, %lo, and %lu instead. -// -// Note that there is no portable conversion specifier for char data when -// working with wprintf. -// -// This function is intended to be called from base::vswprintf. -BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); - -// ASCII-specific tolower. The standard library's tolower is locale sensitive, -// so we don't want to use it here. -template <class Char> inline Char ToLowerASCII(Char c) { - return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; -} - -// ASCII-specific toupper. The standard library's toupper is locale sensitive, -// so we don't want to use it here. -template <class Char> inline Char ToUpperASCII(Char c) { - return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; -} - -// Function objects to aid in comparing/searching strings. - -template<typename Char> struct CaseInsensitiveCompare { - public: - bool operator()(Char x, Char y) const { - // TODO(darin): Do we really want to do locale sensitive comparisons here? - // See http://crbug.com/24917 - return tolower(x) == tolower(y); - } -}; - -template<typename Char> struct CaseInsensitiveCompareASCII { - public: - bool operator()(Char x, Char y) const { - return ToLowerASCII(x) == ToLowerASCII(y); - } -}; - -} // namespace base - -#if defined(OS_WIN) -#include "base/string_util_win.h" -#elif defined(OS_POSIX) -#include "base/string_util_posix.h" -#else -#error Define string operations appropriately for your platform -#endif - -// These threadsafe functions return references to globally unique empty -// strings. -// -// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. -// There is only one case where you should use these: functions which need to -// return a string by reference (e.g. as a class member accessor), and don't -// have an empty string to use (e.g. in an error case). These should not be -// used as initializers, function arguments, or return values for functions -// which return by value or outparam. -BASE_EXPORT const std::string& EmptyString(); -BASE_EXPORT const std::wstring& EmptyWString(); -BASE_EXPORT const string16& EmptyString16(); - -BASE_EXPORT extern const wchar_t kWhitespaceWide[]; -BASE_EXPORT extern const char16 kWhitespaceUTF16[]; -BASE_EXPORT extern const char kWhitespaceASCII[]; - -BASE_EXPORT extern const char kUtf8ByteOrderMark[]; - -// Removes characters in |remove_chars| from anywhere in |input|. Returns true -// if any characters were removed. |remove_chars| must be null-terminated. -// NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool RemoveChars(const string16& input, - const char16 remove_chars[], - string16* output); -BASE_EXPORT bool RemoveChars(const std::string& input, - const char remove_chars[], - std::string* output); - -// Replaces characters in |replace_chars| from anywhere in |input| with -// |replace_with|. Each character in |replace_chars| will be replaced with -// the |replace_with| string. Returns true if any characters were replaced. -// |replace_chars| must be null-terminated. -// NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool ReplaceChars(const string16& input, - const char16 replace_chars[], - const string16& replace_with, - string16* output); -BASE_EXPORT bool ReplaceChars(const std::string& input, - const char replace_chars[], - const std::string& replace_with, - std::string* output); - -// Removes characters in |trim_chars| from the beginning and end of |input|. -// |trim_chars| must be null-terminated. -// NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool TrimString(const std::wstring& input, - const wchar_t trim_chars[], - std::wstring* output); -BASE_EXPORT bool TrimString(const string16& input, - const char16 trim_chars[], - string16* output); -BASE_EXPORT bool TrimString(const std::string& input, - const char trim_chars[], - std::string* output); - -// Truncates a string to the nearest UTF-8 character that will leave -// the string less than or equal to the specified byte size. -BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, - const size_t byte_size, - std::string* output); - -// Trims any whitespace from either end of the input string. Returns where -// whitespace was found. -// The non-wide version has two functions: -// * TrimWhitespaceASCII() -// This function is for ASCII strings and only looks for ASCII whitespace; -// Please choose the best one according to your usage. -// NOTE: Safe to use the same variable for both input and output. -enum TrimPositions { - TRIM_NONE = 0, - TRIM_LEADING = 1 << 0, - TRIM_TRAILING = 1 << 1, - TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, -}; -BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, - TrimPositions positions, - string16* output); -BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, - TrimPositions positions, - std::string* output); - -// Deprecated. This function is only for backward compatibility and calls -// TrimWhitespaceASCII(). -BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, - TrimPositions positions, - std::string* output); - -// Searches for CR or LF characters. Removes all contiguous whitespace -// strings that contain them. This is useful when trying to deal with text -// copied from terminals. -// Returns |text|, with the following three transformations: -// (1) Leading and trailing whitespace is trimmed. -// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace -// sequences containing a CR or LF are trimmed. -// (3) All other whitespace sequences are converted to single spaces. -BASE_EXPORT std::wstring CollapseWhitespace( - const std::wstring& text, - bool trim_sequences_with_line_breaks); -BASE_EXPORT string16 CollapseWhitespace( - const string16& text, - bool trim_sequences_with_line_breaks); -BASE_EXPORT std::string CollapseWhitespaceASCII( - const std::string& text, - bool trim_sequences_with_line_breaks); - -// Returns true if the passed string is empty or contains only white-space -// characters. -BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); -BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); - -// Returns true if |input| is empty or contains only characters found in -// |characters|. -BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, - const std::wstring& characters); -BASE_EXPORT bool ContainsOnlyChars(const string16& input, - const string16& characters); -BASE_EXPORT bool ContainsOnlyChars(const std::string& input, - const std::string& characters); - -// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII -// beforehand. -BASE_EXPORT std::string WideToASCII(const std::wstring& wide); -BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); - -// Converts the given wide string to the corresponding Latin1. This will fail -// (return false) if any characters are more than 255. -BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); - -// Returns true if the specified string matches the criteria. How can a wide -// string be 8-bit or UTF8? It contains only characters that are < 256 (in the -// first case) or characters that use only 8-bits and whose 8-bit -// representation looks like a UTF-8 string (the second case). -// -// Note that IsStringUTF8 checks not only if the input is structurally -// valid but also if it doesn't contain any non-character codepoint -// (e.g. U+FFFE). It's done on purpose because all the existing callers want -// to have the maximum 'discriminating' power from other encodings. If -// there's a use case for just checking the structural validity, we have to -// add a new function for that. -BASE_EXPORT bool IsStringUTF8(const std::string& str); -BASE_EXPORT bool IsStringASCII(const std::wstring& str); -BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); -BASE_EXPORT bool IsStringASCII(const string16& str); - -// Converts the elements of the given string. This version uses a pointer to -// clearly differentiate it from the non-pointer variant. -template <class str> inline void StringToLowerASCII(str* s) { - for (typename str::iterator i = s->begin(); i != s->end(); ++i) - *i = base::ToLowerASCII(*i); -} - -template <class str> inline str StringToLowerASCII(const str& s) { - // for std::string and std::wstring - str output(s); - StringToLowerASCII(&output); - return output; -} - -// Converts the elements of the given string. This version uses a pointer to -// clearly differentiate it from the non-pointer variant. -template <class str> inline void StringToUpperASCII(str* s) { - for (typename str::iterator i = s->begin(); i != s->end(); ++i) - *i = base::ToUpperASCII(*i); -} - -template <class str> inline str StringToUpperASCII(const str& s) { - // for std::string and std::wstring - str output(s); - StringToUpperASCII(&output); - return output; -} - -// Compare the lower-case form of the given string against the given ASCII -// string. This is useful for doing checking if an input string matches some -// token, and it is optimized to avoid intermediate string copies. This API is -// borrowed from the equivalent APIs in Mozilla. -BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); - -// Same thing, but with string iterators instead. -BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, - std::string::const_iterator a_end, - const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, - std::wstring::const_iterator a_end, - const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, - string16::const_iterator a_end, - const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, - const char* a_end, - const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, - const wchar_t* a_end, - const char* b); -BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, - const char16* a_end, - const char* b); - -// Performs a case-sensitive string compare. The behavior is undefined if both -// strings are not ASCII. -BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); - -// Returns true if str starts with search, or false otherwise. -BASE_EXPORT bool StartsWithASCII(const std::string& str, - const std::string& search, - bool case_sensitive); -BASE_EXPORT bool StartsWith(const std::wstring& str, - const std::wstring& search, - bool case_sensitive); -BASE_EXPORT bool StartsWith(const string16& str, - const string16& search, - bool case_sensitive); - -// Returns true if str ends with search, or false otherwise. -BASE_EXPORT bool EndsWith(const std::string& str, - const std::string& search, - bool case_sensitive); -BASE_EXPORT bool EndsWith(const std::wstring& str, - const std::wstring& search, - bool case_sensitive); -BASE_EXPORT bool EndsWith(const string16& str, - const string16& search, - bool case_sensitive); - - -// Determines the type of ASCII character, independent of locale (the C -// library versions will change based on locale). -template <typename Char> -inline bool IsAsciiWhitespace(Char c) { - return c == ' ' || c == '\r' || c == '\n' || c == '\t'; -} -template <typename Char> -inline bool IsAsciiAlpha(Char c) { - return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); -} -template <typename Char> -inline bool IsAsciiDigit(Char c) { - return c >= '0' && c <= '9'; -} - -template <typename Char> -inline bool IsHexDigit(Char c) { - return (c >= '0' && c <= '9') || - (c >= 'A' && c <= 'F') || - (c >= 'a' && c <= 'f'); -} - -template <typename Char> -inline Char HexDigitToInt(Char c) { - DCHECK(IsHexDigit(c)); - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return 0; -} - -// Returns true if it's a whitespace character. -inline bool IsWhitespace(wchar_t c) { - return wcschr(kWhitespaceWide, c) != NULL; -} - -// Return a byte string in human-readable format with a unit suffix. Not -// appropriate for use in any UI; use of FormatBytes and friends in ui/base is -// highly recommended instead. TODO(avi): Figure out how to get callers to use -// FormatBytes instead; remove this. -BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); - -// Starting at |start_offset| (usually 0), replace the first instance of -// |find_this| with |replace_with|. -BASE_EXPORT void ReplaceFirstSubstringAfterOffset( - string16* str, - string16::size_type start_offset, - const string16& find_this, - const string16& replace_with); -BASE_EXPORT void ReplaceFirstSubstringAfterOffset( - std::string* str, - std::string::size_type start_offset, - const std::string& find_this, - const std::string& replace_with); - -// Starting at |start_offset| (usually 0), look through |str| and replace all -// instances of |find_this| with |replace_with|. -// -// This does entire substrings; use std::replace in <algorithm> for single -// characters, for example: -// std::replace(str.begin(), str.end(), 'a', 'b'); -BASE_EXPORT void ReplaceSubstringsAfterOffset( - string16* str, - string16::size_type start_offset, - const string16& find_this, - const string16& replace_with); -BASE_EXPORT void ReplaceSubstringsAfterOffset( - std::string* str, - std::string::size_type start_offset, - const std::string& find_this, - const std::string& replace_with); - -// Reserves enough memory in |str| to accommodate |length_with_null| characters, -// sets the size of |str| to |length_with_null - 1| characters, and returns a -// pointer to the underlying contiguous array of characters. This is typically -// used when calling a function that writes results into a character array, but -// the caller wants the data to be managed by a string-like object. It is -// convenient in that is can be used inline in the call, and fast in that it -// avoids copying the results of the call from a char* into a string. -// -// |length_with_null| must be at least 2, since otherwise the underlying string -// would have size 0, and trying to access &((*str)[0]) in that case can result -// in a number of problems. -// -// Internally, this takes linear time because the resize() call 0-fills the -// underlying array for potentially all -// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we -// could avoid this aspect of the resize() call, as we expect the caller to -// immediately write over this memory, but there is no other way to set the size -// of the string, and not doing that will mean people who access |str| rather -// than str.c_str() will get back a string of whatever size |str| had on entry -// to this function (probably 0). -template <class string_type> -inline typename string_type::value_type* WriteInto(string_type* str, - size_t length_with_null) { - DCHECK_GT(length_with_null, 1u); - str->reserve(length_with_null); - str->resize(length_with_null - 1); - return &((*str)[0]); -} - -//----------------------------------------------------------------------------- - -// Splits a string into its fields delimited by any of the characters in -// |delimiters|. Each field is added to the |tokens| vector. Returns the -// number of tokens found. -BASE_EXPORT size_t Tokenize(const std::wstring& str, - const std::wstring& delimiters, - std::vector<std::wstring>* tokens); -BASE_EXPORT size_t Tokenize(const string16& str, - const string16& delimiters, - std::vector<string16>* tokens); -BASE_EXPORT size_t Tokenize(const std::string& str, - const std::string& delimiters, - std::vector<std::string>* tokens); -BASE_EXPORT size_t Tokenize(const base::StringPiece& str, - const base::StringPiece& delimiters, - std::vector<base::StringPiece>* tokens); - -// Does the opposite of SplitString(). -BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); -BASE_EXPORT std::string JoinString( - const std::vector<std::string>& parts, char s); - -// Join |parts| using |separator|. -BASE_EXPORT std::string JoinString( - const std::vector<std::string>& parts, - const std::string& separator); -BASE_EXPORT string16 JoinString( - const std::vector<string16>& parts, - const string16& separator); - -// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. -// Additionally, any number of consecutive '$' characters is replaced by that -// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be -// NULL. This only allows you to use up to nine replacements. -BASE_EXPORT string16 ReplaceStringPlaceholders( - const string16& format_string, - const std::vector<string16>& subst, - std::vector<size_t>* offsets); - -BASE_EXPORT std::string ReplaceStringPlaceholders( - const base::StringPiece& format_string, - const std::vector<std::string>& subst, - std::vector<size_t>* offsets); - -// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. -BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, - const string16& a, - size_t* offset); - -// Returns true if the string passed in matches the pattern. The pattern -// string can contain wildcards like * and ? -// The backslash character (\) is an escape character for * and ? -// We limit the patterns to having a max of 16 * or ? characters. -// ? matches 0 or 1 character, while * matches 0 or more characters. -BASE_EXPORT bool MatchPattern(const base::StringPiece& string, - const base::StringPiece& pattern); -BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); - -// Hack to convert any char-like type to its unsigned counterpart. -// For example, it will convert char, signed char and unsigned char to unsigned -// char. -template<typename T> -struct ToUnsigned { - typedef T Unsigned; -}; - -template<> -struct ToUnsigned<char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<signed char> { - typedef unsigned char Unsigned; -}; -template<> -struct ToUnsigned<wchar_t> { -#if defined(WCHAR_T_IS_UTF16) - typedef unsigned short Unsigned; -#elif defined(WCHAR_T_IS_UTF32) - typedef uint32 Unsigned; -#endif -}; -template<> -struct ToUnsigned<short> { - typedef unsigned short Unsigned; -}; - -#endif // BASE_STRING_UTIL_H_ +// This file has moved, please use the new location. +// TODO(avi) remove this file when all users have been updated. +#include "base/strings/string_util.h" diff --git a/base/string_util_posix.h b/base/string_util_posix.h index 9b776e6..e826690 100644 --- a/base/string_util_posix.h +++ b/base/string_util_posix.h @@ -1,53 +1,7 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef BASE_STRING_UTIL_POSIX_H_ -#define BASE_STRING_UTIL_POSIX_H_ - -#include <stdarg.h> -#include <stdio.h> -#include <string.h> -#include <wchar.h> - -#include "base/logging.h" -#include "base/string_util.h" - -namespace base { - -// Chromium code style is to not use malloc'd strings; this is only for use -// for interaction with APIs that require it. -inline char* strdup(const char* str) { - return ::strdup(str); -} - -inline int strcasecmp(const char* string1, const char* string2) { - return ::strcasecmp(string1, string2); -} - -inline int strncasecmp(const char* string1, const char* string2, size_t count) { - return ::strncasecmp(string1, string2, count); -} - -inline int vsnprintf(char* buffer, size_t size, - const char* format, va_list arguments) { - return ::vsnprintf(buffer, size, format, arguments); -} - -inline int strncmp16(const char16* s1, const char16* s2, size_t count) { -#if defined(WCHAR_T_IS_UTF16) - return ::wcsncmp(s1, s2, count); -#elif defined(WCHAR_T_IS_UTF32) - return c16memcmp(s1, s2, count); -#endif -} - -inline int vswprintf(wchar_t* buffer, size_t size, - const wchar_t* format, va_list arguments) { - DCHECK(IsWprintfFormatPortable(format)); - return ::vswprintf(buffer, size, format, arguments); -} - -} // namespace base - -#endif // BASE_STRING_UTIL_POSIX_H_ +// This file has moved, please use the new location. +// TODO(avi) remove this file when all users have been updated. +#include "base/strings/string_util_posix.h" diff --git a/base/string_util_win.h b/base/string_util_win.h index 2a4d3fd..29bc4aa 100644 --- a/base/string_util_win.h +++ b/base/string_util_win.h @@ -1,61 +1,7 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef BASE_STRING_UTIL_WIN_H_ -#define BASE_STRING_UTIL_WIN_H_ - -#include <stdarg.h> -#include <stdio.h> -#include <string.h> -#include <wchar.h> - -#include "base/logging.h" - -namespace base { - -// Chromium code style is to not use malloc'd strings; this is only for use -// for interaction with APIs that require it. -inline char* strdup(const char* str) { - return _strdup(str); -} - -inline int strcasecmp(const char* s1, const char* s2) { - return _stricmp(s1, s2); -} - -inline int strncasecmp(const char* s1, const char* s2, size_t count) { - return _strnicmp(s1, s2, count); -} - -inline int strncmp16(const char16* s1, const char16* s2, size_t count) { - return ::wcsncmp(s1, s2, count); -} - -inline int vsnprintf(char* buffer, size_t size, - const char* format, va_list arguments) { - int length = _vsprintf_p(buffer, size, format, arguments); - if (length < 0) { - if (size > 0) - buffer[0] = 0; - return _vscprintf_p(format, arguments); - } - return length; -} - -inline int vswprintf(wchar_t* buffer, size_t size, - const wchar_t* format, va_list arguments) { - DCHECK(IsWprintfFormatPortable(format)); - - int length = _vswprintf_p(buffer, size, format, arguments); - if (length < 0) { - if (size > 0) - buffer[0] = 0; - return _vscwprintf_p(format, arguments); - } - return length; -} - -} // namespace base - -#endif // BASE_STRING_UTIL_WIN_H_ +// This file has moved, please use the new location. +// TODO(avi) remove this file when all users have been updated. +#include "base/strings/string_util_win.h" diff --git a/base/stringprintf.h b/base/stringprintf.h index 3946f3d..17c3a5e 100644 --- a/base/stringprintf.h +++ b/base/stringprintf.h @@ -1,62 +1,7 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef BASE_STRINGPRINTF_H_ -#define BASE_STRINGPRINTF_H_ - -#include <stdarg.h> // va_list - -#include <string> - -#include "base/base_export.h" -#include "base/compiler_specific.h" - -namespace base { - -// Return a C++ string given printf-like input. -BASE_EXPORT std::string StringPrintf(const char* format, ...) - PRINTF_FORMAT(1, 2); -// OS_ANDROID's libc does not support wchar_t, so several overloads are omitted. -#if !defined(OS_ANDROID) -BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...) - WPRINTF_FORMAT(1, 2); -#endif - -// Return a C++ string given vprintf-like input. -BASE_EXPORT std::string StringPrintV(const char* format, va_list ap) - PRINTF_FORMAT(1, 0); - -// Store result into a supplied string and return it. -BASE_EXPORT const std::string& SStringPrintf(std::string* dst, - const char* format, ...) - PRINTF_FORMAT(2, 3); -#if !defined(OS_ANDROID) -BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst, - const wchar_t* format, ...) - WPRINTF_FORMAT(2, 3); -#endif - -// Append result to a supplied string. -BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...) - PRINTF_FORMAT(2, 3); -#if !defined(OS_ANDROID) -// TODO(evanm): this is only used in a few places in the code; -// replace with string16 version. -BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...) - WPRINTF_FORMAT(2, 3); -#endif - -// Lower-level routine that takes a va_list and appends to a specified -// string. All other routines are just convenience wrappers around it. -BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap) - PRINTF_FORMAT(2, 0); -#if !defined(OS_ANDROID) -BASE_EXPORT void StringAppendV(std::wstring* dst, - const wchar_t* format, va_list ap) - WPRINTF_FORMAT(2, 0); -#endif - -} // namespace base - -#endif // BASE_STRINGPRINTF_H_ +// This file has moved, please use the new location. +// TODO(avi) remove this file when all users have been updated. +#include "base/strings/stringprintf.h" diff --git a/base/string16.cc b/base/strings/string16.cc index a473786..c802eef 100644 --- a/base/string16.cc +++ b/base/strings/string16.cc @@ -1,8 +1,8 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/string16.h" +#include "base/strings/string16.h" #if defined(WCHAR_T_IS_UTF16) diff --git a/base/strings/string16.h b/base/strings/string16.h new file mode 100644 index 0000000..fd98f1b --- /dev/null +++ b/base/strings/string16.h @@ -0,0 +1,189 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING16_H_ +#define BASE_STRINGS_STRING16_H_ + +// WHAT: +// A version of std::basic_string that provides 2-byte characters even when +// wchar_t is not implemented as a 2-byte type. You can access this class as +// string16. We also define char16, which string16 is based upon. +// +// WHY: +// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 +// data. Plenty of existing code operates on strings encoded as UTF-16. +// +// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make +// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails +// at run time, because it calls some functions (like wcslen) that come from +// the system's native C library -- which was built with a 4-byte wchar_t! +// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's +// entirely improper on those systems where the encoding of wchar_t is defined +// as UTF-32. +// +// Here, we define string16, which is similar to std::wstring but replaces all +// libc functions with custom, 2-byte-char compatible routines. It is capable +// of carrying UTF-16-encoded data. + +#include <stdio.h> +#include <string> + +#include "base/base_export.h" +#include "base/basictypes.h" + +#if defined(WCHAR_T_IS_UTF16) + +namespace base { + +typedef wchar_t char16; +typedef std::wstring string16; +typedef std::char_traits<wchar_t> string16_char_traits; + +} // namespace base + +#elif defined(WCHAR_T_IS_UTF32) + +namespace base { + +typedef uint16 char16; + +// char16 versions of the functions required by string16_char_traits; these +// are based on the wide character functions of similar names ("w" or "wcs" +// instead of "c16"). +BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); +BASE_EXPORT size_t c16len(const char16* s); +BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); +BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); + +struct string16_char_traits { + typedef char16 char_type; + typedef int int_type; + + // int_type needs to be able to hold each possible value of char_type, and in + // addition, the distinct value of eof(). + COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width); + + typedef std::streamoff off_type; + typedef mbstate_t state_type; + typedef std::fpos<state_type> pos_type; + + static void assign(char_type& c1, const char_type& c2) { + c1 = c2; + } + + static bool eq(const char_type& c1, const char_type& c2) { + return c1 == c2; + } + static bool lt(const char_type& c1, const char_type& c2) { + return c1 < c2; + } + + static int compare(const char_type* s1, const char_type* s2, size_t n) { + return c16memcmp(s1, s2, n); + } + + static size_t length(const char_type* s) { + return c16len(s); + } + + static const char_type* find(const char_type* s, size_t n, + const char_type& a) { + return c16memchr(s, a, n); + } + + static char_type* move(char_type* s1, const char_type* s2, int_type n) { + return c16memmove(s1, s2, n); + } + + static char_type* copy(char_type* s1, const char_type* s2, size_t n) { + return c16memcpy(s1, s2, n); + } + + static char_type* assign(char_type* s, size_t n, char_type a) { + return c16memset(s, a, n); + } + + static int_type not_eof(const int_type& c) { + return eq_int_type(c, eof()) ? 0 : c; + } + + static char_type to_char_type(const int_type& c) { + return char_type(c); + } + + static int_type to_int_type(const char_type& c) { + return int_type(c); + } + + static bool eq_int_type(const int_type& c1, const int_type& c2) { + return c1 == c2; + } + + static int_type eof() { + return static_cast<int_type>(EOF); + } +}; + +typedef std::basic_string<char16, base::string16_char_traits> string16; + +BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, + const string16& str); + +// This is required by googletest to print a readable output on test failures. +BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); + +} // namespace base + +// The string class will be explicitly instantiated only once, in string16.cc. +// +// std::basic_string<> in GNU libstdc++ contains a static data member, +// _S_empty_rep_storage, to represent empty strings. When an operation such +// as assignment or destruction is performed on a string, causing its existing +// data member to be invalidated, it must not be freed if this static data +// member is being used. Otherwise, it counts as an attempt to free static +// (and not allocated) data, which is a memory error. +// +// Generally, due to C++ template magic, _S_empty_rep_storage will be marked +// as a coalesced symbol, meaning that the linker will combine multiple +// instances into a single one when generating output. +// +// If a string class is used by multiple shared libraries, a problem occurs. +// Each library will get its own copy of _S_empty_rep_storage. When strings +// are passed across a library boundary for alteration or destruction, memory +// errors will result. GNU libstdc++ contains a configuration option, +// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which +// disables the static data member optimization, but it's a good optimization +// and non-STL code is generally at the mercy of the system's STL +// configuration. Fully-dynamic strings are not the default for GNU libstdc++ +// libstdc++ itself or for the libstdc++ installations on the systems we care +// about, such as Mac OS X and relevant flavors of Linux. +// +// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . +// +// To avoid problems, string classes need to be explicitly instantiated only +// once, in exactly one library. All other string users see it via an "extern" +// declaration. This is precisely how GNU libstdc++ handles +// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring). +// +// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), +// in which the linker does not fully coalesce symbols when dead code +// stripping is enabled. This bug causes the memory errors described above +// to occur even when a std::basic_string<> does not cross shared library +// boundaries, such as in statically-linked executables. +// +// TODO(mark): File this bug with Apple and update this note with a bug number. + +extern template +class BASE_EXPORT std::basic_string<base::char16, base::string16_char_traits>; + +#endif // WCHAR_T_IS_UTF32 + +// TODO(brettw) update users of string16 to use the namespace and remove +// this "using". +using base::char16; +using base::string16; + +#endif // BASE_STRINGS_STRING16_H_ diff --git a/base/string16_unittest.cc b/base/strings/string16_unittest.cc index cd8f4f6..d98b2a9 100644 --- a/base/string16_unittest.cc +++ b/base/strings/string16_unittest.cc @@ -1,10 +1,11 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include <sstream> -#include "base/string16.h" +#include "base/strings/string16.h" + #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" diff --git a/base/string_util.cc b/base/strings/string_util.cc index bbded57..3ed7069 100644 --- a/base/string_util.cc +++ b/base/strings/string_util.cc @@ -1,10 +1,8 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/string_util.h" - -#include "build/build_config.h" +#include "base/strings/string_util.h" #include <ctype.h> #include <errno.h> @@ -26,6 +24,7 @@ #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" #include "base/third_party/icu/icu_utf.h" +#include "build/build_config.h" namespace { diff --git a/base/strings/string_util.h b/base/strings/string_util.h new file mode 100644 index 0000000..7b4af7d --- /dev/null +++ b/base/strings/string_util.h @@ -0,0 +1,576 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file defines utility functions for working with strings. + +#ifndef BASE_STRINGS_STRING_UTIL_H_ +#define BASE_STRINGS_STRING_UTIL_H_ + +#include <ctype.h> +#include <stdarg.h> // va_list + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "base/string16.h" +#include "base/strings/string_piece.h" // For implicit conversions. + +// Safe standard library wrappers for all platforms. + +namespace base { + +// C standard-library functions like "strncasecmp" and "snprintf" that aren't +// cross-platform are provided as "base::strncasecmp", and their prototypes +// are listed below. These functions are then implemented as inline calls +// to the platform-specific equivalents in the platform-specific headers. + +// Compares the two strings s1 and s2 without regard to case using +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if +// s2 > s1 according to a lexicographic comparison. +int strcasecmp(const char* s1, const char* s2); + +// Compares up to count characters of s1 and s2 without regard to case using +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if +// s2 > s1 according to a lexicographic comparison. +int strncasecmp(const char* s1, const char* s2, size_t count); + +// Same as strncmp but for char16 strings. +int strncmp16(const char16* s1, const char16* s2, size_t count); + +// Wrapper for vsnprintf that always null-terminates and always returns the +// number of characters that would be in an untruncated formatted +// string, even when truncation occurs. +int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) + PRINTF_FORMAT(3, 0); + +// vswprintf always null-terminates, but when truncation occurs, it will either +// return -1 or the number of characters that would be in an untruncated +// formatted string. The actual return value depends on the underlying +// C library's vswprintf implementation. +int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) + WPRINTF_FORMAT(3, 0); + +// Some of these implementations need to be inlined. + +// We separate the declaration from the implementation of this inline +// function just so the PRINTF_FORMAT works. +inline int snprintf(char* buffer, size_t size, const char* format, ...) + PRINTF_FORMAT(3, 4); +inline int snprintf(char* buffer, size_t size, const char* format, ...) { + va_list arguments; + va_start(arguments, format); + int result = vsnprintf(buffer, size, format, arguments); + va_end(arguments); + return result; +} + +// We separate the declaration from the implementation of this inline +// function just so the WPRINTF_FORMAT works. +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) + WPRINTF_FORMAT(3, 4); +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { + va_list arguments; + va_start(arguments, format); + int result = vswprintf(buffer, size, format, arguments); + va_end(arguments); + return result; +} + +// BSD-style safe and consistent string copy functions. +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as +// long as |dst_size| is not 0. Returns the length of |src| in characters. +// If the return value is >= dst_size, then the output was truncated. +// NOTE: All sizes are in number of characters, NOT in bytes. +BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); +BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); + +// Scan a wprintf format string to determine whether it's portable across a +// variety of systems. This function only checks that the conversion +// specifiers used by the format string are supported and have the same meaning +// on a variety of systems. It doesn't check for other errors that might occur +// within a format string. +// +// Nonportable conversion specifiers for wprintf are: +// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char +// data on all systems except Windows, which treat them as wchar_t data. +// Use %ls and %lc for wchar_t data instead. +// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, +// which treat them as char data. Use %ls and %lc for wchar_t data +// instead. +// - 'F', which is not identified by Windows wprintf documentation. +// - 'D', 'O', and 'U', which are deprecated and not available on all systems. +// Use %ld, %lo, and %lu instead. +// +// Note that there is no portable conversion specifier for char data when +// working with wprintf. +// +// This function is intended to be called from base::vswprintf. +BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); + +// ASCII-specific tolower. The standard library's tolower is locale sensitive, +// so we don't want to use it here. +template <class Char> inline Char ToLowerASCII(Char c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} + +// ASCII-specific toupper. The standard library's toupper is locale sensitive, +// so we don't want to use it here. +template <class Char> inline Char ToUpperASCII(Char c) { + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; +} + +// Function objects to aid in comparing/searching strings. + +template<typename Char> struct CaseInsensitiveCompare { + public: + bool operator()(Char x, Char y) const { + // TODO(darin): Do we really want to do locale sensitive comparisons here? + // See http://crbug.com/24917 + return tolower(x) == tolower(y); + } +}; + +template<typename Char> struct CaseInsensitiveCompareASCII { + public: + bool operator()(Char x, Char y) const { + return ToLowerASCII(x) == ToLowerASCII(y); + } +}; + +} // namespace base + +#if defined(OS_WIN) +#include "base/string_util_win.h" +#elif defined(OS_POSIX) +#include "base/string_util_posix.h" +#else +#error Define string operations appropriately for your platform +#endif + +// These threadsafe functions return references to globally unique empty +// strings. +// +// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. +// There is only one case where you should use these: functions which need to +// return a string by reference (e.g. as a class member accessor), and don't +// have an empty string to use (e.g. in an error case). These should not be +// used as initializers, function arguments, or return values for functions +// which return by value or outparam. +BASE_EXPORT const std::string& EmptyString(); +BASE_EXPORT const std::wstring& EmptyWString(); +BASE_EXPORT const string16& EmptyString16(); + +BASE_EXPORT extern const wchar_t kWhitespaceWide[]; +BASE_EXPORT extern const char16 kWhitespaceUTF16[]; +BASE_EXPORT extern const char kWhitespaceASCII[]; + +BASE_EXPORT extern const char kUtf8ByteOrderMark[]; + +// Removes characters in |remove_chars| from anywhere in |input|. Returns true +// if any characters were removed. |remove_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool RemoveChars(const string16& input, + const char16 remove_chars[], + string16* output); +BASE_EXPORT bool RemoveChars(const std::string& input, + const char remove_chars[], + std::string* output); + +// Replaces characters in |replace_chars| from anywhere in |input| with +// |replace_with|. Each character in |replace_chars| will be replaced with +// the |replace_with| string. Returns true if any characters were replaced. +// |replace_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool ReplaceChars(const string16& input, + const char16 replace_chars[], + const string16& replace_with, + string16* output); +BASE_EXPORT bool ReplaceChars(const std::string& input, + const char replace_chars[], + const std::string& replace_with, + std::string* output); + +// Removes characters in |trim_chars| from the beginning and end of |input|. +// |trim_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool TrimString(const std::wstring& input, + const wchar_t trim_chars[], + std::wstring* output); +BASE_EXPORT bool TrimString(const string16& input, + const char16 trim_chars[], + string16* output); +BASE_EXPORT bool TrimString(const std::string& input, + const char trim_chars[], + std::string* output); + +// Truncates a string to the nearest UTF-8 character that will leave +// the string less than or equal to the specified byte size. +BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, + const size_t byte_size, + std::string* output); + +// Trims any whitespace from either end of the input string. Returns where +// whitespace was found. +// The non-wide version has two functions: +// * TrimWhitespaceASCII() +// This function is for ASCII strings and only looks for ASCII whitespace; +// Please choose the best one according to your usage. +// NOTE: Safe to use the same variable for both input and output. +enum TrimPositions { + TRIM_NONE = 0, + TRIM_LEADING = 1 << 0, + TRIM_TRAILING = 1 << 1, + TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, +}; +BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, + TrimPositions positions, + string16* output); +BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, + TrimPositions positions, + std::string* output); + +// Deprecated. This function is only for backward compatibility and calls +// TrimWhitespaceASCII(). +BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, + TrimPositions positions, + std::string* output); + +// Searches for CR or LF characters. Removes all contiguous whitespace +// strings that contain them. This is useful when trying to deal with text +// copied from terminals. +// Returns |text|, with the following three transformations: +// (1) Leading and trailing whitespace is trimmed. +// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace +// sequences containing a CR or LF are trimmed. +// (3) All other whitespace sequences are converted to single spaces. +BASE_EXPORT std::wstring CollapseWhitespace( + const std::wstring& text, + bool trim_sequences_with_line_breaks); +BASE_EXPORT string16 CollapseWhitespace( + const string16& text, + bool trim_sequences_with_line_breaks); +BASE_EXPORT std::string CollapseWhitespaceASCII( + const std::string& text, + bool trim_sequences_with_line_breaks); + +// Returns true if the passed string is empty or contains only white-space +// characters. +BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); +BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); + +// Returns true if |input| is empty or contains only characters found in +// |characters|. +BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, + const std::wstring& characters); +BASE_EXPORT bool ContainsOnlyChars(const string16& input, + const string16& characters); +BASE_EXPORT bool ContainsOnlyChars(const std::string& input, + const std::string& characters); + +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII +// beforehand. +BASE_EXPORT std::string WideToASCII(const std::wstring& wide); +BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); + +// Converts the given wide string to the corresponding Latin1. This will fail +// (return false) if any characters are more than 255. +BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); + +// Returns true if the specified string matches the criteria. How can a wide +// string be 8-bit or UTF8? It contains only characters that are < 256 (in the +// first case) or characters that use only 8-bits and whose 8-bit +// representation looks like a UTF-8 string (the second case). +// +// Note that IsStringUTF8 checks not only if the input is structurally +// valid but also if it doesn't contain any non-character codepoint +// (e.g. U+FFFE). It's done on purpose because all the existing callers want +// to have the maximum 'discriminating' power from other encodings. If +// there's a use case for just checking the structural validity, we have to +// add a new function for that. +BASE_EXPORT bool IsStringUTF8(const std::string& str); +BASE_EXPORT bool IsStringASCII(const std::wstring& str); +BASE_EXPORT bool IsStringASCII(const base::StringPiece& str); +BASE_EXPORT bool IsStringASCII(const string16& str); + +// Converts the elements of the given string. This version uses a pointer to +// clearly differentiate it from the non-pointer variant. +template <class str> inline void StringToLowerASCII(str* s) { + for (typename str::iterator i = s->begin(); i != s->end(); ++i) + *i = base::ToLowerASCII(*i); +} + +template <class str> inline str StringToLowerASCII(const str& s) { + // for std::string and std::wstring + str output(s); + StringToLowerASCII(&output); + return output; +} + +// Converts the elements of the given string. This version uses a pointer to +// clearly differentiate it from the non-pointer variant. +template <class str> inline void StringToUpperASCII(str* s) { + for (typename str::iterator i = s->begin(); i != s->end(); ++i) + *i = base::ToUpperASCII(*i); +} + +template <class str> inline str StringToUpperASCII(const str& s) { + // for std::string and std::wstring + str output(s); + StringToUpperASCII(&output); + return output; +} + +// Compare the lower-case form of the given string against the given ASCII +// string. This is useful for doing checking if an input string matches some +// token, and it is optimized to avoid intermediate string copies. This API is +// borrowed from the equivalent APIs in Mozilla. +BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); + +// Same thing, but with string iterators instead. +BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, + std::string::const_iterator a_end, + const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, + std::wstring::const_iterator a_end, + const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, + string16::const_iterator a_end, + const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, + const char* a_end, + const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, + const wchar_t* a_end, + const char* b); +BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, + const char16* a_end, + const char* b); + +// Performs a case-sensitive string compare. The behavior is undefined if both +// strings are not ASCII. +BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); + +// Returns true if str starts with search, or false otherwise. +BASE_EXPORT bool StartsWithASCII(const std::string& str, + const std::string& search, + bool case_sensitive); +BASE_EXPORT bool StartsWith(const std::wstring& str, + const std::wstring& search, + bool case_sensitive); +BASE_EXPORT bool StartsWith(const string16& str, + const string16& search, + bool case_sensitive); + +// Returns true if str ends with search, or false otherwise. +BASE_EXPORT bool EndsWith(const std::string& str, + const std::string& search, + bool case_sensitive); +BASE_EXPORT bool EndsWith(const std::wstring& str, + const std::wstring& search, + bool case_sensitive); +BASE_EXPORT bool EndsWith(const string16& str, + const string16& search, + bool case_sensitive); + + +// Determines the type of ASCII character, independent of locale (the C +// library versions will change based on locale). +template <typename Char> +inline bool IsAsciiWhitespace(Char c) { + return c == ' ' || c == '\r' || c == '\n' || c == '\t'; +} +template <typename Char> +inline bool IsAsciiAlpha(Char c) { + return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); +} +template <typename Char> +inline bool IsAsciiDigit(Char c) { + return c >= '0' && c <= '9'; +} + +template <typename Char> +inline bool IsHexDigit(Char c) { + return (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f'); +} + +template <typename Char> +inline Char HexDigitToInt(Char c) { + DCHECK(IsHexDigit(c)); + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return 0; +} + +// Returns true if it's a whitespace character. +inline bool IsWhitespace(wchar_t c) { + return wcschr(kWhitespaceWide, c) != NULL; +} + +// Return a byte string in human-readable format with a unit suffix. Not +// appropriate for use in any UI; use of FormatBytes and friends in ui/base is +// highly recommended instead. TODO(avi): Figure out how to get callers to use +// FormatBytes instead; remove this. +BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); + +// Starting at |start_offset| (usually 0), replace the first instance of +// |find_this| with |replace_with|. +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + string16* str, + string16::size_type start_offset, + const string16& find_this, + const string16& replace_with); +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + std::string* str, + std::string::size_type start_offset, + const std::string& find_this, + const std::string& replace_with); + +// Starting at |start_offset| (usually 0), look through |str| and replace all +// instances of |find_this| with |replace_with|. +// +// This does entire substrings; use std::replace in <algorithm> for single +// characters, for example: +// std::replace(str.begin(), str.end(), 'a', 'b'); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + string16* str, + string16::size_type start_offset, + const string16& find_this, + const string16& replace_with); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + std::string* str, + std::string::size_type start_offset, + const std::string& find_this, + const std::string& replace_with); + +// Reserves enough memory in |str| to accommodate |length_with_null| characters, +// sets the size of |str| to |length_with_null - 1| characters, and returns a +// pointer to the underlying contiguous array of characters. This is typically +// used when calling a function that writes results into a character array, but +// the caller wants the data to be managed by a string-like object. It is +// convenient in that is can be used inline in the call, and fast in that it +// avoids copying the results of the call from a char* into a string. +// +// |length_with_null| must be at least 2, since otherwise the underlying string +// would have size 0, and trying to access &((*str)[0]) in that case can result +// in a number of problems. +// +// Internally, this takes linear time because the resize() call 0-fills the +// underlying array for potentially all +// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we +// could avoid this aspect of the resize() call, as we expect the caller to +// immediately write over this memory, but there is no other way to set the size +// of the string, and not doing that will mean people who access |str| rather +// than str.c_str() will get back a string of whatever size |str| had on entry +// to this function (probably 0). +template <class string_type> +inline typename string_type::value_type* WriteInto(string_type* str, + size_t length_with_null) { + DCHECK_GT(length_with_null, 1u); + str->reserve(length_with_null); + str->resize(length_with_null - 1); + return &((*str)[0]); +} + +//----------------------------------------------------------------------------- + +// Splits a string into its fields delimited by any of the characters in +// |delimiters|. Each field is added to the |tokens| vector. Returns the +// number of tokens found. +BASE_EXPORT size_t Tokenize(const std::wstring& str, + const std::wstring& delimiters, + std::vector<std::wstring>* tokens); +BASE_EXPORT size_t Tokenize(const string16& str, + const string16& delimiters, + std::vector<string16>* tokens); +BASE_EXPORT size_t Tokenize(const std::string& str, + const std::string& delimiters, + std::vector<std::string>* tokens); +BASE_EXPORT size_t Tokenize(const base::StringPiece& str, + const base::StringPiece& delimiters, + std::vector<base::StringPiece>* tokens); + +// Does the opposite of SplitString(). +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); +BASE_EXPORT std::string JoinString( + const std::vector<std::string>& parts, char s); + +// Join |parts| using |separator|. +BASE_EXPORT std::string JoinString( + const std::vector<std::string>& parts, + const std::string& separator); +BASE_EXPORT string16 JoinString( + const std::vector<string16>& parts, + const string16& separator); + +// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. +// Additionally, any number of consecutive '$' characters is replaced by that +// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be +// NULL. This only allows you to use up to nine replacements. +BASE_EXPORT string16 ReplaceStringPlaceholders( + const string16& format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets); + +BASE_EXPORT std::string ReplaceStringPlaceholders( + const base::StringPiece& format_string, + const std::vector<std::string>& subst, + std::vector<size_t>* offsets); + +// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, + const string16& a, + size_t* offset); + +// Returns true if the string passed in matches the pattern. The pattern +// string can contain wildcards like * and ? +// The backslash character (\) is an escape character for * and ? +// We limit the patterns to having a max of 16 * or ? characters. +// ? matches 0 or 1 character, while * matches 0 or more characters. +BASE_EXPORT bool MatchPattern(const base::StringPiece& string, + const base::StringPiece& pattern); +BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); + +// Hack to convert any char-like type to its unsigned counterpart. +// For example, it will convert char, signed char and unsigned char to unsigned +// char. +template<typename T> +struct ToUnsigned { + typedef T Unsigned; +}; + +template<> +struct ToUnsigned<char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<signed char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<wchar_t> { +#if defined(WCHAR_T_IS_UTF16) + typedef unsigned short Unsigned; +#elif defined(WCHAR_T_IS_UTF32) + typedef uint32 Unsigned; +#endif +}; +template<> +struct ToUnsigned<short> { + typedef unsigned short Unsigned; +}; + +#endif // BASE_STRINGS_STRING_UTIL_H_ diff --git a/base/string_util_constants.cc b/base/strings/string_util_constants.cc index 225bd92..d92e40c 100644 --- a/base/string_util_constants.cc +++ b/base/strings/string_util_constants.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/string_util.h" +#include "base/strings/string_util.h" #define WHITESPACE_UNICODE \ 0x0009, /* <control-0009> to <control-000D> */ \ diff --git a/base/strings/string_util_posix.h b/base/strings/string_util_posix.h new file mode 100644 index 0000000..34b14f1 --- /dev/null +++ b/base/strings/string_util_posix.h @@ -0,0 +1,53 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_ +#define BASE_STRINGS_STRING_UTIL_POSIX_H_ + +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" +#include "base/strings/string_util.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return ::strdup(str); +} + +inline int strcasecmp(const char* string1, const char* string2) { + return ::strcasecmp(string1, string2); +} + +inline int strncasecmp(const char* string1, const char* string2, size_t count) { + return ::strncasecmp(string1, string2, count); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + return ::vsnprintf(buffer, size, format, arguments); +} + +inline int strncmp16(const char16* s1, const char16* s2, size_t count) { +#if defined(WCHAR_T_IS_UTF16) + return ::wcsncmp(s1, s2, count); +#elif defined(WCHAR_T_IS_UTF32) + return c16memcmp(s1, s2, count); +#endif +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + return ::vswprintf(buffer, size, format, arguments); +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_ diff --git a/base/string_util_unittest.cc b/base/strings/string_util_unittest.cc index 6478eae..58b7620 100644 --- a/base/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc @@ -1,7 +1,9 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "base/strings/string_util.h" + #include <math.h> #include <stdarg.h> @@ -9,8 +11,7 @@ #include <sstream> #include "base/basictypes.h" -#include "base/string16.h" -#include "base/string_util.h" +#include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h new file mode 100644 index 0000000..602ba27 --- /dev/null +++ b/base/strings/string_util_win.h @@ -0,0 +1,61 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_ +#define BASE_STRINGS_STRING_UTIL_WIN_H_ + +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return _strdup(str); +} + +inline int strcasecmp(const char* s1, const char* s2) { + return _stricmp(s1, s2); +} + +inline int strncasecmp(const char* s1, const char* s2, size_t count) { + return _strnicmp(s1, s2, count); +} + +inline int strncmp16(const char16* s1, const char16* s2, size_t count) { + return ::wcsncmp(s1, s2, count); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + int length = _vsprintf_p(buffer, size, format, arguments); + if (length < 0) { + if (size > 0) + buffer[0] = 0; + return _vscprintf_p(format, arguments); + } + return length; +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + + int length = _vswprintf_p(buffer, size, format, arguments); + if (length < 0) { + if (size > 0) + buffer[0] = 0; + return _vscwprintf_p(format, arguments); + } + return length; +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_WIN_H_ diff --git a/base/stringprintf.cc b/base/strings/stringprintf.cc index 946ec72..fe23daa 100644 --- a/base/stringprintf.cc +++ b/base/strings/stringprintf.cc @@ -1,13 +1,13 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/stringprintf.h" +#include "base/strings/stringprintf.h" #include <errno.h> #include "base/scoped_clear_errno.h" -#include "base/string_util.h" +#include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" namespace base { diff --git a/base/strings/stringprintf.h b/base/strings/stringprintf.h new file mode 100644 index 0000000..3c0e399 --- /dev/null +++ b/base/strings/stringprintf.h @@ -0,0 +1,62 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRINGPRINTF_H_ +#define BASE_STRINGS_STRINGPRINTF_H_ + +#include <stdarg.h> // va_list + +#include <string> + +#include "base/base_export.h" +#include "base/compiler_specific.h" + +namespace base { + +// Return a C++ string given printf-like input. +BASE_EXPORT std::string StringPrintf(const char* format, ...) + PRINTF_FORMAT(1, 2); +// OS_ANDROID's libc does not support wchar_t, so several overloads are omitted. +#if !defined(OS_ANDROID) +BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...) + WPRINTF_FORMAT(1, 2); +#endif + +// Return a C++ string given vprintf-like input. +BASE_EXPORT std::string StringPrintV(const char* format, va_list ap) + PRINTF_FORMAT(1, 0); + +// Store result into a supplied string and return it. +BASE_EXPORT const std::string& SStringPrintf(std::string* dst, + const char* format, ...) + PRINTF_FORMAT(2, 3); +#if !defined(OS_ANDROID) +BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst, + const wchar_t* format, ...) + WPRINTF_FORMAT(2, 3); +#endif + +// Append result to a supplied string. +BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...) + PRINTF_FORMAT(2, 3); +#if !defined(OS_ANDROID) +// TODO(evanm): this is only used in a few places in the code; +// replace with string16 version. +BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...) + WPRINTF_FORMAT(2, 3); +#endif + +// Lower-level routine that takes a va_list and appends to a specified +// string. All other routines are just convenience wrappers around it. +BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap) + PRINTF_FORMAT(2, 0); +#if !defined(OS_ANDROID) +BASE_EXPORT void StringAppendV(std::wstring* dst, + const wchar_t* format, va_list ap) + WPRINTF_FORMAT(2, 0); +#endif + +} // namespace base + +#endif // BASE_STRINGS_STRINGPRINTF_H_ diff --git a/base/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc index c04b17e..a1bf2da 100644 --- a/base/stringprintf_unittest.cc +++ b/base/strings/stringprintf_unittest.cc @@ -1,11 +1,12 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "base/strings/stringprintf.h" + #include <errno.h> #include "base/basictypes.h" -#include "base/stringprintf.h" #include "testing/gtest/include/gtest/gtest.h" namespace base { |