summaryrefslogtreecommitdiffstats
path: root/base/strings
diff options
context:
space:
mode:
authoravi@chromium.org <avi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-06-08 06:05:47 +0000
committeravi@chromium.org <avi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-06-08 06:05:47 +0000
commit8ad97adcaf4f453660f3d3057995ef7cd5730faa (patch)
treed549b8276831be0e10b132b01b878202e0187cc4 /base/strings
parent67c0e08dc5d8c31f7c071f2f47e1dac4134b95ae (diff)
downloadchromium_src-8ad97adcaf4f453660f3d3057995ef7cd5730faa.zip
chromium_src-8ad97adcaf4f453660f3d3057995ef7cd5730faa.tar.gz
chromium_src-8ad97adcaf4f453660f3d3057995ef7cd5730faa.tar.bz2
Move string files in base/ to the string subdirectory.
BUG=247723 TEST=no change Review URL: https://chromiumcodereview.appspot.com/16331011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@205050 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/strings')
-rw-r--r--base/strings/string16.cc82
-rw-r--r--base/strings/string16.h189
-rw-r--r--base/strings/string16_unittest.cc54
-rw-r--r--base/strings/string_util.cc1011
-rw-r--r--base/strings/string_util.h576
-rw-r--r--base/strings/string_util_constants.cc55
-rw-r--r--base/strings/string_util_posix.h53
-rw-r--r--base/strings/string_util_unittest.cc1191
-rw-r--r--base/strings/string_util_win.h61
-rw-r--r--base/strings/stringprintf.cc186
-rw-r--r--base/strings/stringprintf.h62
-rw-r--r--base/strings/stringprintf_unittest.cc188
12 files changed, 3708 insertions, 0 deletions
diff --git a/base/strings/string16.cc b/base/strings/string16.cc
new file mode 100644
index 0000000..c802eef
--- /dev/null
+++ b/base/strings/string16.cc
@@ -0,0 +1,82 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string16.h"
+
+#if defined(WCHAR_T_IS_UTF16)
+
+#error This file should not be used on 2-byte wchar_t systems
+// If this winds up being needed on 2-byte wchar_t systems, either the
+// definitions below can be used, or the host system's wide character
+// functions like wmemcmp can be wrapped.
+
+#elif defined(WCHAR_T_IS_UTF32)
+
+#include <ostream>
+
+#include "base/strings/utf_string_conversions.h"
+
+namespace base {
+
+// Compares the first |n| char16 units of |s1| and |s2|. Returns 0 when all of
+// them are equal; otherwise returns -1 or 1 depending on whether the first
+// differing unit of |s1| is smaller or larger than the one in |s2|.
+int c16memcmp(const char16* s1, const char16* s2, size_t n) {
+  // memcmp is deliberately not used here: it would change the semantics.
+  for (size_t index = 0; index < n; ++index) {
+    const char16 left = s1[index];
+    const char16 right = s2[index];
+    if (left == right)
+      continue;
+    // (left - right) is not an option because char16 is unsigned.
+    return (left < right) ? -1 : 1;
+  }
+  return 0;
+}
+
+// Returns the number of char16 units in |s| that precede the first zero unit.
+size_t c16len(const char16* s) {
+  size_t length = 0;
+  while (s[length] != 0)
+    ++length;
+  return length;
+}
+
+// Searches the first |n| units of |s| for |c|. Returns a pointer to the first
+// matching unit, or 0 when none of the |n| units equals |c|.
+const char16* c16memchr(const char16* s, char16 c, size_t n) {
+  for (size_t index = 0; index < n; ++index) {
+    if (s[index] == c)
+      return s + index;
+  }
+  return 0;
+}
+
+// Copies |n| char16 units from |s2| to |s1| with memmove and returns the
+// pointer memmove hands back, typed as char16*.
+char16* c16memmove(char16* s1, const char16* s2, size_t n) {
+  const size_t byte_count = n * sizeof(char16);
+  void* const destination = memmove(s1, s2, byte_count);
+  return static_cast<char16*>(destination);
+}
+
+// Copies |n| char16 units from |s2| to |s1| with memcpy and returns the
+// pointer memcpy hands back, typed as char16*.
+char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
+  const size_t byte_count = n * sizeof(char16);
+  void* const destination = memcpy(s1, s2, byte_count);
+  return static_cast<char16*>(destination);
+}
+
+// Stores |c| into each of the first |n| units of |s| and returns |s|.
+char16* c16memset(char16* s, char16 c, size_t n) {
+  for (size_t index = 0; index < n; ++index)
+    s[index] = c;
+  return s;
+}
+
+// Writes |str| to |out| after converting it with UTF16ToUTF8, and returns
+// |out| so that insertions can be chained.
+std::ostream& operator<<(std::ostream& out, const string16& str) {
+  out << UTF16ToUTF8(str);
+  return out;
+}
+
+// Streams |str| into the stream that |out| points to.
+void PrintTo(const string16& str, std::ostream* out) {
+  std::ostream& stream = *out;
+  stream << str;
+}
+
+} // namespace base
+
+template class std::basic_string<char16, base::string16_char_traits>;
+
+#endif // WCHAR_T_IS_UTF32
diff --git a/base/strings/string16.h b/base/strings/string16.h
new file mode 100644
index 0000000..fd98f1b
--- /dev/null
+++ b/base/strings/string16.h
@@ -0,0 +1,189 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING16_H_
+#define BASE_STRINGS_STRING16_H_
+
+// WHAT:
+// A version of std::basic_string that provides 2-byte characters even when
+// wchar_t is not implemented as a 2-byte type. You can access this class as
+// string16. We also define char16, which string16 is based upon.
+//
+// WHY:
+// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
+// data. Plenty of existing code operates on strings encoded as UTF-16.
+//
+// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
+// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
+// at run time, because it calls some functions (like wcslen) that come from
+// the system's native C library -- which was built with a 4-byte wchar_t!
+// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
+// entirely improper on those systems where the encoding of wchar_t is defined
+// as UTF-32.
+//
+// Here, we define string16, which is similar to std::wstring but replaces all
+// libc functions with custom, 2-byte-char compatible routines. It is capable
+// of carrying UTF-16-encoded data.
+
+#include <stdio.h>
+#include <string>
+
+#include "base/base_export.h"
+#include "base/basictypes.h"
+
+#if defined(WCHAR_T_IS_UTF16)
+
+namespace base {
+
+typedef wchar_t char16;
+typedef std::wstring string16;
+typedef std::char_traits<wchar_t> string16_char_traits;
+
+} // namespace base
+
+#elif defined(WCHAR_T_IS_UTF32)
+
+namespace base {
+
+typedef uint16 char16;
+
+// char16 versions of the functions required by string16_char_traits; these
+// are based on the wide character functions of similar names ("w" or "wcs"
+// instead of "c16").
+BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
+BASE_EXPORT size_t c16len(const char16* s);
+BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);
+BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);
+BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
+BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
+
+// Character traits for string16. Every bulk operation is forwarded to the
+// c16* helpers declared above instead of to the C library's wide-character
+// routines.
+struct string16_char_traits {
+  typedef char16 char_type;
+  typedef int int_type;
+
+  // int_type needs to be able to hold each possible value of char_type, and in
+  // addition, the distinct value of eof().
+  COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width);
+
+  typedef std::streamoff off_type;
+  typedef mbstate_t state_type;
+  typedef std::fpos<state_type> pos_type;
+
+  // Assigns the single character |c2| to |c1|.
+  static void assign(char_type& c1, const char_type& c2) {
+    c1 = c2;
+  }
+
+  // Equality and ordering of single characters, by numeric value.
+  static bool eq(const char_type& c1, const char_type& c2) {
+    return c1 == c2;
+  }
+  static bool lt(const char_type& c1, const char_type& c2) {
+    return c1 < c2;
+  }
+
+  // Compares the first |n| characters of |s1| and |s2| (see c16memcmp).
+  static int compare(const char_type* s1, const char_type* s2, size_t n) {
+    return c16memcmp(s1, s2, n);
+  }
+
+  // Length of the zero-terminated sequence |s| (see c16len).
+  static size_t length(const char_type* s) {
+    return c16len(s);
+  }
+
+  // Finds |a| within the first |n| characters of |s| (see c16memchr).
+  static const char_type* find(const char_type* s, size_t n,
+                               const char_type& a) {
+    return c16memchr(s, a, n);
+  }
+
+  // Moves |n| characters from |s2| to |s1| (see c16memmove). The count is a
+  // size_t, matching the character-traits requirements and every other length
+  // in this struct; taking it as int_type would narrow a length to int before
+  // it is widened again for c16memmove.
+  static char_type* move(char_type* s1, const char_type* s2, size_t n) {
+    return c16memmove(s1, s2, n);
+  }
+
+  // Copies |n| characters from |s2| to |s1| (see c16memcpy).
+  static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
+    return c16memcpy(s1, s2, n);
+  }
+
+  // Fills the first |n| characters of |s| with |a| (see c16memset).
+  static char_type* assign(char_type* s, size_t n, char_type a) {
+    return c16memset(s, a, n);
+  }
+
+  // Returns |c| unless it equals eof(), in which case 0 is returned.
+  static int_type not_eof(const int_type& c) {
+    return eq_int_type(c, eof()) ? 0 : c;
+  }
+
+  // Conversions between char_type and int_type.
+  static char_type to_char_type(const int_type& c) {
+    return char_type(c);
+  }
+
+  static int_type to_int_type(const char_type& c) {
+    return int_type(c);
+  }
+
+  static bool eq_int_type(const int_type& c1, const int_type& c2) {
+    return c1 == c2;
+  }
+
+  // The end-of-file marker is the C library's EOF value.
+  static int_type eof() {
+    return static_cast<int_type>(EOF);
+  }
+};
+
+typedef std::basic_string<char16, base::string16_char_traits> string16;
+
+BASE_EXPORT extern std::ostream& operator<<(std::ostream& out,
+ const string16& str);
+
+// This is required by googletest to print a readable output on test failures.
+BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out);
+
+} // namespace base
+
+// The string class will be explicitly instantiated only once, in string16.cc.
+//
+// std::basic_string<> in GNU libstdc++ contains a static data member,
+// _S_empty_rep_storage, to represent empty strings. When an operation such
+// as assignment or destruction is performed on a string, causing its existing
+// data member to be invalidated, it must not be freed if this static data
+// member is being used. Otherwise, it counts as an attempt to free static
+// (and not allocated) data, which is a memory error.
+//
+// Generally, due to C++ template magic, _S_empty_rep_storage will be marked
+// as a coalesced symbol, meaning that the linker will combine multiple
+// instances into a single one when generating output.
+//
+// If a string class is used by multiple shared libraries, a problem occurs.
+// Each library will get its own copy of _S_empty_rep_storage. When strings
+// are passed across a library boundary for alteration or destruction, memory
+// errors will result. GNU libstdc++ contains a configuration option,
+// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
+// disables the static data member optimization, but it's a good optimization
+// and non-STL code is generally at the mercy of the system's STL
+// configuration. Fully-dynamic strings are not the default for GNU
+// libstdc++ itself or for the libstdc++ installations on the systems we care
+// about, such as Mac OS X and relevant flavors of Linux.
+//
+// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
+//
+// To avoid problems, string classes need to be explicitly instantiated only
+// once, in exactly one library. All other string users see it via an "extern"
+// declaration. This is precisely how GNU libstdc++ handles
+// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
+//
+// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
+// in which the linker does not fully coalesce symbols when dead code
+// stripping is enabled. This bug causes the memory errors described above
+// to occur even when a std::basic_string<> does not cross shared library
+// boundaries, such as in statically-linked executables.
+//
+// TODO(mark): File this bug with Apple and update this note with a bug number.
+
+extern template
+class BASE_EXPORT std::basic_string<base::char16, base::string16_char_traits>;
+
+#endif // WCHAR_T_IS_UTF32
+
+// TODO(brettw) update users of string16 to use the namespace and remove
+// this "using".
+using base::char16;
+using base::string16;
+
+#endif // BASE_STRINGS_STRING16_H_
diff --git a/base/strings/string16_unittest.cc b/base/strings/string16_unittest.cc
new file mode 100644
index 0000000..d98b2a9
--- /dev/null
+++ b/base/strings/string16_unittest.cc
@@ -0,0 +1,54 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <sstream>
+
+#include "base/strings/string16.h"
+
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+#if defined(WCHAR_T_IS_UTF32)
+
+// We define a custom operator<< for string16 so we can use it with logging.
+// This tests that conversion.
+TEST(String16Test, OutputStream) {
+ // Basic stream test.
+ {
+ std::ostringstream stream;
+ stream << "Empty '" << string16() << "' standard '"
+ << string16(ASCIIToUTF16("Hello, world")) << "'";
+ EXPECT_STREQ("Empty '' standard 'Hello, world'",
+ stream.str().c_str());
+ }
+
+ // Interesting edge cases.
+ {
+ // These should each get converted to the invalid character: EF BF BD.
+ string16 initial_surrogate;
+ initial_surrogate.push_back(0xd800);
+ string16 final_surrogate;
+ final_surrogate.push_back(0xdc00);
+
+ // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
+ string16 surrogate_pair;
+ surrogate_pair.push_back(0xd800);
+ surrogate_pair.push_back(0xdf00);
+ surrogate_pair.push_back('z');
+
+ // Will get converted to the invalid char + 's': EF BF BD 's'.
+ string16 unterminated_surrogate;
+ unterminated_surrogate.push_back(0xd800);
+ unterminated_surrogate.push_back('s');
+
+ std::ostringstream stream;
+ stream << initial_surrogate << "," << final_surrogate << ","
+ << surrogate_pair << "," << unterminated_surrogate;
+
+ EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
+ stream.str().c_str());
+ }
+}
+
+#endif
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
new file mode 100644
index 0000000..3ed7069
--- /dev/null
+++ b/base/strings/string_util.cc
@@ -0,0 +1,1011 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_util.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/memory/singleton.h"
+#include "base/strings/utf_string_conversion_utils.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/third_party/icu/icu_utf.h"
+#include "build/build_config.h"
+
+namespace {
+
+// Force the singleton used by Empty[W]String[16] to be a unique type. This
+// prevents other code that might accidentally use Singleton<string> from
+// getting our internal one.
+struct EmptyStrings {
+ EmptyStrings() {}
+ const std::string s;
+ const std::wstring ws;
+ const string16 s16;
+
+ static EmptyStrings* GetInstance() {
+ return Singleton<EmptyStrings>::get();
+ }
+};
+
+// Used by ReplaceStringPlaceholders to track the position in the string of
+// replaced parameters.
+struct ReplacementOffset {
+ ReplacementOffset(uintptr_t parameter, size_t offset)
+ : parameter(parameter),
+ offset(offset) {}
+
+ // Index of the parameter.
+ uintptr_t parameter;
+
+ // Starting position in the string.
+ size_t offset;
+};
+
+static bool CompareParameter(const ReplacementOffset& elem1,
+ const ReplacementOffset& elem2) {
+ return elem1.parameter < elem2.parameter;
+}
+
+} // namespace
+
+namespace base {
+
+// Returns true when no conversion specification in the wprintf-style |format|
+// uses a construct this function treats as non-portable: 's' or 'c' without a
+// preceding 'l' modifier, or any of the 'S', 'C', 'F', 'D', 'O' and 'U'
+// specifiers. A string that ends in the middle of a specification is reported
+// as portable.
+bool IsWprintfFormatPortable(const wchar_t* format) {
+ for (const wchar_t* position = format; *position != '\0'; ++position) {
+ if (*position == '%') {
+ bool in_specification = true;
+ bool modifier_l = false;
+ while (in_specification) {
+ // Eat up characters until reaching a known specifier.
+ if (*++position == '\0') {
+ // The format string ended in the middle of a specification. Call
+ // it portable because no unportable specifications were found. The
+ // string is equally broken on all platforms.
+ return true;
+ }
+
+ if (*position == 'l') {
+ // 'l' is the only thing that can save the 's' and 'c' specifiers.
+ modifier_l = true;
+ } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
+ *position == 'S' || *position == 'C' || *position == 'F' ||
+ *position == 'D' || *position == 'O' || *position == 'U') {
+ // Not portable.
+ return false;
+ }
+
+ // The terminator set includes '%', so a second '%' right after the first
+ // also ends the specification.
+ if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
+ // Portable, keep scanning the rest of the format string.
+ in_specification = false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+} // namespace base
+
+
+const std::string& EmptyString() {
+ return EmptyStrings::GetInstance()->s;
+}
+
+const std::wstring& EmptyWString() {
+ return EmptyStrings::GetInstance()->ws;
+}
+
+const string16& EmptyString16() {
+ return EmptyStrings::GetInstance()->s16;
+}
+
+// Copies |input| into |*output| and then replaces every character that occurs
+// in the null-terminated set |replace_chars| with the whole of |replace_with|.
+// Returns true if at least one character was replaced.
+template<typename STR>
+bool ReplaceCharsT(const STR& input,
+ const typename STR::value_type replace_chars[],
+ const STR& replace_with,
+ STR* output) {
+ bool removed = false;
+ size_t replace_length = replace_with.length();
+
+ *output = input;
+
+ size_t found = output->find_first_of(replace_chars);
+ while (found != STR::npos) {
+ removed = true;
+ output->replace(found, 1, replace_with);
+ // Resume after the text just inserted so that |replace_with| itself is
+ // never rescanned.
+ found = output->find_first_of(replace_chars, found + replace_length);
+ }
+
+ return removed;
+}
+
+bool ReplaceChars(const string16& input,
+ const char16 replace_chars[],
+ const string16& replace_with,
+ string16* output) {
+ return ReplaceCharsT(input, replace_chars, replace_with, output);
+}
+
+bool ReplaceChars(const std::string& input,
+ const char replace_chars[],
+ const std::string& replace_with,
+ std::string* output) {
+ return ReplaceCharsT(input, replace_chars, replace_with, output);
+}
+
+bool RemoveChars(const string16& input,
+ const char16 remove_chars[],
+ string16* output) {
+ return ReplaceChars(input, remove_chars, string16(), output);
+}
+
+bool RemoveChars(const std::string& input,
+ const char remove_chars[],
+ std::string* output) {
+ return ReplaceChars(input, remove_chars, std::string(), output);
+}
+
+// Strips characters that occur in |trim_chars| from the ends of |input|
+// selected by |positions| and stores the result in |*output|. Returns the
+// ends from which something was actually removed (TRIM_NONE when nothing
+// was); the empty and all-trimmed cases are described in the body.
+template<typename STR>
+TrimPositions TrimStringT(const STR& input,
+ const typename STR::value_type trim_chars[],
+ TrimPositions positions,
+ STR* output) {
+ // Find the edges of leading/trailing whitespace as desired.
+ // For empty input |last_char| wraps around, but it is not used in that case
+ // because the input.empty() branch below returns first.
+ const typename STR::size_type last_char = input.length() - 1;
+ const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
+ input.find_first_not_of(trim_chars) : 0;
+ const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
+ input.find_last_not_of(trim_chars) : last_char;
+
+ // When the string was all whitespace, report that we stripped off whitespace
+ // from whichever position the caller was interested in. For empty input, we
+ // stripped no whitespace, but we still need to clear |output|.
+ if (input.empty() ||
+ (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
+ bool input_was_empty = input.empty(); // in case output == &input
+ output->clear();
+ return input_was_empty ? TRIM_NONE : positions;
+ }
+
+ // Trim the whitespace.
+ *output =
+ input.substr(first_good_char, last_good_char - first_good_char + 1);
+
+ // Return where we trimmed from.
+ return static_cast<TrimPositions>(
+ ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
+ ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
+}
+
+bool TrimString(const std::wstring& input,
+ const wchar_t trim_chars[],
+ std::wstring* output) {
+ return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool TrimString(const string16& input,
+ const char16 trim_chars[],
+ string16* output) {
+ return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
+}
+#endif
+
+bool TrimString(const std::string& input,
+ const char trim_chars[],
+ std::string* output) {
+ return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
+}
+
+// Stores in |*output| a prefix of |input| that is at most |byte_size| bytes
+// long and ends right after the last character that decodes cleanly within
+// that limit. |*output| is a plain copy of |input| when |byte_size| exceeds
+// its length, and is cleared when no acceptable character is found.
+void TruncateUTF8ToByteSize(const std::string& input,
+ const size_t byte_size,
+ std::string* output) {
+ DCHECK(output);
+ if (byte_size > input.length()) {
+ *output = input;
+ return;
+ }
+ DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
+ // Note: This cast is necessary because CBU8_NEXT uses int32s.
+ int32 truncation_length = static_cast<int32>(byte_size);
+ int32 char_index = truncation_length - 1;
+ const char* data = input.data();
+
+ // Using CBU8, we will move backwards from the truncation point
+ // to the beginning of the string looking for a valid UTF8
+ // character. Once a full UTF8 character is found, we will
+ // truncate the string to the end of that character.
+ while (char_index >= 0) {
+ // Remember the candidate start; the code below relies on CBU8_NEXT moving
+ // |char_index| forward (presumably past the decoded character - confirm
+ // against the macro).
+ int32 prev = char_index;
+ uint32 code_point = 0;
+ CBU8_NEXT(data, char_index, truncation_length, code_point);
+ if (!base::IsValidCharacter(code_point) ||
+ !base::IsValidCodepoint(code_point)) {
+ // Not acceptable: retry one byte earlier than the previous candidate.
+ char_index = prev - 1;
+ } else {
+ break;
+ }
+ }
+
+ if (char_index >= 0 )
+ *output = input.substr(0, char_index);
+ else
+ output->clear();
+}
+
+TrimPositions TrimWhitespace(const string16& input,
+ TrimPositions positions,
+ string16* output) {
+ return TrimStringT(input, kWhitespaceUTF16, positions, output);
+}
+
+TrimPositions TrimWhitespaceASCII(const std::string& input,
+ TrimPositions positions,
+ std::string* output) {
+ return TrimStringT(input, kWhitespaceASCII, positions, output);
+}
+
+// This function is only for backward-compatibility.
+// To be removed when all callers are updated.
+TrimPositions TrimWhitespace(const std::string& input,
+ TrimPositions positions,
+ std::string* output) {
+ return TrimWhitespaceASCII(input, positions, output);
+}
+
+// Returns a copy of |text| with leading and trailing whitespace removed and
+// each interior run of whitespace (as judged by IsWhitespace) reduced to a
+// single space. When |trim_sequences_with_line_breaks| is true, a run that
+// contains '\n' or '\r' is removed entirely instead.
+template<typename STR>
+STR CollapseWhitespaceT(const STR& text,
+ bool trim_sequences_with_line_breaks) {
+ STR result;
+ // The output can never be longer than the input; it is shrunk to the number
+ // of characters actually written before returning.
+ result.resize(text.size());
+
+ // Set flags to pretend we're already in a trimmed whitespace sequence, so we
+ // will trim any leading whitespace.
+ bool in_whitespace = true;
+ bool already_trimmed = true;
+
+ int chars_written = 0;
+ for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
+ if (IsWhitespace(*i)) {
+ if (!in_whitespace) {
+ // Reduce all whitespace sequences to a single space.
+ in_whitespace = true;
+ result[chars_written++] = L' ';
+ }
+ if (trim_sequences_with_line_breaks && !already_trimmed &&
+ ((*i == '\n') || (*i == '\r'))) {
+ // Whitespace sequences containing CR or LF are eliminated entirely.
+ // Stepping back drops the single space written for this sequence.
+ already_trimmed = true;
+ --chars_written;
+ }
+ } else {
+ // Non-whitespace characters are copied straight across.
+ in_whitespace = false;
+ already_trimmed = false;
+ result[chars_written++] = *i;
+ }
+ }
+
+ if (in_whitespace && !already_trimmed) {
+ // Any trailing whitespace is eliminated.
+ --chars_written;
+ }
+
+ result.resize(chars_written);
+ return result;
+}
+
+std::wstring CollapseWhitespace(const std::wstring& text,
+ bool trim_sequences_with_line_breaks) {
+ return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+string16 CollapseWhitespace(const string16& text,
+ bool trim_sequences_with_line_breaks) {
+ return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+}
+#endif
+
+std::string CollapseWhitespaceASCII(const std::string& text,
+ bool trim_sequences_with_line_breaks) {
+ return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
+}
+
+bool ContainsOnlyWhitespaceASCII(const std::string& str) {
+ for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
+ if (!IsAsciiWhitespace(*i))
+ return false;
+ }
+ return true;
+}
+
+bool ContainsOnlyWhitespace(const string16& str) {
+ return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
+}
+
+template<typename STR>
+static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
+ for (typename STR::const_iterator iter = input.begin();
+ iter != input.end(); ++iter) {
+ if (characters.find(*iter) == STR::npos)
+ return false;
+ }
+ return true;
+}
+
+bool ContainsOnlyChars(const std::wstring& input,
+ const std::wstring& characters) {
+ return ContainsOnlyCharsT(input, characters);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool ContainsOnlyChars(const string16& input, const string16& characters) {
+ return ContainsOnlyCharsT(input, characters);
+}
+#endif
+
+bool ContainsOnlyChars(const std::string& input,
+ const std::string& characters) {
+ return ContainsOnlyCharsT(input, characters);
+}
+
+std::string WideToASCII(const std::wstring& wide) {
+ DCHECK(IsStringASCII(wide)) << wide;
+ return std::string(wide.begin(), wide.end());
+}
+
+std::string UTF16ToASCII(const string16& utf16) {
+ DCHECK(IsStringASCII(utf16)) << utf16;
+ return std::string(utf16.begin(), utf16.end());
+}
+
+// Latin1 is just the low range of Unicode, so we can copy directly to convert.
+// Returns true and stores the converted string in |*latin1| when every
+// character of |wide| passes the "> 255" check below. Otherwise returns false;
+// |*latin1| is cleared up front, so it is left empty in that case.
+bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
+ std::string output;
+ output.resize(wide.size());
+ latin1->clear();
+ for (size_t i = 0; i < wide.size(); i++) {
+ // NOTE(review): where wchar_t is a signed type this comparison does not
+ // reject negative values - confirm whether such input can reach here.
+ if (wide[i] > 255)
+ return false;
+ output[i] = static_cast<char>(wide[i]);
+ }
+ // Publish the result only after the whole input has been checked.
+ latin1->swap(output);
+ return true;
+}
+
+// Returns true if every element of |str|, viewed through the unsigned type
+// that ToUnsigned selects for it, is no greater than 0x7F. An empty |str|
+// yields true.
+template<class STR>
+static bool DoIsStringASCII(const STR& str) {
+  typedef typename ToUnsigned<typename STR::value_type>::Unsigned UnsignedChar;
+  const size_t length = str.length();
+  for (size_t index = 0; index != length; ++index) {
+    const UnsignedChar unit = str[index];
+    if (unit > 0x7F)
+      return false;
+  }
+  return true;
+}
+
+bool IsStringASCII(const std::wstring& str) {
+ return DoIsStringASCII(str);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool IsStringASCII(const string16& str) {
+ return DoIsStringASCII(str);
+}
+#endif
+
+bool IsStringASCII(const base::StringPiece& str) {
+ return DoIsStringASCII(str);
+}
+
+// Returns true if every code point that CBU8_NEXT produces while walking
+// |str| from start to end passes base::IsValidCharacter. An empty string
+// yields true.
+bool IsStringUTF8(const std::string& str) {
+ const char *src = str.data();
+ int32 src_len = static_cast<int32>(str.length());
+ int32 char_index = 0;
+
+ // The loop relies on CBU8_NEXT advancing |char_index| on every iteration.
+ while (char_index < src_len) {
+ int32 code_point;
+ CBU8_NEXT(src, char_index, src_len, code_point);
+ if (!base::IsValidCharacter(code_point))
+ return false;
+ }
+ return true;
+}
+
+// Returns true if applying base::ToLowerASCII to each element of
+// [a_begin, a_end) yields exactly the zero-terminated string |b|: same length
+// and the same characters.
+template<typename Iter>
+static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
+                                          Iter a_end,
+                                          const char* b) {
+  const char* expected = b;
+  Iter current = a_begin;
+  while (current != a_end) {
+    // |b| ran out before the range did.
+    if (*expected == 0)
+      return false;
+    if (base::ToLowerASCII(*current) != *expected)
+      return false;
+    ++current;
+    ++expected;
+  }
+  // Equal only if |b| ends exactly where the range does.
+  return *expected == 0;
+}
+
+// Front-ends for LowerCaseEqualsASCII.
+bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
+ return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
+}
+
+bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
+ return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool LowerCaseEqualsASCII(const string16& a, const char* b) {
+ return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
+}
+#endif
+
+bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
+ std::string::const_iterator a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
+ std::wstring::const_iterator a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
+ string16::const_iterator a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+#endif
+
+// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
+#if !defined(OS_ANDROID)
+bool LowerCaseEqualsASCII(const char* a_begin,
+ const char* a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+bool LowerCaseEqualsASCII(const wchar_t* a_begin,
+ const wchar_t* a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool LowerCaseEqualsASCII(const char16* a_begin,
+ const char16* a_end,
+ const char* b) {
+ return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+#endif
+
+#endif // !defined(OS_ANDROID)
+
+// Returns true if |a| and |b| have the same length and compare equal element
+// by element.
+bool EqualsASCII(const string16& a, const base::StringPiece& b) {
+  const bool same_length = (a.length() == b.length());
+  return same_length && std::equal(b.begin(), b.end(), a.begin());
+}
+
+// Returns true if |str| begins with |search|. The comparison is exact when
+// |case_sensitive| is true and goes through base::strncasecmp otherwise.
+bool StartsWithASCII(const std::string& str,
+                     const std::string& search,
+                     bool case_sensitive) {
+  const std::string::size_type prefix_length = search.length();
+  if (!case_sensitive)
+    return base::strncasecmp(str.c_str(), search.c_str(), prefix_length) == 0;
+  return str.compare(0, prefix_length, search) == 0;
+}
+
+// Returns true if |str| begins with |search|. The case-insensitive path
+// compares elements with base::CaseInsensitiveCompare.
+template <typename STR>
+bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
+  if (case_sensitive)
+    return str.compare(0, search.length(), search) == 0;
+
+  // std::equal below reads search.size() elements of |str|, so a longer
+  // |search| has to be rejected first.
+  if (search.size() > str.size())
+    return false;
+  return std::equal(search.begin(), search.end(), str.begin(),
+                    base::CaseInsensitiveCompare<typename STR::value_type>());
+}
+
+bool StartsWith(const std::wstring& str, const std::wstring& search,
+ bool case_sensitive) {
+ return StartsWithT(str, search, case_sensitive);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool StartsWith(const string16& str, const string16& search,
+ bool case_sensitive) {
+ return StartsWithT(str, search, case_sensitive);
+}
+#endif
+
+// Returns true if |str| ends with |search|. The case-insensitive path
+// compares elements with base::CaseInsensitiveCompare.
+template <typename STR>
+bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
+  const typename STR::size_type search_length = search.length();
+  const typename STR::size_type str_length = str.length();
+  if (str_length < search_length)
+    return false;
+
+  // Offset in |str| at which the candidate suffix starts.
+  const typename STR::size_type tail_start = str_length - search_length;
+  if (!case_sensitive) {
+    return std::equal(search.begin(), search.end(), str.begin() + tail_start,
+                      base::CaseInsensitiveCompare<typename STR::value_type>());
+  }
+  return str.compare(tail_start, search_length, search) == 0;
+}
+
+bool EndsWith(const std::string& str, const std::string& search,
+ bool case_sensitive) {
+ return EndsWithT(str, search, case_sensitive);
+}
+
+bool EndsWith(const std::wstring& str, const std::wstring& search,
+ bool case_sensitive) {
+ return EndsWithT(str, search, case_sensitive);
+}
+
+#if !defined(WCHAR_T_IS_UTF16)
+bool EndsWith(const string16& str, const string16& search,
+ bool case_sensitive) {
+ return EndsWithT(str, search, case_sensitive);
+}
+#endif
+
+static const char* const kByteStringsUnlocalized[] = {
+ " B",
+ " kB",
+ " MB",
+ " GB",
+ " TB",
+ " PB"
+};
+
+// Formats |bytes| as a number followed by one of the kByteStringsUnlocalized
+// suffixes. The amount is divided by 1024 for each step up the suffix table,
+// stopping at the last entry. One decimal place is printed when a suffix above
+// " B" is used and the scaled amount is below 100; otherwise none is printed.
+string16 FormatBytesUnlocalized(int64 bytes) {
+ double unit_amount = static_cast<double>(bytes);
+ size_t dimension = 0;
+ const int kKilo = 1024;
+ // Scale down until the amount drops below 1024 or the largest unit is hit.
+ while (unit_amount >= kKilo &&
+ dimension < arraysize(kByteStringsUnlocalized) - 1) {
+ unit_amount /= kKilo;
+ dimension++;
+ }
+
+ char buf[64];
+ if (bytes != 0 && dimension > 0 && unit_amount < 100) {
+ base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
+ kByteStringsUnlocalized[dimension]);
+ } else {
+ base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
+ kByteStringsUnlocalized[dimension]);
+ }
+
+ return ASCIIToUTF16(buf);
+}
+
+// Replaces occurrences of |find_this| in |*str| with |replace_with|, searching
+// from |start_offset| onwards. Only the first occurrence is replaced unless
+// |replace_all| is true. Nothing happens when |start_offset| is npos or lies
+// at or beyond the end of |*str|, or when |find_this| is empty.
+template<class StringType>
+void DoReplaceSubstringsAfterOffset(StringType* str,
+                                    typename StringType::size_type start_offset,
+                                    const StringType& find_this,
+                                    const StringType& replace_with,
+                                    bool replace_all) {
+  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
+    return;
+
+  // An empty search string is a caller error and still trips the DCHECK. The
+  // explicit return covers builds where that check is not enforced: find()
+  // matches an empty string at every offset, so the loop below would either
+  // make no progress or keep growing |*str| without ever terminating.
+  DCHECK(!find_this.empty());
+  if (find_this.empty())
+    return;
+
+  for (typename StringType::size_type offs(str->find(find_this, start_offset));
+       offs != StringType::npos; offs = str->find(find_this, offs)) {
+    str->replace(offs, find_this.length(), replace_with);
+    // Skip over the inserted text so that it is never matched again.
+    offs += replace_with.length();
+
+    if (!replace_all)
+      break;
+  }
+}
+
+void ReplaceFirstSubstringAfterOffset(string16* str,
+ string16::size_type start_offset,
+ const string16& find_this,
+ const string16& replace_with) {
+ DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
+ false); // replace first instance
+}
+
+void ReplaceFirstSubstringAfterOffset(std::string* str,
+ std::string::size_type start_offset,
+ const std::string& find_this,
+ const std::string& replace_with) {
+ DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
+ false); // replace first instance
+}
+
+void ReplaceSubstringsAfterOffset(string16* str,
+ string16::size_type start_offset,
+ const string16& find_this,
+ const string16& replace_with) {
+ DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
+ true); // replace all instances
+}
+
+void ReplaceSubstringsAfterOffset(std::string* str,
+ std::string::size_type start_offset,
+ const std::string& find_this,
+ const std::string& replace_with) {
+ DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
+ true); // replace all instances
+}
+
+
+// Splits |str| at any character contained in |delimiters| and stores the
+// non-empty pieces in |*tokens|, whose previous contents are discarded.
+// Returns the number of tokens stored.
+template<typename STR>
+static size_t TokenizeT(const STR& str,
+ const STR& delimiters,
+ std::vector<STR>* tokens) {
+ tokens->clear();
+
+ // |start| always names the first character of the next token, so runs of
+ // consecutive delimiters never produce empty tokens.
+ typename STR::size_type start = str.find_first_not_of(delimiters);
+ while (start != STR::npos) {
+ typename STR::size_type end = str.find_first_of(delimiters, start + 1);
+ if (end == STR::npos) {
+ // No further delimiter: the rest of the string is the last token.
+ tokens->push_back(str.substr(start));
+ break;
+ } else {
+ tokens->push_back(str.substr(start, end - start));
+ start = str.find_first_not_of(delimiters, end + 1);
+ }
+ }
+
+ return tokens->size();
+}
+
+size_t Tokenize(const std::wstring& str,
+ const std::wstring& delimiters,
+ std::vector<std::wstring>* tokens) {
+ return TokenizeT(str, delimiters, tokens); // STR is deduced as std::wstring.
+}
+
+#if !defined(WCHAR_T_IS_UTF16) // NOTE(review): presumably string16 is std::wstring when this macro is set, making this overload a duplicate - confirm in string16.h.
+size_t Tokenize(const string16& str,
+ const string16& delimiters,
+ std::vector<string16>* tokens) {
+ return TokenizeT(str, delimiters, tokens); // STR is deduced as string16.
+}
+#endif
+
+size_t Tokenize(const std::string& str,
+ const std::string& delimiters,
+ std::vector<std::string>* tokens) {
+ return TokenizeT(str, delimiters, tokens); // STR is deduced as std::string.
+}
+
+size_t Tokenize(const base::StringPiece& str,
+ const base::StringPiece& delimiters,
+ std::vector<base::StringPiece>* tokens) {
+ return TokenizeT(str, delimiters, tokens); // STR is deduced as base::StringPiece; tokens come from its substr().
+}
+
+// Concatenates the elements of |parts|, inserting |sep| between each pair of
+// neighbouring elements. An empty |parts| yields an empty string; a single
+// element is returned without any separator.
+template<typename STR>
+static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
+  if (parts.empty())
+    return STR();
+
+  STR joined(parts.front());
+  for (size_t index = 1; index < parts.size(); ++index) {
+    joined += sep;
+    joined += parts[index];
+  }
+
+  return joined;
+}
+
+std::string JoinString(const std::vector<std::string>& parts, char sep) {
+ return JoinStringT(parts, std::string(1, sep)); // Wrap the single character in a one-character string.
+}
+
+string16 JoinString(const std::vector<string16>& parts, char16 sep) {
+ return JoinStringT(parts, string16(1, sep)); // Same wrapping, for char16.
+}
+
+std::string JoinString(const std::vector<std::string>& parts,
+ const std::string& separator) {
+ return JoinStringT(parts, separator); // Separator is already a string; forward unchanged.
+}
+
+string16 JoinString(const std::vector<string16>& parts,
+ const string16& separator) {
+ return JoinStringT(parts, separator); // Separator is already a string; forward unchanged.
+}
+
+// Expands the placeholders in |format_string| using |subst| and returns the
+// resulting string.
+//   - "$N", where N is a decimal number >= 1, is replaced by subst[N - 1].
+//     When N is larger than subst.size() nothing is inserted.
+//   - A run of k >= 2 consecutive '$' characters produces k - 1 literal '$'.
+//   - A single '$' that is the last character of |format_string| is dropped.
+// If |offsets| is non-NULL, the position in the output at which each "$N" was
+// expanded is appended to it; the entries are kept ordered with
+// CompareParameter while they are collected.
+//
+// "$0", and '$' followed by a character that is not a digit, are not valid
+// placeholders. They used to make |index| wrap around to the largest
+// uintptr_t value (0 - 1) and record a bogus entry in |offsets|. Such
+// sequences are now skipped without recording anything; the returned string
+// is the same as before.
+template<class FormatStringType, class OutStringType>
+OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
+    const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
+  size_t substitutions = subst.size();
+
+  // Reserve room for the format string plus every substitution up front so
+  // the appends below rarely need to reallocate.
+  size_t sub_length = 0;
+  for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
+       iter != subst.end(); ++iter) {
+    sub_length += iter->length();
+  }
+
+  OutStringType formatted;
+  formatted.reserve(format_string.length() + sub_length);
+
+  std::vector<ReplacementOffset> r_offsets;
+  for (typename FormatStringType::const_iterator i = format_string.begin();
+       i != format_string.end(); ++i) {
+    if ('$' == *i) {
+      if (i + 1 != format_string.end()) {
+        ++i;
+        DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
+        if ('$' == *i) {
+          // "$$...": the first '$' was consumed above; emit one '$' for each
+          // of the remaining ones.
+          while (i != format_string.end() && '$' == *i) {
+            formatted.push_back('$');
+            ++i;
+          }
+          // Step back so the loop's ++i lands on the first unread character.
+          --i;
+        } else {
+          // Parse the (possibly multi-digit) 1-based parameter number.
+          uintptr_t index = 0;
+          while (i != format_string.end() && '0' <= *i && *i <= '9') {
+            index *= 10;
+            index += *i - '0';
+            ++i;
+          }
+          // Step back so the loop's ++i lands on the first unread character.
+          --i;
+          // No digits at all, or a value of zero: not a placeholder. Bail out
+          // before the decrement below can wrap around.
+          if (index == 0)
+            continue;
+          index -= 1;  // Convert to a 0-based index into |subst|.
+          if (offsets) {
+            ReplacementOffset r_offset(index,
+                static_cast<int>(formatted.size()));
+            r_offsets.insert(std::lower_bound(r_offsets.begin(),
+                                              r_offsets.end(),
+                                              r_offset,
+                                              &CompareParameter),
+                             r_offset);
+          }
+          if (index < substitutions)
+            formatted.append(subst.at(index));
+        }
+      }
+    } else {
+      formatted.push_back(*i);
+    }
+  }
+  if (offsets) {
+    for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
+         i != r_offsets.end(); ++i) {
+      offsets->push_back(i->offset);
+    }
+  }
+  return formatted;
+}
+
+string16 ReplaceStringPlaceholders(const string16& format_string,
+ const std::vector<string16>& subst,
+ std::vector<size_t>* offsets) {
+ return DoReplaceStringPlaceholders(format_string, subst, offsets); // FormatStringType and OutStringType are both string16.
+}
+
+std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
+ const std::vector<std::string>& subst,
+ std::vector<size_t>* offsets) {
+ return DoReplaceStringPlaceholders(format_string, subst, offsets); // Format is a StringPiece; output type follows |subst| (std::string).
+}
+
+// Single-substitution convenience wrapper: expands |format_string| with |a| as
+// the only substitution and returns the result. If |offset| is non-NULL it
+// receives the first offset reported by the vector-based overload.
+//
+// |format_string| is expected to yield exactly one offset (DCHECKed). Should
+// execution continue past a failed DCHECK (e.g. because it is compiled out)
+// with no offset reported, |*offset| is left untouched instead of reading the
+// first element of an empty vector.
+string16 ReplaceStringPlaceholders(const string16& format_string,
+                                   const string16& a,
+                                   size_t* offset) {
+  std::vector<size_t> offsets;
+  std::vector<string16> subst;
+  subst.push_back(a);
+  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
+
+  DCHECK(offsets.size() == 1);
+  if (offset && !offsets.empty()) {
+    *offset = offsets[0];
+  }
+  return result;
+}
+
+static bool IsWildcard(base_icu::UChar32 character) {
+ return character == '*' || character == '?'; // These two characters are the only wildcards recognized.
+}
+
+// Move the |*pattern| and |*string| pointers forward to the point where they start to differ.
+template <typename CHAR, typename NEXT>
+static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
+ const CHAR** string, const CHAR* string_end,
+ NEXT next) { // |next| returns the character at a pointer and advances that pointer.
+ const CHAR* escape = NULL; // Position of a pending backslash in the pattern, or NULL if none.
+ while (*pattern != pattern_end && *string != string_end) {
+ if (!escape && IsWildcard(**pattern)) {
+ // We don't want to match wildcard here, except if it's escaped.
+ return;
+ }
+
+ // Check if the escapement char is found. If so, skip it and move to the
+ // next character, which is then compared literally (even if it is a wildcard).
+ if (!escape && **pattern == '\\') {
+ escape = *pattern; // Remember the backslash position so it can be restored on a mismatch.
+ next(pattern, pattern_end);
+ continue;
+ }
+
+ // Check if the chars match, if so, increment the ptrs.
+ const CHAR* pattern_next = *pattern; // Decode through copies; the real pointers move only on a match.
+ const CHAR* string_next = *string;
+ base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
+ if (pattern_char == next(&string_next, string_end) &&
+ pattern_char != (base_icu::UChar32) CBU_SENTINEL) { // A CBU_SENTINEL result never counts as a match.
+ *pattern = pattern_next;
+ *string = string_next;
+ } else {
+ // Uh oh, it did not match, we are done. If the last char was an
+ // escapement, that means that it was an error to advance the ptr here,
+ // let's put it back where it was. This also mean that the MatchPattern
+ // function will return false because if we can't match an escape char
+ // here, then no one will.
+ if (escape) {
+ *pattern = escape;
+ }
+ return;
+ }
+
+ escape = NULL; // The escaped character (if any) has been consumed.
+ }
+}
+
+// Advances |*pattern| past a run of wildcard characters (as classified by
+// IsWildcard), using |next| to step. On return |*pattern| is either equal to
+// |end| or positioned at the first non-wildcard character.
+template <typename CHAR, typename NEXT>
+static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
+  while (*pattern != end && IsWildcard(**pattern))
+    next(pattern, end);
+}
+
+template <typename CHAR, typename NEXT>
+static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
+ const CHAR* pattern, const CHAR* pattern_end,
+ int depth,
+ NEXT next) { // Returns true if [eval, eval_end) matches [pattern, pattern_end); |depth| is the current recursion level.
+ const int kMaxDepth = 16; // Recursion budget: every wildcard branch below recurses with depth + 1.
+ if (depth > kMaxDepth)
+ return false; // Budget exceeded: reported as "no match".
+
+ // Eat all the matching chars (the literal prefix shared by pattern and string).
+ EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
+
+ // If the string is empty, then the pattern must be empty too, or contains
+ // only wildcards.
+ if (eval == eval_end) {
+ EatWildcard(&pattern, pattern_end, next);
+ return pattern == pattern_end;
+ }
+
+ // Pattern is empty but not string, this is not a match.
+ if (pattern == pattern_end)
+ return false;
+
+ // If this is a question mark, then we need to compare the rest with
+ // the current string or the string with one character eaten.
+ const CHAR* next_pattern = pattern; // Will point just past the current pattern character.
+ next(&next_pattern, pattern_end);
+ if (pattern[0] == '?') {
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next)) // First try: '?' matches zero characters.
+ return true;
+ const CHAR* next_eval = eval;
+ next(&next_eval, eval_end);
+ if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next)) // Second try: '?' consumes one character of the string.
+ return true;
+ }
+
+ // This is a *, try to match all the possible substrings with the remainder
+ // of the pattern.
+ if (pattern[0] == '*') {
+ // Collapse duplicate wild cards (********** into *) so that the
+ // method does not recurse unnecessarily. http://crbug.com/52839
+ EatWildcard(&next_pattern, pattern_end, next); // Note: this skips '?' as well as '*', since IsWildcard accepts both.
+
+ while (eval != eval_end) {
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next))
+ return true;
+ eval++; // Advances by one CHAR unit, not through |next|.
+ }
+
+ // We reached the end of the string, let see if the pattern contains only
+ // wildcards.
+ if (eval == eval_end) { // Always true here: the loop above exits only by returning or when eval == eval_end.
+ EatWildcard(&pattern, pattern_end, next);
+ if (pattern != pattern_end)
+ return false;
+ return true;
+ }
+ }
+
+ return false; // Both '?' attempts failed, or the current pattern character is not a wildcard.
+}
+
+struct NextCharUTF8 { // Functor passed as |next| by the StringPiece overload of MatchPattern.
+ base_icu::UChar32 operator()(const char** p, const char* end) {
+ base_icu::UChar32 c;
+ int offset = 0;
+ CBU8_NEXT(*p, offset, end - *p, c); // Macro defined elsewhere; sets |c| and updates |offset| (presumably reads one code point - confirm).
+ *p += offset; // Move the caller's pointer by the amount the macro left in |offset|.
+ return c;
+ }
+};
+
+struct NextCharUTF16 { // Functor passed as |next| by the string16 overload of MatchPattern.
+ base_icu::UChar32 operator()(const char16** p, const char16* end) {
+ base_icu::UChar32 c;
+ int offset = 0;
+ CBU16_NEXT(*p, offset, end - *p, c); // Macro defined elsewhere; sets |c| and updates |offset| (presumably reads one code point - confirm).
+ *p += offset; // Move the caller's pointer by the amount the macro left in |offset|.
+ return c;
+ }
+};
+
+bool MatchPattern(const base::StringPiece& eval,
+ const base::StringPiece& pattern) {
+ return MatchPatternT(eval.data(), eval.data() + eval.size(),
+ pattern.data(), pattern.data() + pattern.size(),
+ 0, NextCharUTF8()); // Start at recursion depth 0; step through the input with NextCharUTF8.
+}
+
+bool MatchPattern(const string16& eval, const string16& pattern) {
+ return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
+ pattern.c_str(), pattern.c_str() + pattern.size(),
+ 0, NextCharUTF16()); // Start at recursion depth 0; step through the input with NextCharUTF16.
+}
+
+// The following code is compatible with the OpenBSD lcpy interface. See:
+// http://www.gratisoft.us/todd/papers/strlcpy.html
+// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
+
+namespace {
+
+// Copies the null-terminated |src| into |dst|, which has room for |dst_size|
+// characters, truncating if necessary. |dst| is always null-terminated unless
+// |dst_size| is 0. Returns the length of |src| in characters (excluding its
+// terminator), so a return value >= |dst_size| means the copy was truncated.
+template <typename CHAR>
+size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
+  size_t pos = 0;
+  while (pos < dst_size) {
+    dst[pos] = src[pos];
+    if (dst[pos] == 0)  // The terminator was just copied: |src| fit entirely.
+      return pos;
+    ++pos;
+  }
+
+  // The buffer filled up before the terminator was reached. Overwrite the
+  // last copied character with a terminator (there is nothing to overwrite
+  // when the buffer has no room at all).
+  if (dst_size != 0)
+    dst[dst_size - 1] = 0;
+
+  // |pos| equals |dst_size| here; keep walking |src| to measure its full
+  // length in characters.
+  while (src[pos])
+    ++pos;
+  return pos;
+}
+
+} // namespace
+
+size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
+ return lcpyT<char>(dst, src, dst_size); // char instantiation of the shared template above.
+}
+size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
+ return lcpyT<wchar_t>(dst, src, dst_size); // wchar_t instantiation; |dst_size| counts wchar_t elements, not bytes.
+}
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
new file mode 100644
index 0000000..7b4af7d
--- /dev/null
+++ b/base/strings/string_util.h
@@ -0,0 +1,576 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file defines utility functions for working with strings.
+
+#ifndef BASE_STRINGS_STRING_UTIL_H_
+#define BASE_STRINGS_STRING_UTIL_H_
+
+#include <ctype.h>
+#include <stdarg.h> // va_list
+
+#include <string>
+#include <vector>
+
+#include "base/base_export.h"
+#include "base/basictypes.h"
+#include "base/compiler_specific.h"
+#include "base/string16.h"
+#include "base/strings/string_piece.h" // For implicit conversions.
+
+// Safe standard library wrappers for all platforms.
+
+namespace base {
+
+// C standard-library functions like "strncasecmp" and "snprintf" that aren't
+// cross-platform are provided as "base::strncasecmp", and their prototypes
+// are listed below. These functions are then implemented as inline calls
+// to the platform-specific equivalents in the platform-specific headers.
+
+// Compares the two strings s1 and s2 without regard to case using
+// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
+// s2 > s1 according to a lexicographic comparison.
+int strcasecmp(const char* s1, const char* s2);
+
+// Compares up to count characters of s1 and s2 without regard to case using
+// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
+// s2 > s1 according to a lexicographic comparison.
+int strncasecmp(const char* s1, const char* s2, size_t count);
+
+// Same as strncmp but for char16 strings.
+int strncmp16(const char16* s1, const char16* s2, size_t count);
+
+// Wrapper for vsnprintf that always null-terminates and always returns the
+// number of characters that would be in an untruncated formatted
+// string, even when truncation occurs.
+int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
+ PRINTF_FORMAT(3, 0);
+
+// vswprintf always null-terminates, but when truncation occurs, it will either
+// return -1 or the number of characters that would be in an untruncated
+// formatted string. The actual return value depends on the underlying
+// C library's vswprintf implementation.
+int vswprintf(wchar_t* buffer, size_t size,
+ const wchar_t* format, va_list arguments)
+ WPRINTF_FORMAT(3, 0);
+
+// Some of these implementations need to be inlined.
+
+// We separate the declaration from the implementation of this inline
+// function just so the PRINTF_FORMAT works.
+inline int snprintf(char* buffer, size_t size, const char* format, ...)
+ PRINTF_FORMAT(3, 4); // NOTE(review): presumably (format-string index, first variadic index) - confirm in compiler_specific.h.
+inline int snprintf(char* buffer, size_t size, const char* format, ...) {
+ va_list arguments;
+ va_start(arguments, format);
+ int result = vsnprintf(buffer, size, format, arguments); // We are inside namespace base, so this is the vsnprintf declared above.
+ va_end(arguments); // Pairs with the va_start above.
+ return result;
+}
+
+// We separate the declaration from the implementation of this inline
+// function just so the WPRINTF_FORMAT works.
+inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...)
+ WPRINTF_FORMAT(3, 4); // NOTE(review): presumably (format-string index, first variadic index) - confirm in compiler_specific.h.
+inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
+ va_list arguments;
+ va_start(arguments, format);
+ int result = vswprintf(buffer, size, format, arguments); // We are inside namespace base, so this is the vswprintf declared above.
+ va_end(arguments); // Pairs with the va_start above.
+ return result;
+}
+
+// BSD-style safe and consistent string copy functions.
+// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
+// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
+// long as |dst_size| is not 0. Returns the length of |src| in characters.
+// If the return value is >= dst_size, then the output was truncated.
+// NOTE: All sizes are in number of characters, NOT in bytes.
+BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
+BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
+
+// Scan a wprintf format string to determine whether it's portable across a
+// variety of systems. This function only checks that the conversion
+// specifiers used by the format string are supported and have the same meaning
+// on a variety of systems. It doesn't check for other errors that might occur
+// within a format string.
+//
+// Nonportable conversion specifiers for wprintf are:
+// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
+// data on all systems except Windows, which treat them as wchar_t data.
+// Use %ls and %lc for wchar_t data instead.
+// - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
+// which treat them as char data. Use %ls and %lc for wchar_t data
+// instead.
+// - 'F', which is not identified by Windows wprintf documentation.
+// - 'D', 'O', and 'U', which are deprecated and not available on all systems.
+// Use %ld, %lo, and %lu instead.
+//
+// Note that there is no portable conversion specifier for char data when
+// working with wprintf.
+//
+// This function is intended to be called from base::vswprintf.
+BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
+
+// Locale-independent replacement for tolower(): maps 'A'..'Z' to 'a'..'z' and
+// returns every other value unchanged. The standard library's tolower is
+// locale sensitive, which is not wanted here.
+template <class Char> inline Char ToLowerASCII(Char c) {
+  if (c < 'A' || c > 'Z')
+    return c;
+  return c + ('a' - 'A');
+}
+
+// Locale-independent replacement for toupper(): maps 'a'..'z' to 'A'..'Z' and
+// returns every other value unchanged. The standard library's toupper is
+// locale sensitive, which is not wanted here.
+template <class Char> inline Char ToUpperASCII(Char c) {
+  if (c < 'a' || c > 'z')
+    return c;
+  return c + ('A' - 'a');
+}
+
+// Function objects to aid in comparing/searching strings.
+
+template<typename Char> struct CaseInsensitiveCompare {
+ public:
+  // Returns true when |x| and |y| are equal after each has been passed
+  // through the C library's tolower().
+  bool operator()(Char x, Char y) const {
+    // TODO(darin): Do we really want to do locale sensitive comparisons here?
+    // See http://crbug.com/24917
+    const int folded_x = tolower(x);
+    const int folded_y = tolower(y);
+    return folded_x == folded_y;
+  }
+};
+
+template<typename Char> struct CaseInsensitiveCompareASCII {
+ public:
+  // Returns true when |x| and |y| are equal after ASCII-only lower-casing
+  // with ToLowerASCII (no locale involved).
+  bool operator()(Char x, Char y) const {
+    const Char folded_x = ToLowerASCII(x);
+    const Char folded_y = ToLowerASCII(y);
+    return folded_x == folded_y;
+  }
+};
+
+} // namespace base
+
+#if defined(OS_WIN)
+#include "base/string_util_win.h"
+#elif defined(OS_POSIX)
+#include "base/string_util_posix.h"
+#else
+#error Define string operations appropriately for your platform
+#endif
+
+// These threadsafe functions return references to globally unique empty
+// strings.
+//
+// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS.
+// There is only one case where you should use these: functions which need to
+// return a string by reference (e.g. as a class member accessor), and don't
+// have an empty string to use (e.g. in an error case). These should not be
+// used as initializers, function arguments, or return values for functions
+// which return by value or outparam.
+BASE_EXPORT const std::string& EmptyString();
+BASE_EXPORT const std::wstring& EmptyWString();
+BASE_EXPORT const string16& EmptyString16();
+
+BASE_EXPORT extern const wchar_t kWhitespaceWide[];
+BASE_EXPORT extern const char16 kWhitespaceUTF16[];
+BASE_EXPORT extern const char kWhitespaceASCII[];
+
+BASE_EXPORT extern const char kUtf8ByteOrderMark[];
+
+// Removes characters in |remove_chars| from anywhere in |input|. Returns true
+// if any characters were removed. |remove_chars| must be null-terminated.
+// NOTE: Safe to use the same variable for both |input| and |output|.
+BASE_EXPORT bool RemoveChars(const string16& input,
+ const char16 remove_chars[],
+ string16* output);
+BASE_EXPORT bool RemoveChars(const std::string& input,
+ const char remove_chars[],
+ std::string* output);
+
+// Replaces characters in |replace_chars| from anywhere in |input| with
+// |replace_with|. Each character in |replace_chars| will be replaced with
+// the |replace_with| string. Returns true if any characters were replaced.
+// |replace_chars| must be null-terminated.
+// NOTE: Safe to use the same variable for both |input| and |output|.
+BASE_EXPORT bool ReplaceChars(const string16& input,
+ const char16 replace_chars[],
+ const string16& replace_with,
+ string16* output);
+BASE_EXPORT bool ReplaceChars(const std::string& input,
+ const char replace_chars[],
+ const std::string& replace_with,
+ std::string* output);
+
+// Removes characters in |trim_chars| from the beginning and end of |input|.
+// |trim_chars| must be null-terminated.
+// NOTE: Safe to use the same variable for both |input| and |output|.
+BASE_EXPORT bool TrimString(const std::wstring& input,
+ const wchar_t trim_chars[],
+ std::wstring* output);
+BASE_EXPORT bool TrimString(const string16& input,
+ const char16 trim_chars[],
+ string16* output);
+BASE_EXPORT bool TrimString(const std::string& input,
+ const char trim_chars[],
+ std::string* output);
+
+// Truncates a string to the nearest UTF-8 character that will leave
+// the string less than or equal to the specified byte size.
+BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
+ const size_t byte_size,
+ std::string* output);
+
+// Trims any whitespace from either end of the input string. Returns where
+// whitespace was found.
+// The non-wide (std::string) version is TrimWhitespaceASCII(), which is for
+// ASCII strings and only looks for ASCII whitespace. The std::string overload
+// of TrimWhitespace() declared below is deprecated and only calls
+// TrimWhitespaceASCII(); please choose the best one according to your usage.
+// NOTE: Safe to use the same variable for both input and output.
+enum TrimPositions { // Bit flags; TRIM_ALL is the OR of the two single-bit values.
+ TRIM_NONE = 0, // No bit set.
+ TRIM_LEADING = 1 << 0, // Bit 0 (the leading end of the string).
+ TRIM_TRAILING = 1 << 1, // Bit 1 (the trailing end of the string).
+ TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, // Both bits set.
+};
+BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
+ TrimPositions positions,
+ string16* output);
+BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
+ TrimPositions positions,
+ std::string* output);
+
+// Deprecated. This function is only for backward compatibility and calls
+// TrimWhitespaceASCII().
+BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input,
+ TrimPositions positions,
+ std::string* output);
+
+// Searches for CR or LF characters. Removes all contiguous whitespace
+// strings that contain them. This is useful when trying to deal with text
+// copied from terminals.
+// Returns |text|, with the following three transformations:
+// (1) Leading and trailing whitespace is trimmed.
+// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
+// sequences containing a CR or LF are trimmed.
+// (3) All other whitespace sequences are converted to single spaces.
+BASE_EXPORT std::wstring CollapseWhitespace(
+ const std::wstring& text,
+ bool trim_sequences_with_line_breaks);
+BASE_EXPORT string16 CollapseWhitespace(
+ const string16& text,
+ bool trim_sequences_with_line_breaks);
+BASE_EXPORT std::string CollapseWhitespaceASCII(
+ const std::string& text,
+ bool trim_sequences_with_line_breaks);
+
+// Returns true if the passed string is empty or contains only white-space
+// characters.
+BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str);
+BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str);
+
+// Returns true if |input| is empty or contains only characters found in
+// |characters|.
+BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input,
+ const std::wstring& characters);
+BASE_EXPORT bool ContainsOnlyChars(const string16& input,
+ const string16& characters);
+BASE_EXPORT bool ContainsOnlyChars(const std::string& input,
+ const std::string& characters);
+
+// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
+// beforehand.
+BASE_EXPORT std::string WideToASCII(const std::wstring& wide);
+BASE_EXPORT std::string UTF16ToASCII(const string16& utf16);
+
+// Converts the given wide string to the corresponding Latin1. This will fail
+// (return false) if any characters are more than 255.
+BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1);
+
+// Returns true if the specified string matches the criteria. How can a wide
+// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
+// first case) or characters that use only 8-bits and whose 8-bit
+// representation looks like a UTF-8 string (the second case).
+//
+// Note that IsStringUTF8 checks not only if the input is structurally
+// valid but also if it doesn't contain any non-character codepoint
+// (e.g. U+FFFE). It's done on purpose because all the existing callers want
+// to have the maximum 'discriminating' power from other encodings. If
+// there's a use case for just checking the structural validity, we have to
+// add a new function for that.
+BASE_EXPORT bool IsStringUTF8(const std::string& str);
+BASE_EXPORT bool IsStringASCII(const std::wstring& str);
+BASE_EXPORT bool IsStringASCII(const base::StringPiece& str);
+BASE_EXPORT bool IsStringASCII(const string16& str);
+
+// In-place variant: replaces every element of |*s| with the result of
+// base::ToLowerASCII. It takes a pointer so that call sites are clearly
+// distinguishable from the copying variant below.
+template <class str> inline void StringToLowerASCII(str* s) {
+  typename str::iterator it = s->begin();
+  while (it != s->end()) {
+    *it = base::ToLowerASCII(*it);
+    ++it;
+  }
+}
+
+// Copying variant (for std::string and std::wstring): returns a lower-cased
+// copy of |s| and leaves |s| itself untouched.
+template <class str> inline str StringToLowerASCII(const str& s) {
+  str lowered(s);
+  StringToLowerASCII(&lowered);
+  return lowered;
+}
+
+// In-place variant: replaces every element of |*s| with the result of
+// base::ToUpperASCII. It takes a pointer so that call sites are clearly
+// distinguishable from the copying variant below.
+template <class str> inline void StringToUpperASCII(str* s) {
+  typename str::iterator it = s->begin();
+  while (it != s->end()) {
+    *it = base::ToUpperASCII(*it);
+    ++it;
+  }
+}
+
+// Copying variant (for std::string and std::wstring): returns an upper-cased
+// copy of |s| and leaves |s| itself untouched.
+template <class str> inline str StringToUpperASCII(const str& s) {
+  str uppered(s);
+  StringToUpperASCII(&uppered);
+  return uppered;
+}
+
+// Compare the lower-case form of the given string against the given ASCII
+// string. This is useful for doing checking if an input string matches some
+// token, and it is optimized to avoid intermediate string copies. This API is
+// borrowed from the equivalent APIs in Mozilla.
+BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b);
+
+// Same thing, but with string iterators instead.
+BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
+ std::string::const_iterator a_end,
+ const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
+ std::wstring::const_iterator a_end,
+ const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
+ string16::const_iterator a_end,
+ const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
+ const char* a_end,
+ const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin,
+ const wchar_t* a_end,
+ const char* b);
+BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
+ const char16* a_end,
+ const char* b);
+
+// Performs a case-sensitive string compare. The behavior is undefined if both
+// strings are not ASCII.
+BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b);
+
+// Returns true if str starts with search, or false otherwise.
+BASE_EXPORT bool StartsWithASCII(const std::string& str,
+ const std::string& search,
+ bool case_sensitive);
+BASE_EXPORT bool StartsWith(const std::wstring& str,
+ const std::wstring& search,
+ bool case_sensitive);
+BASE_EXPORT bool StartsWith(const string16& str,
+ const string16& search,
+ bool case_sensitive);
+
+// Returns true if str ends with search, or false otherwise.
+BASE_EXPORT bool EndsWith(const std::string& str,
+ const std::string& search,
+ bool case_sensitive);
+BASE_EXPORT bool EndsWith(const std::wstring& str,
+ const std::wstring& search,
+ bool case_sensitive);
+BASE_EXPORT bool EndsWith(const string16& str,
+ const string16& search,
+ bool case_sensitive);
+
+
+// Locale-independent ASCII character classification (the C library
+// equivalents change behaviour with the locale).
+//
+// Returns true for space, tab, carriage return and line feed only.
+template <typename Char>
+inline bool IsAsciiWhitespace(Char c) {
+  if (c == ' ' || c == '\t')
+    return true;
+  return c == '\r' || c == '\n';
+}
+// Returns true for 'A'..'Z' and 'a'..'z' only.
+template <typename Char>
+inline bool IsAsciiAlpha(Char c) {
+  const bool is_upper = (c >= 'A') && (c <= 'Z');
+  const bool is_lower = (c >= 'a') && (c <= 'z');
+  return is_upper || is_lower;
+}
+// Returns true for '0'..'9' only.
+template <typename Char>
+inline bool IsAsciiDigit(Char c) {
+  if (c < '0')
+    return false;
+  return c <= '9';
+}
+
+// Returns true for '0'..'9', 'A'..'F' and 'a'..'f' only.
+template <typename Char>
+inline bool IsHexDigit(Char c) {
+  if (c >= '0' && c <= '9')
+    return true;
+  if (c >= 'A' && c <= 'F')
+    return true;
+  return c >= 'a' && c <= 'f';
+}
+
+// Converts the hexadecimal digit |c| to its numeric value (0-15), returned as
+// a Char. |c| must satisfy IsHexDigit (DCHECKed); for any other input that
+// gets past the DCHECK the result is 0.
+template <typename Char>
+inline Char HexDigitToInt(Char c) {
+  DCHECK(IsHexDigit(c));
+  if (c >= 'a' && c <= 'f')
+    return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F')
+    return c - 'A' + 10;
+  if (c >= '0' && c <= '9')
+    return c - '0';
+  return 0;
+}
+
+// Returns true if |c| is one of the characters listed in kWhitespaceWide.
+// wcschr() treats the terminating null of the searched string as part of
+// that string, so a plain lookup would report L'\0' as whitespace; the null
+// character is therefore rejected before searching.
+inline bool IsWhitespace(wchar_t c) {
+  return c != L'\0' && wcschr(kWhitespaceWide, c) != NULL;
+}
+
+// Return a byte string in human-readable format with a unit suffix. Not
+// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
+// highly recommended instead. TODO(avi): Figure out how to get callers to use
+// FormatBytes instead; remove this.
+BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
+
+// Starting at |start_offset| (usually 0), replace the first instance of
+// |find_this| with |replace_with|.
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
+ string16* str,
+ string16::size_type start_offset,
+ const string16& find_this,
+ const string16& replace_with);
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
+ std::string* str,
+ std::string::size_type start_offset,
+ const std::string& find_this,
+ const std::string& replace_with);
+
+// Starting at |start_offset| (usually 0), look through |str| and replace all
+// instances of |find_this| with |replace_with|.
+//
+// This does entire substrings; use std::replace in <algorithm> for single
+// characters, for example:
+// std::replace(str.begin(), str.end(), 'a', 'b');
+BASE_EXPORT void ReplaceSubstringsAfterOffset(
+ string16* str,
+ string16::size_type start_offset,
+ const string16& find_this,
+ const string16& replace_with);
+BASE_EXPORT void ReplaceSubstringsAfterOffset(
+ std::string* str,
+ std::string::size_type start_offset,
+ const std::string& find_this,
+ const std::string& replace_with);
+
+// Reserves enough memory in |str| to accommodate |length_with_null| characters,
+// sets the size of |str| to |length_with_null - 1| characters, and returns a
+// pointer to the underlying contiguous array of characters. This is typically
+// used when calling a function that writes results into a character array, but
+// the caller wants the data to be managed by a string-like object. It is
+// convenient in that it can be used inline in the call, and fast in that it
+// avoids copying the results of the call from a char* into a string.
+//
+// |length_with_null| must be at least 2, since otherwise the underlying string
+// would have size 0, and trying to access &((*str)[0]) in that case can result
+// in a number of problems.
+//
+// Internally, this takes linear time because the resize() call 0-fills the
+// underlying array for potentially all
+// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
+// could avoid this aspect of the resize() call, as we expect the caller to
+// immediately write over this memory, but there is no other way to set the size
+// of the string, and not doing that will mean people who access |str| rather
+// than str.c_str() will get back a string of whatever size |str| had on entry
+// to this function (probably 0).
+template <class string_type>
+inline typename string_type::value_type* WriteInto(string_type* str,
+ size_t length_with_null) {
+ DCHECK_GT(length_with_null, 1u); // Checks the "at least 2" precondition described above.
+ str->reserve(length_with_null); // Capacity request counts the terminator.
+ str->resize(length_with_null - 1); // Size does not count the terminator.
+ return &((*str)[0]); // Size is at least 1 here when the DCHECK above holds, so element 0 exists.
+}
+
+//-----------------------------------------------------------------------------
+
+// Splits a string into its fields delimited by any of the characters in
+// |delimiters|. Each field is added to the |tokens| vector. Returns the
+// number of tokens found.
+BASE_EXPORT size_t Tokenize(const std::wstring& str,
+ const std::wstring& delimiters,
+ std::vector<std::wstring>* tokens);
+BASE_EXPORT size_t Tokenize(const string16& str,
+ const string16& delimiters,
+ std::vector<string16>* tokens);
+BASE_EXPORT size_t Tokenize(const std::string& str,
+ const std::string& delimiters,
+ std::vector<std::string>* tokens);
+BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
+ const base::StringPiece& delimiters,
+ std::vector<base::StringPiece>* tokens);
+
+// Does the opposite of SplitString().
+BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s);
+BASE_EXPORT std::string JoinString(
+ const std::vector<std::string>& parts, char s);
+
+// Join |parts| using |separator|.
+BASE_EXPORT std::string JoinString(
+ const std::vector<std::string>& parts,
+ const std::string& separator);
+BASE_EXPORT string16 JoinString(
+ const std::vector<string16>& parts,
+ const string16& separator);
+
+// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
+// Additionally, any number of consecutive '$' characters is replaced by that
+// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
+// NULL. This only allows you to use up to nine replacements.
+BASE_EXPORT string16 ReplaceStringPlaceholders(
+ const string16& format_string,
+ const std::vector<string16>& subst,
+ std::vector<size_t>* offsets);
+
+BASE_EXPORT std::string ReplaceStringPlaceholders(
+ const base::StringPiece& format_string,
+ const std::vector<std::string>& subst,
+ std::vector<size_t>* offsets);
+
+// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
+BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
+ const string16& a,
+ size_t* offset);
+
+// Returns true if the string passed in matches the pattern. The pattern
+// string can contain wildcards like * and ?
+// The backslash character (\) is an escape character for * and ?
+// We limit the patterns to having a max of 16 * or ? characters.
+// ? matches 0 or 1 character, while * matches 0 or more characters.
+BASE_EXPORT bool MatchPattern(const base::StringPiece& string,
+ const base::StringPiece& pattern);
+BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern);
+
+// Hack to convert any char-like type to its unsigned counterpart.
+// For example, it will convert char, signed char and unsigned char to unsigned
+// char. Use it as ToUnsigned<T>::Unsigned.
+template<typename T>
+struct ToUnsigned {
+ typedef T Unsigned; // Primary template: a type without a specialization below maps to itself.
+};
+
+template<>
+struct ToUnsigned<char> {
+ typedef unsigned char Unsigned;
+};
+template<>
+struct ToUnsigned<signed char> {
+ typedef unsigned char Unsigned;
+};
+template<>
+struct ToUnsigned<wchar_t> {
+#if defined(WCHAR_T_IS_UTF16)
+ typedef unsigned short Unsigned; // Chosen when the build declares wchar_t to be UTF-16.
+#elif defined(WCHAR_T_IS_UTF32)
+ typedef uint32 Unsigned; // Chosen when the build declares wchar_t to be UTF-32.
+#endif // If neither macro is defined, ToUnsigned<wchar_t> has no Unsigned member.
+};
+template<>
+struct ToUnsigned<short> {
+ typedef unsigned short Unsigned;
+};
+
+#endif // BASE_STRINGS_STRING_UTIL_H_
diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc
new file mode 100644
index 0000000..d92e40c
--- /dev/null
+++ b/base/strings/string_util_constants.cc
@@ -0,0 +1,55 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_util.h"
+
+#define WHITESPACE_UNICODE /* Comma-separated whitespace code points, ending in 0 */ \
+ 0x0009, /* <control-0009> to <control-000D> */ \
+ 0x000A, /* Line Feed */ \
+ 0x000B, /* Line Tabulation (vertical tab) */ \
+ 0x000C, /* Form Feed */ \
+ 0x000D, /* Carriage Return */ \
+ 0x0020, /* Space */ \
+ 0x0085, /* <control-0085> (Next Line) */ \
+ 0x00A0, /* No-Break Space */ \
+ 0x1680, /* Ogham Space Mark */ \
+ 0x180E, /* Mongolian Vowel Separator */ \
+ 0x2000, /* En Quad to Hair Space */ \
+ 0x2001, /* Em Quad */ \
+ 0x2002, /* En Space */ \
+ 0x2003, /* Em Space */ \
+ 0x2004, /* Three-Per-Em Space */ \
+ 0x2005, /* Four-Per-Em Space */ \
+ 0x2006, /* Six-Per-Em Space */ \
+ 0x2007, /* Figure Space */ \
+ 0x2008, /* Punctuation Space */ \
+ 0x2009, /* Thin Space */ \
+ 0x200A, /* Hair Space */ \
+ 0x200C, /* Zero Width Non-Joiner; NOTE(review): a format character, confirm it belongs in this list */ \
+ 0x2028, /* Line Separator */ \
+ 0x2029, /* Paragraph Separator */ \
+ 0x202F, /* Narrow No-Break Space */ \
+ 0x205F, /* Medium Mathematical Space */ \
+ 0x3000, /* Ideographic Space */ \
+ 0 /* Final element, so arrays built from this list end in zero */
+
+const wchar_t kWhitespaceWide[] = {  // The WHITESPACE_UNICODE list as wchar_t values.
+ WHITESPACE_UNICODE
+};
+
+const char16 kWhitespaceUTF16[] = {  // The same list as char16 values.
+ WHITESPACE_UNICODE
+};
+
+const char kWhitespaceASCII[] = {  // ASCII-only whitespace characters, ending in 0.
+ 0x09, // <control-0009> to <control-000D>
+ 0x0A,  // Line feed
+ 0x0B,  // Vertical tab
+ 0x0C,  // Form feed
+ 0x0D,  // Carriage return
+ 0x20, // Space
+ 0  // Final zero element
+};
+
+const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";  // U+FEFF encoded as UTF-8.
diff --git a/base/strings/string_util_posix.h b/base/strings/string_util_posix.h
new file mode 100644
index 0000000..34b14f1
--- /dev/null
+++ b/base/strings/string_util_posix.h
@@ -0,0 +1,53 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_
+#define BASE_STRINGS_STRING_UTIL_POSIX_H_
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "base/logging.h"
+#include "base/strings/string_util.h"
+
+namespace base {
+
+// Chromium code style is to not use malloc'd strings; this is only for use
+// for interaction with APIs that require it.
+inline char* strdup(const char* str) {
+ return ::strdup(str);
+}
+
+inline int strcasecmp(const char* string1, const char* string2) {
+ return ::strcasecmp(string1, string2);
+}
+
+inline int strncasecmp(const char* string1, const char* string2, size_t count) {
+ return ::strncasecmp(string1, string2, count);
+}
+
+inline int vsnprintf(char* buffer, size_t size,
+ const char* format, va_list arguments) {
+ return ::vsnprintf(buffer, size, format, arguments);
+}
+
+inline int strncmp16(const char16* s1, const char16* s2, size_t count) {
+#if defined(WCHAR_T_IS_UTF16)
+ return ::wcsncmp(s1, s2, count);
+#elif defined(WCHAR_T_IS_UTF32)
+ return c16memcmp(s1, s2, count);
+#endif
+}
+
+inline int vswprintf(wchar_t* buffer, size_t size,
+ const wchar_t* format, va_list arguments) {
+ DCHECK(IsWprintfFormatPortable(format));
+ return ::vswprintf(buffer, size, format, arguments);
+}
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
new file mode 100644
index 0000000..58b7620
--- /dev/null
+++ b/base/strings/string_util_unittest.cc
@@ -0,0 +1,1191 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_util.h"
+
+#include <math.h>
+#include <stdarg.h>
+
+#include <limits>
+#include <sstream>
+
+#include "base/basictypes.h"
+#include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using ::testing::ElementsAre;
+
+namespace base {
+
+static const struct trim_case {  // One TrimWhitespace() scenario with wide-string data.
+ const wchar_t* input;  // String to trim (converted with WideToUTF16 by the test).
+ const TrimPositions positions;  // Which end(s) to trim.
+ const wchar_t* output;  // Expected trimmed string.
+ const TrimPositions return_value;  // Expected return value of TrimWhitespace().
+} trim_cases[] = {
+ {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
+ {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
+ {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
+ {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
+ {L"", TRIM_ALL, L"", TRIM_NONE},
+ {L" ", TRIM_LEADING, L"", TRIM_LEADING},
+ {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
+ {L" ", TRIM_ALL, L"", TRIM_ALL},
+ {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},  // Control-character whitespace.
+ {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},  // Non-ASCII whitespace.
+};
+
+static const struct trim_case_ascii {  // One TrimWhitespace() scenario with narrow-string data.
+ const char* input;  // String to trim.
+ const TrimPositions positions;  // Which end(s) to trim.
+ const char* output;  // Expected trimmed string.
+ const TrimPositions return_value;  // Expected return value of TrimWhitespace().
+} trim_cases_ascii[] = {
+ {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
+ {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
+ {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
+ {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
+ {"", TRIM_ALL, "", TRIM_NONE},
+ {" ", TRIM_LEADING, "", TRIM_LEADING},
+ {" ", TRIM_TRAILING, "", TRIM_TRAILING},
+ {" ", TRIM_ALL, "", TRIM_ALL},
+ {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},  // Control-character whitespace.
+};
+
+namespace {
+
+// Test helper for TruncateUTF8ToByteSize(): performs the truncation and
+// returns true when the resulting |output| length differs from the length
+// |input| had before the call. The length is sampled up front because the
+// tests also pass the same string as both |input| and |output|.
+bool Truncated(const std::string& input, const size_t byte_size,
+               std::string* output) {
+  const size_t length_before = input.length();
+  TruncateUTF8ToByteSize(input, byte_size, output);
+  const size_t length_after = output->length();
+  return length_after != length_before;
+}
+
+}  // namespace
+
+// Exercises TruncateUTF8ToByteSize() through the Truncated() helper. Each
+// check pins whether the length changed and, in most cases, the exact bytes
+// left in |output|.
+TEST(StringUtilTest, TruncateUTF8ToByteSize) {
+  std::string output;
+
+  // Empty strings and invalid byte_size arguments
+  EXPECT_FALSE(Truncated(std::string(), 0, &output));
+  EXPECT_EQ(output, "");
+  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
+  EXPECT_EQ(output, "");
+  // The largest size_t value, spelled out instead of relying on the implicit
+  // conversion of -1.
+  EXPECT_FALSE(Truncated("\xe1\x80\xbf", std::numeric_limits<size_t>::max(),
+                         &output));
+  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
+
+  // Testing the truncation of valid UTF8 correctly
+  EXPECT_TRUE(Truncated("abc", 2, &output));
+  EXPECT_EQ(output, "ab");
+  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
+  EXPECT_EQ(output.compare("\xc2\x81"), 0);
+  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
+  EXPECT_EQ(output.compare("\xc2\x81"), 0);
+  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
+  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
+
+  // Inputs with embedded NULs. arraysize() counts the literal's terminating
+  // NUL, so these strings are one byte longer than the visible escapes.
+  {
+    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
+    const std::string array_string(array, arraysize(array));
+    EXPECT_TRUE(Truncated(array_string, 4, &output));
+    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
+  }
+
+  {
+    const char array[] = "\x00\xc2\x81\xc2\x81";
+    const std::string array_string(array, arraysize(array));
+    EXPECT_TRUE(Truncated(array_string, 4, &output));
+    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
+  }
+
+  // Testing invalid UTF8
+  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
+  EXPECT_EQ(output.compare(""), 0);
+
+  // Testing invalid UTF8 mixed with valid UTF8
+  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
+  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
+  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
+  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
+  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
+                         10, &output));
+  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
+  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
+                        10, &output));
+  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
+  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
+  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
+
+  // Overlong sequences
+  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
+  EXPECT_EQ(output.compare(""), 0);
+
+  // Beyond U+10FFFF (the upper limit of Unicode codespace)
+  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
+  EXPECT_EQ(output.compare(""), 0);
+
+  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
+  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
+  EXPECT_EQ(output.compare(""), 0);
+
+  {
+    const char array[] = "\x00\x00\xfe\xff";
+    const std::string array_string(array, arraysize(array));
+    EXPECT_TRUE(Truncated(array_string, 4, &output));
+    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
+  }
+
+  // Variants on the previous test
+  {
+    const char array[] = "\xff\xfe\x00\x00";
+    const std::string array_string(array, 4);
+    EXPECT_FALSE(Truncated(array_string, 4, &output));
+    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
+  }
+  {
+    const char array[] = "\xff\x00\x00\xfe";
+    const std::string array_string(array, arraysize(array));
+    EXPECT_TRUE(Truncated(array_string, 4, &output));
+    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
+  }
+
+  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
+  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));  // U+FFFE
+  EXPECT_EQ(output.compare(""), 0);
+  // U+1FFFE in its shortest (4-byte) form, so that it is rejected as a
+  // non-character rather than as an overlong sequence.
+  EXPECT_TRUE(Truncated("\xf0\x9f\xbf\xbe", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));  // U+FFFFF
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));  // U+FDD0
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));  // U+FDEF
+  EXPECT_EQ(output.compare(""), 0);
+
+  // Strings in legacy encodings. In real data these are usually not valid
+  // UTF-8.
+  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));  // ISO-8859-1
+  EXPECT_EQ(output.compare("caf"), 0);
+  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));  // EUC-KR
+  EXPECT_EQ(output.compare(""), 0);
+  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));  // Big5
+  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
+  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
+                        &output));
+  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
+
+  // Testing using the same string as input and output.
+  EXPECT_FALSE(Truncated(output, 4, &output));
+  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
+  EXPECT_TRUE(Truncated(output, 3, &output));
+  EXPECT_EQ(output.compare("\xa7\x41"), 0);
+
+  // "abc" with U+201[CD] in windows-125[0-8]
+  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
+  EXPECT_EQ(output.compare("\x93" "abc"), 0);
+
+  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
+  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+
+  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
+  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
+  EXPECT_EQ(output.compare(""), 0);
+}
+
+// Runs both trim tables through TrimWhitespace() and also checks that the
+// function tolerates the input and output being the same string.
+TEST(StringUtilTest, TrimWhitespace) {
+  // Deliberately shared across iterations so that contents carry over from
+  // one case to the next.
+  string16 trimmed;
+  for (size_t index = 0; index < arraysize(trim_cases); ++index) {
+    const trim_case& test_case = trim_cases[index];
+    const string16 expected = WideToUTF16(test_case.output);
+    EXPECT_EQ(test_case.return_value,
+              TrimWhitespace(WideToUTF16(test_case.input),
+                             test_case.positions, &trimmed));
+    EXPECT_EQ(expected, trimmed);
+  }
+
+  // Input and output may be the same string.
+  trimmed = ASCIIToUTF16(" This is a test \r\n");
+  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
+  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);
+
+  // Same aliasing check when the string is nothing but whitespace.
+  trimmed = ASCIIToUTF16(" \r\n");
+  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
+  EXPECT_EQ(string16(), trimmed);
+
+  // Narrow-string table.
+  std::string trimmed_ascii;
+  for (size_t index = 0; index < arraysize(trim_cases_ascii); ++index) {
+    const trim_case_ascii& test_case = trim_cases_ascii[index];
+    EXPECT_EQ(test_case.return_value,
+              TrimWhitespace(test_case.input, test_case.positions,
+                             &trimmed_ascii));
+    EXPECT_EQ(test_case.output, trimmed_ascii);
+  }
+}
+
+static const struct collapse_case {  // One CollapseWhitespace() scenario with wide-string data.
+ const wchar_t* input;  // String to collapse.
+ const bool trim;  // Second argument passed to CollapseWhitespace().
+ const wchar_t* output;  // Expected result.
+} collapse_cases[] = {
+ {L" Google Video ", false, L"Google Video"},
+ {L"Google Video", false, L"Google Video"},
+ {L"", false, L""},
+ {L" ", false, L""},
+ {L"\t\rTest String\n", false, L"Test String"},
+ {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
+ {L" Test \n \t String ", false, L"Test String"},
+ {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
+ {L" Test String", false, L"Test String"},
+ {L"Test String ", false, L"Test String"},
+ {L"Test String", false, L"Test String"},
+ {L"", true, L""},  // Cases from here on pass trim = true.
+ {L"\n", true, L""},
+ {L" \r ", true, L""},
+ {L"\nFoo", true, L"Foo"},
+ {L"\r Foo ", true, L"Foo"},
+ {L" Foo bar ", true, L"Foo bar"},
+ {L" \tFoo bar \n", true, L"Foo bar"},
+ {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
+};
+
+// Checks CollapseWhitespace() against every row of |collapse_cases|.
+TEST(StringUtilTest, CollapseWhitespace) {
+  const size_t case_count = arraysize(collapse_cases);
+  for (size_t index = 0; index != case_count; ++index) {
+    const collapse_case& test_case = collapse_cases[index];
+    EXPECT_EQ(test_case.output,
+              CollapseWhitespace(test_case.input, test_case.trim));
+  }
+}
+
+static const struct collapse_case_ascii {  // One CollapseWhitespaceASCII() scenario.
+ const char* input;  // String to collapse.
+ const bool trim;  // Second argument passed to CollapseWhitespaceASCII().
+ const char* output;  // Expected result.
+} collapse_cases_ascii[] = {
+ {" Google Video ", false, "Google Video"},
+ {"Google Video", false, "Google Video"},
+ {"", false, ""},
+ {" ", false, ""},
+ {"\t\rTest String\n", false, "Test String"},
+ {" Test \n \t String ", false, "Test String"},
+ {" Test String", false, "Test String"},
+ {"Test String ", false, "Test String"},
+ {"Test String", false, "Test String"},
+ {"", true, ""},  // Cases from here on pass trim = true.
+ {"\n", true, ""},
+ {" \r ", true, ""},
+ {"\nFoo", true, "Foo"},
+ {"\r Foo ", true, "Foo"},
+ {" Foo bar ", true, "Foo bar"},
+ {" \tFoo bar \n", true, "Foo bar"},
+ {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
+};
+
+// Checks CollapseWhitespaceASCII() against every row of
+// |collapse_cases_ascii|.
+TEST(StringUtilTest, CollapseWhitespaceASCII) {
+  const size_t case_count = arraysize(collapse_cases_ascii);
+  for (size_t index = 0; index != case_count; ++index) {
+    const collapse_case_ascii& test_case = collapse_cases_ascii[index];
+    EXPECT_EQ(test_case.output,
+              CollapseWhitespaceASCII(test_case.input, test_case.trim));
+  }
+}
+
+TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {  // Narrow-string variant.
+ EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));  // The empty string counts as whitespace-only.
+ EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
+ EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
+ EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));
+ EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
+ EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));  // Whitespace around a word is not enough.
+}
+
+TEST(StringUtilTest, ContainsOnlyWhitespace) {  // string16 variant.
+ EXPECT_TRUE(ContainsOnlyWhitespace(string16()));  // The empty string counts as whitespace-only.
+ EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
+ EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
+ EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));
+ EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
+ EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));  // Whitespace around a word is not enough.
+}
+
+// Checks IsStringUTF8() against well-formed UTF-8 and against the main
+// families of malformed input: surrogates, overlong forms, out-of-range code
+// points, foreign BOMs, non-characters and legacy 8-bit encodings.
+TEST(StringUtilTest, IsStringUTF8) {
+  EXPECT_TRUE(IsStringUTF8("abc"));
+  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
+  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
+  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
+  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
+  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
+
+  // surrogate code points
+  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
+  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
+  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
+
+  // overlong sequences
+  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
+  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
+  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
+  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
+  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
+  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
+  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
+  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
+  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
+  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
+  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
+
+  // Beyond U+10FFFF (the upper limit of Unicode codespace)
+  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
+  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
+  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
+
+  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
+  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
+  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
+  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
+  // The explicit length keeps the two trailing NULs; a plain C string would
+  // stop at the first NUL and merely repeat the "\xff\xfe" check above.
+  EXPECT_FALSE(IsStringUTF8(std::string("\xff\xfe\x00\x00", 4)));
+
+  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
+  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE
+  // Shortest-form encoding, so this is rejected as a non-character and not
+  // as an overlong sequence.
+  EXPECT_FALSE(IsStringUTF8("\xf0\x9f\xbf\xbe"));  // U+1FFFE
+  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+FFFFF
+  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
+  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
+  // Strings in legacy encodings. We can certainly make up strings
+  // in a legacy encoding that are valid in UTF-8, but in real data,
+  // most of them are invalid as UTF-8.
+  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
+  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC01 in EUC-KR
+  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
+  // "abc" with U+201[CD] in windows-125[0-8]
+  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
+  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
+  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
+  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
+  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
+
+  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
+  // representation, and the second uses a 2-byte sequence. The second version
+  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
+  // given codepoint must be used.
+  static const char kEmbeddedNull[] = "embedded\0null";
+  EXPECT_TRUE(IsStringUTF8(
+      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
+  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
+}
+
+// Round-trips ASCII data between narrow and wide strings and checks
+// IsStringASCII() on both representations.
+TEST(StringUtilTest, ConvertASCII) {
+  static const char* char_cases[] = {
+    "Google Video",
+    "Hello, world\n",
+    "0123ABCDwxyz \a\b\t\r\n!+,.~"
+  };
+
+  static const wchar_t* const wchar_cases[] = {
+    L"Google Video",
+    L"Hello, world\n",
+    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
+  };
+
+  // The two tables hold the same text, so entry |index| of one must convert
+  // to entry |index| of the other.
+  for (size_t index = 0; index < arraysize(char_cases); ++index) {
+    const char* const narrow_case = char_cases[index];
+    const wchar_t* const wide_case = wchar_cases[index];
+
+    EXPECT_TRUE(IsStringASCII(narrow_case));
+    std::wstring widened = ASCIIToWide(narrow_case);
+    EXPECT_EQ(wide_case, widened);
+
+    EXPECT_TRUE(IsStringASCII(wide_case));
+    std::string narrowed = WideToASCII(wide_case);
+    EXPECT_EQ(narrow_case, narrowed);
+  }
+
+  // A single non-ASCII character makes the whole string non-ASCII.
+  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
+  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
+
+  // Empty strings convert to empty strings.
+  std::wstring empty_wide;
+  std::string empty_narrow;
+  EXPECT_EQ(empty_narrow, WideToASCII(empty_wide));
+  EXPECT_EQ(empty_wide, ASCIIToWide(empty_narrow));
+
+  // An embedded NUL character must survive conversion in both directions.
+  const char chars_with_nul[] = "test\0string";
+  const int length_with_nul = arraysize(chars_with_nul) - 1;
+  std::string string_with_nul(chars_with_nul, length_with_nul);
+
+  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
+  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
+            wide_with_nul.length());
+
+  std::string narrow_with_nul = WideToASCII(wide_with_nul);
+  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
+            narrow_with_nul.length());
+  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
+}
+
+// Covers ToUpperASCII() on single characters and StringToUpperASCII() in
+// both its in-place and its value-returning form, for narrow and wide data.
+TEST(StringUtilTest, ToUpperASCII) {
+  // Single narrow characters.
+  EXPECT_EQ('C', ToUpperASCII('C'));
+  EXPECT_EQ('C', ToUpperASCII('c'));
+  EXPECT_EQ('2', ToUpperASCII('2'));
+
+  // Single wide characters.
+  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
+  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
+  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
+
+  // In-place conversion through a pointer.
+  std::string narrow_in_place("Cc2");
+  StringToUpperASCII(&narrow_in_place);
+  EXPECT_EQ("CC2", narrow_in_place);
+
+  std::wstring wide_in_place(L"Cc2");
+  StringToUpperASCII(&wide_in_place);
+  EXPECT_EQ(L"CC2", wide_in_place);
+
+  // Conversion that returns a new string.
+  std::string narrow_source("Cc2");
+  std::string narrow_upper = StringToUpperASCII(narrow_source);
+  EXPECT_EQ("CC2", narrow_upper);
+
+  std::wstring wide_source(L"Cc2");
+  std::wstring wide_upper = StringToUpperASCII(wide_source);
+  EXPECT_EQ(L"CC2", wide_upper);
+}
+
+// Every source spelling, wide or narrow, must compare equal to the
+// lower-case |dst| under LowerCaseEqualsASCII().
+TEST(StringUtilTest, LowerCaseEqualsASCII) {
+  static const struct {
+    const wchar_t* src_w;
+    const char* src_a;
+    const char* dst;
+  } lowercase_cases[] = {
+    { L"FoO", "FoO", "foo" },
+    { L"foo", "foo", "foo" },
+    { L"FOO", "FOO", "foo" },
+  };
+
+  const size_t case_count = ARRAYSIZE_UNSAFE(lowercase_cases);
+  for (size_t index = 0; index != case_count; ++index) {
+    const char* const lowered = lowercase_cases[index].dst;
+    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[index].src_w, lowered));
+    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[index].src_a, lowered));
+  }
+}
+
+// Checks the human-readable strings FormatBytesUnlocalized() produces for a
+// range of byte counts, up to the largest int64.
+TEST(StringUtilTest, FormatBytesUnlocalized) {
+  static const struct {
+    int64 bytes;
+    const char* expected;
+  } cases[] = {
+    // Expected behavior: we show one post-decimal digit when we have
+    // under two pre-decimal digits, except in cases where it makes no
+    // sense (zero or bytes).
+    // Since we switch units once we cross the 1000 mark, this keeps
+    // the display of file sizes or bytes consistently around three
+    // digits.
+    {0, "0 B"},
+    {512, "512 B"},
+    {1024*1024, "1.0 MB"},
+    {1024*1024*1024, "1.0 GB"},
+    {10LL*1024*1024*1024, "10.0 GB"},
+    {99LL*1024*1024*1024, "99.0 GB"},
+    {105LL*1024*1024*1024, "105 GB"},
+    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
+    // Largest int64, named directly rather than built by shifting a 1 into
+    // the sign bit and inverting.
+    {std::numeric_limits<int64>::max(), "8192 PB"},
+
+    {99*1024 + 103, "99.1 kB"},
+    {1024*1024 + 103, "1.0 MB"},
+    {1024*1024 + 205 * 1024, "1.2 MB"},
+    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
+    {10LL*1024*1024*1024, "10.0 GB"},
+    {100LL*1024*1024*1024, "100 GB"},
+  };
+
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
+    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
+              FormatBytesUnlocalized(cases[i].bytes));
+  }
+}
+// Every occurrence of |find_this| at or after |start_offset| is expected to
+// be replaced; text before the offset stays untouched.
+TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
+  static const struct {
+    const char* str;
+    string16::size_type start_offset;
+    const char* find_this;
+    const char* replace_with;
+    const char* expected;
+  } cases[] = {
+    {"aaa", 0, "a", "b", "bbb"},
+    {"abb", 0, "ab", "a", "ab"},
+    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
+    {"Not found", 0, "x", "0", "Not found"},
+    {"Not found again", 5, "x", "0", "Not found again"},
+    {" Making it much longer ", 0, " ", "Four score and seven years ago",
+     "Four score and seven years agoMakingFour score and seven years agoit"
+     "Four score and seven years agomuchFour score and seven years agolonger"
+     "Four score and seven years ago"},
+    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
+    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
+    {"abababab", 2, "ab", "c", "abccc"},
+  };
+
+  const size_t case_count = ARRAYSIZE_UNSAFE(cases);
+  for (size_t index = 0; index != case_count; ++index) {
+    string16 subject = ASCIIToUTF16(cases[index].str);
+    const string16 needle = ASCIIToUTF16(cases[index].find_this);
+    const string16 replacement = ASCIIToUTF16(cases[index].replace_with);
+    ReplaceSubstringsAfterOffset(&subject, cases[index].start_offset,
+                                 needle, replacement);
+    EXPECT_EQ(ASCIIToUTF16(cases[index].expected), subject);
+  }
+}
+
+// Only the first occurrence of |find_this| at or after |start_offset| is
+// expected to be replaced.
+TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
+  static const struct {
+    const char* str;
+    string16::size_type start_offset;
+    const char* find_this;
+    const char* replace_with;
+    const char* expected;
+  } cases[] = {
+    {"aaa", 0, "a", "b", "baa"},
+    {"abb", 0, "ab", "a", "ab"},
+    {"Removing some substrings inging", 0, "ing", "",
+     "Remov some substrings inging"},
+    {"Not found", 0, "x", "0", "Not found"},
+    {"Not found again", 5, "x", "0", "Not found again"},
+    {" Making it much longer ", 0, " ", "Four score and seven years ago",
+     "Four score and seven years agoMaking it much longer "},
+    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
+    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
+    {"abababab", 2, "ab", "c", "abcabab"},
+  };
+
+  const size_t case_count = ARRAYSIZE_UNSAFE(cases);
+  for (size_t index = 0; index != case_count; ++index) {
+    string16 subject = ASCIIToUTF16(cases[index].str);
+    const string16 needle = ASCIIToUTF16(cases[index].find_this);
+    const string16 replacement = ASCIIToUTF16(cases[index].replace_with);
+    ReplaceFirstSubstringAfterOffset(&subject, cases[index].start_offset,
+                                     needle, replacement);
+    EXPECT_EQ(ASCIIToUTF16(cases[index].expected), subject);
+  }
+}
+
+TEST(StringUtilTest, HexDigitToInt) {  // Each hex digit character must map to its numeric value.
+ EXPECT_EQ(0, HexDigitToInt('0'));
+ EXPECT_EQ(1, HexDigitToInt('1'));
+ EXPECT_EQ(2, HexDigitToInt('2'));
+ EXPECT_EQ(3, HexDigitToInt('3'));
+ EXPECT_EQ(4, HexDigitToInt('4'));
+ EXPECT_EQ(5, HexDigitToInt('5'));
+ EXPECT_EQ(6, HexDigitToInt('6'));
+ EXPECT_EQ(7, HexDigitToInt('7'));
+ EXPECT_EQ(8, HexDigitToInt('8'));
+ EXPECT_EQ(9, HexDigitToInt('9'));
+ EXPECT_EQ(10, HexDigitToInt('A'));
+ EXPECT_EQ(11, HexDigitToInt('B'));
+ EXPECT_EQ(12, HexDigitToInt('C'));
+ EXPECT_EQ(13, HexDigitToInt('D'));
+ EXPECT_EQ(14, HexDigitToInt('E'));
+ EXPECT_EQ(15, HexDigitToInt('F'));
+
+ // Lower-case digits must map to the same values as their upper-case forms.
+ EXPECT_EQ(10, HexDigitToInt('a'));
+ EXPECT_EQ(11, HexDigitToInt('b'));
+ EXPECT_EQ(12, HexDigitToInt('c'));
+ EXPECT_EQ(13, HexDigitToInt('d'));
+ EXPECT_EQ(14, HexDigitToInt('e'));
+ EXPECT_EQ(15, HexDigitToInt('f'));
+}
+
+// Checks that a va_list duplicated with GG_VA_COPY yields the same arguments
+// as the list it was copied from. We need a way to do this since Visual C
+// doesn't support va_copy, and plain assignment of a va_list is not
+// guaranteed to be a copy. See StringAppendVT which uses this capability.
+//
+// The variadic arguments must be, in order: int, int, const char*, double.
+// |format| only serves as the anchor for va_start and is never parsed.
+static void VariableArgsFunc(const char* format, ...) {
+  va_list org;
+  va_start(org, format);
+
+  // Duplicate the list before anything is consumed from the original.
+  va_list dup;
+  GG_VA_COPY(dup, org);
+  int i1 = va_arg(org, int);
+  int j1 = va_arg(org, int);
+  // The caller passes a string literal, so read it back as const char*.
+  const char* s1 = va_arg(org, const char*);
+  double d1 = va_arg(org, double);
+  va_end(org);
+
+  // The copy must still be readable after the original has been ended.
+  int i2 = va_arg(dup, int);
+  int j2 = va_arg(dup, int);
+  const char* s2 = va_arg(dup, const char*);
+  double d2 = va_arg(dup, double);
+
+  EXPECT_EQ(i1, i2);
+  EXPECT_EQ(j1, j2);
+  EXPECT_STREQ(s1, s2);
+  EXPECT_EQ(d1, d2);
+
+  va_end(dup);
+}
+
+TEST(StringUtilTest, VAList) {
+  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
+}
+
+// Shared body of the Tokenize() tests. |STR| is the string type under test;
+// it must be constructible from a string literal and usable with Tokenize().
+template <typename STR>
+void TokenizeTest() {
+  std::vector<STR> tokens;
+  size_t count;
+
+  // Single-character delimiter.
+  count = Tokenize(STR("This is a string"), STR(" "), &tokens);
+  EXPECT_EQ(4U, count);
+  ASSERT_EQ(4U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("This"));
+  EXPECT_EQ(tokens[1], STR("is"));
+  EXPECT_EQ(tokens[2], STR("a"));
+  EXPECT_EQ(tokens[3], STR("string"));
+  tokens.clear();
+
+  count = Tokenize(STR("one,two,three"), STR(","), &tokens);
+  EXPECT_EQ(3U, count);
+  ASSERT_EQ(3U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR("two"));
+  EXPECT_EQ(tokens[2], STR("three"));
+  tokens.clear();
+
+  // Several delimiter characters; characters not listed stay in the token.
+  count = Tokenize(STR("one,two:three;four"), STR(",:"), &tokens);
+  EXPECT_EQ(3U, count);
+  ASSERT_EQ(3U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR("two"));
+  EXPECT_EQ(tokens[2], STR("three;four"));
+  tokens.clear();
+
+  count = Tokenize(STR("one,two:three;four"), STR(";,:"), &tokens);
+  EXPECT_EQ(4U, count);
+  ASSERT_EQ(4U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR("two"));
+  EXPECT_EQ(tokens[2], STR("three"));
+  EXPECT_EQ(tokens[3], STR("four"));
+  tokens.clear();
+
+  // Whitespace next to a delimiter is kept as part of the token.
+  count = Tokenize(STR("one, two, three"), STR(","), &tokens);
+  EXPECT_EQ(3U, count);
+  ASSERT_EQ(3U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR(" two"));
+  EXPECT_EQ(tokens[2], STR(" three"));
+  tokens.clear();
+
+  count = Tokenize(STR("one, two, three, "), STR(","), &tokens);
+  EXPECT_EQ(4U, count);
+  ASSERT_EQ(4U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR(" two"));
+  EXPECT_EQ(tokens[2], STR(" three"));
+  EXPECT_EQ(tokens[3], STR(" "));
+  tokens.clear();
+
+  // A trailing delimiter does not produce an empty token.
+  count = Tokenize(STR("one, two, three,"), STR(","), &tokens);
+  EXPECT_EQ(3U, count);
+  ASSERT_EQ(3U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("one"));
+  EXPECT_EQ(tokens[1], STR(" two"));
+  EXPECT_EQ(tokens[2], STR(" three"));
+  tokens.clear();
+
+  // Empty input and delimiter-only input yield no tokens.
+  count = Tokenize(STR(), STR(","), &tokens);
+  EXPECT_EQ(0U, count);
+  ASSERT_EQ(0U, tokens.size());
+  tokens.clear();
+
+  count = Tokenize(STR(","), STR(","), &tokens);
+  EXPECT_EQ(0U, count);
+  ASSERT_EQ(0U, tokens.size());
+  tokens.clear();
+
+  count = Tokenize(STR(",;:."), STR(".:;,"), &tokens);
+  EXPECT_EQ(0U, count);
+  ASSERT_EQ(0U, tokens.size());
+  tokens.clear();
+
+  // Leading, repeated and trailing delimiters are all skipped.
+  count = Tokenize(STR("\t\ta\t"), STR("\t"), &tokens);
+  EXPECT_EQ(1U, count);
+  ASSERT_EQ(1U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("a"));
+  tokens.clear();
+
+  count = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &tokens);
+  EXPECT_EQ(2U, count);
+  ASSERT_EQ(2U, tokens.size());
+  EXPECT_EQ(tokens[0], STR("\ta\t"));
+  EXPECT_EQ(tokens[1], STR("b\tcc"));
+  tokens.clear();
+}
+
+TEST(StringUtilTest, TokenizeStdString) {  // Runs the shared Tokenize checks with std::string.
+ TokenizeTest<std::string>();
+}
+
+TEST(StringUtilTest, TokenizeStringPiece) {  // Runs the same checks with base::StringPiece.
+ TokenizeTest<base::StringPiece>();
+}
+
+// JoinString() with a single-character separator: grows the input one piece
+// at a time and checks the joined result after each step.
+TEST(StringUtilTest, JoinString) {
+  std::vector<std::string> pieces;
+  EXPECT_EQ("", JoinString(pieces, ','));
+
+  pieces.push_back("a");
+  EXPECT_EQ("a", JoinString(pieces, ','));
+
+  pieces.push_back("b");
+  pieces.push_back("c");
+  EXPECT_EQ("a,b,c", JoinString(pieces, ','));
+
+  // An empty piece still gets its separator.
+  pieces.push_back(std::string());
+  EXPECT_EQ("a,b,c,", JoinString(pieces, ','));
+
+  pieces.push_back(" ");
+  EXPECT_EQ("a|b|c|| ", JoinString(pieces, '|'));
+}
+
+// JoinString() with a std::string separator, which may be longer than one
+// character.
+TEST(StringUtilTest, JoinStringWithString) {
+  const std::string comma_space(", ");
+  std::vector<std::string> pieces;
+  EXPECT_EQ(std::string(), JoinString(pieces, comma_space));
+
+  pieces.push_back("a");
+  EXPECT_EQ("a", JoinString(pieces, comma_space));
+
+  pieces.push_back("b");
+  pieces.push_back("c");
+  EXPECT_EQ("a, b, c", JoinString(pieces, comma_space));
+
+  // An empty piece still gets its separator.
+  pieces.push_back(std::string());
+  EXPECT_EQ("a, b, c, ", JoinString(pieces, comma_space));
+
+  pieces.push_back(" ");
+  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
+}
+
+// JoinString() with string16 pieces and a string16 separator.
+TEST(StringUtilTest, JoinStringWithString16) {
+  const string16 comma_space = ASCIIToUTF16(", ");
+  std::vector<string16> pieces;
+  EXPECT_EQ(string16(), JoinString(pieces, comma_space));
+
+  pieces.push_back(ASCIIToUTF16("a"));
+  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));
+
+  pieces.push_back(ASCIIToUTF16("b"));
+  pieces.push_back(ASCIIToUTF16("c"));
+  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));
+
+  // An empty piece still gets its separator.
+  pieces.push_back(ASCIIToUTF16(""));
+  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));
+
+  pieces.push_back(ASCIIToUTF16(" "));
+  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
+}
+
+TEST(StringUtilTest, StartsWith) {  // Per the expectations below, a true third argument makes the match case-sensitive.
+ EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
+ EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
+ EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
+ EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
+ EXPECT_FALSE(StartsWithASCII("java", "javascript", true));  // String shorter than the prefix.
+ EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
+ EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
+ EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
+ EXPECT_TRUE(StartsWithASCII("java", std::string(), false));  // An empty prefix always matches.
+ EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
+
+ EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));  // Same scenarios for the wide-string overload.
+ EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
+ EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
+ EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
+ EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
+ EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
+ EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false));
+ EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true));
+ EXPECT_TRUE(StartsWith(L"java", std::wstring(), false));
+ EXPECT_TRUE(StartsWith(L"java", std::wstring(), true));
+}
+
+TEST(StringUtilTest, EndsWith) {
+ EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
+ EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
+ EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
+ EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
+ EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
+ EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
+ EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
+ EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
+ EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false));
+ EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true));
+ EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false));
+ EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true));
+ EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
+ EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
+ EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));
+ EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));
+}
+
+TEST(StringUtilTest, GetStringFWithOffsets) {
+ std::vector<string16> subst;
+ subst.push_back(ASCIIToUTF16("1"));
+ subst.push_back(ASCIIToUTF16("2"));
+ std::vector<size_t> offsets;
+
+ ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
+ subst,
+ &offsets);
+ EXPECT_EQ(2U, offsets.size());
+ EXPECT_EQ(7U, offsets[0]);
+ EXPECT_EQ(25U, offsets[1]);
+ offsets.clear();
+
+ ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
+ subst,
+ &offsets);
+ EXPECT_EQ(2U, offsets.size());
+ EXPECT_EQ(25U, offsets[0]);
+ EXPECT_EQ(7U, offsets[1]);
+ offsets.clear();
+}
+
+TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
+ // Test whether ReplaceStringPlaceholders works as expected when there are
+ // fewer substitutions than placeholders ($4-$6 expand to nothing here).
+ std::vector<string16> subst;
+ subst.push_back(ASCIIToUTF16("9a"));
+ subst.push_back(ASCIIToUTF16("8b"));
+ subst.push_back(ASCIIToUTF16("7c"));
+
+ string16 formatted =
+ ReplaceStringPlaceholders(
+ ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
+
+ EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
+}
+
+TEST(StringUtilTest, ReplaceStringPlaceholders) {
+ std::vector<string16> subst;
+ subst.push_back(ASCIIToUTF16("9a"));
+ subst.push_back(ASCIIToUTF16("8b"));
+ subst.push_back(ASCIIToUTF16("7c"));
+ subst.push_back(ASCIIToUTF16("6d"));
+ subst.push_back(ASCIIToUTF16("5e"));
+ subst.push_back(ASCIIToUTF16("4f"));
+ subst.push_back(ASCIIToUTF16("3g"));
+ subst.push_back(ASCIIToUTF16("2h"));
+ subst.push_back(ASCIIToUTF16("1i"));
+
+ string16 formatted =
+ ReplaceStringPlaceholders(
+ ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
+
+ EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
+}
+
+TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
+ std::vector<string16> subst;
+ subst.push_back(ASCIIToUTF16("9a"));
+ subst.push_back(ASCIIToUTF16("8b"));
+ subst.push_back(ASCIIToUTF16("7c"));
+ subst.push_back(ASCIIToUTF16("6d"));
+ subst.push_back(ASCIIToUTF16("5e"));
+ subst.push_back(ASCIIToUTF16("4f"));
+ subst.push_back(ASCIIToUTF16("3g"));
+ subst.push_back(ASCIIToUTF16("2h"));
+ subst.push_back(ASCIIToUTF16("1i"));
+ subst.push_back(ASCIIToUTF16("0j"));
+ subst.push_back(ASCIIToUTF16("-1k"));
+ subst.push_back(ASCIIToUTF16("-2l"));
+ subst.push_back(ASCIIToUTF16("-3m"));
+ subst.push_back(ASCIIToUTF16("-4n"));
+
+ string16 formatted =
+ ReplaceStringPlaceholders(
+ ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
+ "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
+
+ EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
+ "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
+}
+
+TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
+ std::vector<std::string> subst;
+ subst.push_back("9a");
+ subst.push_back("8b");
+ subst.push_back("7c");
+ subst.push_back("6d");
+ subst.push_back("5e");
+ subst.push_back("4f");
+ subst.push_back("3g");
+ subst.push_back("2h");
+ subst.push_back("1i");
+
+ std::string formatted =
+ ReplaceStringPlaceholders(
+ "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
+
+ EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
+}
+
+TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
+ std::vector<std::string> subst;
+ subst.push_back("a");
+ subst.push_back("b");
+ subst.push_back("c");
+ EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
+ "$1 $$2 $$$3");
+}
+
+TEST(StringUtilTest, MatchPatternTest) {
+ EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
+ EXPECT_TRUE(MatchPattern("www.google.com", "*"));
+ EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
+ EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
+ EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
+ EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
+ EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
+ EXPECT_FALSE(MatchPattern("", "*.*"));
+ EXPECT_TRUE(MatchPattern("", "*"));
+ EXPECT_TRUE(MatchPattern("", "?"));
+ EXPECT_TRUE(MatchPattern("", ""));
+ EXPECT_FALSE(MatchPattern("Hello", ""));
+ EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
+ // Stop after a certain recursion depth.
+ EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
+
+ // Test UTF8 matching.
+ EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
+ EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
+ EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
+ // Invalid sequences should be handled as a single invalid character.
+ EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
+ // If the pattern has invalid characters, it shouldn't match anything.
+ EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
+
+ // Test UTF16 character matching.
+ EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
+ UTF8ToUTF16("*.com")));
+ EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
+ UTF8ToUTF16("He??o\\*1*")));
+
+ // This test verifies that consecutive wildcards are collapsed into one
+ // wildcard (when this doesn't occur, MatchPattern reaches its maximum
+ // recursion depth).
+ EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
+ UTF8ToUTF16("He********************************o")));
+}
+
+TEST(StringUtilTest, LcpyTest) {
+ // Test the normal case where we fit in our buffer.
+ {
+ char dst[10];
+ wchar_t wdst[10];
+ EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
+ EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
+ EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
+ EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
+ }
+
+ // Test dst_size == 0: nothing should be written to |dst| and the return
+ // value should still be the equivalent of strlen(src).
+ {
+ char dst[2] = {1, 2};
+ wchar_t wdst[2] = {1, 2};
+ EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
+ EXPECT_EQ(1, dst[0]);
+ EXPECT_EQ(2, dst[1]);
+ EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
+#if defined(WCHAR_T_IS_UNSIGNED)
+ EXPECT_EQ(1U, wdst[0]);
+ EXPECT_EQ(2U, wdst[1]);
+#else
+ EXPECT_EQ(1, wdst[0]);
+ EXPECT_EQ(2, wdst[1]);
+#endif
+ }
+
+ // Test the case where we _just_ completely fit, including the null.
+ {
+ char dst[8];
+ wchar_t wdst[8];
+ EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
+ EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
+ EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
+ EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
+ }
+
+ // Test the case where we are one smaller, so we can't fit the null.
+ {
+ char dst[7];
+ wchar_t wdst[7];
+ EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
+ EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
+ EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
+ EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
+ }
+
+ // Test the case where we are just too small.
+ {
+ char dst[3];
+ wchar_t wdst[3];
+ EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
+ EXPECT_EQ(0, memcmp(dst, "ab", 3));
+ EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
+ EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
+ }
+}
+
+TEST(StringUtilTest, WprintfFormatPortabilityTest) {
+ static const struct {
+ const wchar_t* input;
+ bool portable;
+ } cases[] = {
+ { L"%ls", true },
+ { L"%s", false },
+ { L"%S", false },
+ { L"%lS", false },
+ { L"Hello, %s", false },
+ { L"%lc", true },
+ { L"%c", false },
+ { L"%C", false },
+ { L"%lC", false },
+ { L"%ls %s", false },
+ { L"%s %ls", false },
+ { L"%s %ls %s", false },
+ { L"%f", true },
+ { L"%f %F", false },
+ { L"%d %D", false },
+ { L"%o %O", false },
+ { L"%u %U", false },
+ { L"%f %d %o %u", true },
+ { L"%-8d (%02.1f%)", true },
+ { L"% 10s", false },
+ { L"% 10ls", true }
+ };
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
+ EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
+}
+
+TEST(StringUtilTest, RemoveChars) {
+ const char* kRemoveChars = "-/+*";
+ std::string input = "A-+bc/d!*";
+ EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
+ EXPECT_EQ("Abcd!", input);
+
+ // No characters match kRemoveChars.
+ EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
+ EXPECT_EQ("Abcd!", input);
+
+ // Empty string.
+ input.clear();
+ EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
+ EXPECT_EQ(std::string(), input);
+}
+
+TEST(StringUtilTest, ReplaceChars) {
+ struct TestData {
+ const char* input;
+ const char* replace_chars;
+ const char* replace_with;
+ const char* output;
+ bool result;
+ } cases[] = {
+ { "", "", "", "", false },
+ { "test", "", "", "test", false },
+ { "test", "", "!", "test", false },
+ { "test", "z", "!", "test", false },
+ { "test", "e", "!", "t!st", true },
+ { "test", "e", "!?", "t!?st", true },
+ { "test", "ez", "!", "t!st", true },
+ { "test", "zed", "!?", "t!?st", true },
+ { "test", "t", "!?", "!?es!?", true },
+ { "test", "et", "!>", "!>!>s!>", true },
+ { "test", "zest", "!", "!!!!", true },
+ { "test", "szt", "!", "!e!!", true },
+ { "test", "t", "test", "testestest", true },
+ };
+
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
+ std::string output;
+ bool result = ReplaceChars(cases[i].input,
+ cases[i].replace_chars,
+ cases[i].replace_with,
+ &output);
+ EXPECT_EQ(cases[i].result, result);
+ EXPECT_EQ(cases[i].output, output);
+ }
+}
+
+TEST(StringUtilTest, ContainsOnlyChars) {
+ // Providing an empty list of characters should return false, except when the
+ // input is itself the empty string.
+ EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
+ EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
+
+ EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
+ EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
+ EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
+ EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
+ EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
+}
+
+class WriteIntoTest : public testing::Test {
+ protected:
+ static void WritesCorrectly(size_t num_chars) {
+ std::string buffer;
+ char kOriginal[] = "supercali";
+ strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
+ // Using std::string(buffer.c_str()) instead of |buffer| truncates the
+ // string at the first \0.
+ EXPECT_EQ(std::string(kOriginal,
+ std::min(num_chars, arraysize(kOriginal) - 1)),
+ std::string(buffer.c_str()));
+ EXPECT_EQ(num_chars, buffer.size());
+ }
+};
+
+TEST_F(WriteIntoTest, WriteInto) {
+ // Validate that WriteInto reserves enough space and
+ // sizes a string correctly.
+ WritesCorrectly(1);
+ WritesCorrectly(2);
+ WritesCorrectly(5000);
+
+ // Validate that WriteInto doesn't modify other strings
+ // when using a Copy-on-Write implementation.
+ const char kLive[] = "live";
+ const char kDead[] = "dead";
+ const std::string live = kLive;
+ std::string dead = live;
+ strncpy(WriteInto(&dead, 5), kDead, 4);
+ EXPECT_EQ(kDead, dead);
+ EXPECT_EQ(4u, dead.size());
+ EXPECT_EQ(kLive, live);
+ EXPECT_EQ(4u, live.size());
+}
+
+} // namespace base
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h
new file mode 100644
index 0000000..602ba27
--- /dev/null
+++ b/base/strings/string_util_win.h
@@ -0,0 +1,61 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_
+#define BASE_STRINGS_STRING_UTIL_WIN_H_
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+
+#include "base/logging.h"
+
+namespace base {
+
+// Chromium code style is to not use malloc'd strings; this wrapper exists only
+// for interacting with APIs that require them.
+inline char* strdup(const char* str) {
+ return _strdup(str);
+}
+
+inline int strcasecmp(const char* s1, const char* s2) {
+ return _stricmp(s1, s2);
+}
+
+inline int strncasecmp(const char* s1, const char* s2, size_t count) {
+ return _strnicmp(s1, s2, count);
+}
+
+inline int strncmp16(const char16* s1, const char16* s2, size_t count) {
+ return ::wcsncmp(s1, s2, count);
+}
+
+inline int vsnprintf(char* buffer, size_t size,
+ const char* format, va_list arguments) {
+ int length = _vsprintf_p(buffer, size, format, arguments);
+ if (length < 0) {  // Formatting into |buffer| did not succeed.
+ if (size > 0)
+ buffer[0] = 0;  // Leave an empty, NUL-terminated string behind.
+ return _vscprintf_p(format, arguments);  // Length the full output needs.
+ }
+ return length;
+}
+
+inline int vswprintf(wchar_t* buffer, size_t size,
+ const wchar_t* format, va_list arguments) {
+ DCHECK(IsWprintfFormatPortable(format));
+
+ int length = _vswprintf_p(buffer, size, format, arguments);
+ if (length < 0) {  // Formatting into |buffer| did not succeed.
+ if (size > 0)
+ buffer[0] = 0;  // Leave an empty, NUL-terminated string behind.
+ return _vscwprintf_p(format, arguments);  // Length the full output needs.
+ }
+ return length;
+}
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRING_UTIL_WIN_H_
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
new file mode 100644
index 0000000..fe23daa
--- /dev/null
+++ b/base/strings/stringprintf.cc
@@ -0,0 +1,186 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/stringprintf.h"
+
+#include <errno.h>
+
+#include "base/scoped_clear_errno.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+
+namespace base {
+
+namespace {
+
+// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
+// is the size of the buffer. These return the number of characters in the
+// formatted string excluding the NUL terminator. If the buffer is not
+// large enough to accommodate the formatted string without truncation, they
+// return the number of characters that would be in the fully-formatted string
+// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
+inline int vsnprintfT(char* buffer,
+ size_t buf_size,
+ const char* format,
+ va_list argptr) {
+ return base::vsnprintf(buffer, buf_size, format, argptr);
+}
+
+#if !defined(OS_ANDROID)
+inline int vsnprintfT(wchar_t* buffer,
+ size_t buf_size,
+ const wchar_t* format,
+ va_list argptr) {
+ return base::vswprintf(buffer, buf_size, format, argptr);
+}
+#endif
+
+// Templatized backend for StringPrintf/StringAppendF. This does not finalize
+// the va_list; the caller is expected to do that.
+template <class StringType>
+static void StringAppendVT(StringType* dst,
+ const typename StringType::value_type* format,
+ va_list ap) {
+ // First try with a small fixed size buffer.
+ // This buffer size should be kept in sync with StringPrintfTest.GrowBoundary
+ // and StringPrintfTest.StringPrintfBounds.
+ typename StringType::value_type stack_buf[1024];
+
+ va_list ap_copy;
+ GG_VA_COPY(ap_copy, ap);
+
+#if !defined(OS_WIN)
+ ScopedClearErrno clear_errno;
+#endif
+ int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy);
+ va_end(ap_copy);
+
+ if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
+ // It fit.
+ dst->append(stack_buf, result);
+ return;
+ }
+
+ // Repeatedly increase buffer size until it fits.
+ int mem_length = arraysize(stack_buf);
+ while (true) {
+ if (result < 0) {
+#if !defined(OS_WIN)
+ // On Windows, vsnprintfT always returns the number of characters in a
+ // fully-formatted string, so if we reach this point, something else is
+ // wrong and no amount of buffer-doubling is going to fix it.
+ if (errno != 0 && errno != EOVERFLOW)
+#endif
+ {
+ // If an error other than overflow occurred, it's never going to work.
+ DLOG(WARNING) << "Unable to printf the requested string due to error.";
+ return;
+ }
+ // Try doubling the buffer size.
+ mem_length *= 2;
+ } else {
+ // We need exactly "result + 1" characters.
+ mem_length = result + 1;
+ }
+
+ if (mem_length > 32 * 1024 * 1024) {
+ // That should be plenty, don't try anything larger. This protects
+ // against huge allocations when using vsnprintfT implementations that
+ // return -1 for reasons other than overflow without setting errno.
+ DLOG(WARNING) << "Unable to printf the requested string due to size.";
+ return;
+ }
+
+ std::vector<typename StringType::value_type> mem_buf(mem_length);
+
+ // NOTE: You can only use a va_list once. Since we're in a while loop, we
+ // need to make a new copy each time so we don't use up the original.
+ GG_VA_COPY(ap_copy, ap);
+ result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
+ va_end(ap_copy);
+
+ if ((result >= 0) && (result < mem_length)) {
+ // It fit.
+ dst->append(&mem_buf[0], result);
+ return;
+ }
+ }
+}
+
+} // namespace
+
+std::string StringPrintf(const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ std::string result;
+ StringAppendV(&result, format, ap);
+ va_end(ap);
+ return result;
+}
+
+#if !defined(OS_ANDROID)
+std::wstring StringPrintf(const wchar_t* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ std::wstring result;
+ StringAppendV(&result, format, ap);
+ va_end(ap);
+ return result;
+}
+#endif
+
+std::string StringPrintV(const char* format, va_list ap) {
+ std::string result;
+ StringAppendV(&result, format, ap);
+ return result;
+}
+
+const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ dst->clear();
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+ return *dst;
+}
+
+#if !defined(OS_ANDROID)
+const std::wstring& SStringPrintf(std::wstring* dst,
+ const wchar_t* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ dst->clear();
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+ return *dst;
+}
+#endif
+
+void StringAppendF(std::string* dst, const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+}
+
+#if !defined(OS_ANDROID)
+void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+}
+#endif
+
+void StringAppendV(std::string* dst, const char* format, va_list ap) {
+ StringAppendVT(dst, format, ap);
+}
+
+#if !defined(OS_ANDROID)
+void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
+ StringAppendVT(dst, format, ap);
+}
+#endif
+
+} // namespace base
diff --git a/base/strings/stringprintf.h b/base/strings/stringprintf.h
new file mode 100644
index 0000000..3c0e399
--- /dev/null
+++ b/base/strings/stringprintf.h
@@ -0,0 +1,62 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRINGPRINTF_H_
+#define BASE_STRINGS_STRINGPRINTF_H_
+
+#include <stdarg.h> // va_list
+
+#include <string>
+
+#include "base/base_export.h"
+#include "base/compiler_specific.h"
+
+namespace base {
+
+// Return a C++ string given printf-like input.
+BASE_EXPORT std::string StringPrintf(const char* format, ...)
+ PRINTF_FORMAT(1, 2);
+// OS_ANDROID's libc does not support wchar_t, so several overloads are omitted.
+#if !defined(OS_ANDROID)
+BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...)
+ WPRINTF_FORMAT(1, 2);
+#endif
+
+// Return a C++ string given vprintf-like input.
+BASE_EXPORT std::string StringPrintV(const char* format, va_list ap)
+ PRINTF_FORMAT(1, 0);
+
+// Store result into a supplied string and return it.
+BASE_EXPORT const std::string& SStringPrintf(std::string* dst,
+ const char* format, ...)
+ PRINTF_FORMAT(2, 3);
+#if !defined(OS_ANDROID)
+BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst,
+ const wchar_t* format, ...)
+ WPRINTF_FORMAT(2, 3);
+#endif
+
+// Append result to a supplied string.
+BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...)
+ PRINTF_FORMAT(2, 3);
+#if !defined(OS_ANDROID)
+// TODO(evanm): this is only used in a few places in the code;
+// replace with string16 version.
+BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...)
+ WPRINTF_FORMAT(2, 3);
+#endif
+
+// Lower-level routine that takes a va_list and appends to a specified
+// string. All other routines are just convenience wrappers around it.
+BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap)
+ PRINTF_FORMAT(2, 0);
+#if !defined(OS_ANDROID)
+BASE_EXPORT void StringAppendV(std::wstring* dst,
+ const wchar_t* format, va_list ap)
+ WPRINTF_FORMAT(2, 0);
+#endif
+
+} // namespace base
+
+#endif // BASE_STRINGS_STRINGPRINTF_H_
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc
new file mode 100644
index 0000000..a1bf2da
--- /dev/null
+++ b/base/strings/stringprintf_unittest.cc
@@ -0,0 +1,188 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/stringprintf.h"
+
+#include <errno.h>
+
+#include "base/basictypes.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace base {
+
+namespace {
+
+// A helper for the StringAppendV test that follows.
+//
+// Just forwards its args to StringAppendV.
+static void StringAppendVTestHelper(std::string* out, const char* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ StringAppendV(out, format, ap);
+ va_end(ap);
+}
+
+} // namespace
+
+TEST(StringPrintfTest, StringPrintfEmpty) {
+ EXPECT_EQ("", StringPrintf("%s", ""));
+}
+
+TEST(StringPrintfTest, StringPrintfMisc) {
+ EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
+#if !defined(OS_ANDROID)
+ EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
+#endif
+}
+
+TEST(StringPrintfTest, StringAppendfEmptyString) {
+ std::string value("Hello");
+ StringAppendF(&value, "%s", "");
+ EXPECT_EQ("Hello", value);
+
+#if !defined(OS_ANDROID)
+ std::wstring valuew(L"Hello");
+ StringAppendF(&valuew, L"%ls", L"");
+ EXPECT_EQ(L"Hello", valuew);
+#endif
+}
+
+TEST(StringPrintfTest, StringAppendfString) {
+ std::string value("Hello");
+ StringAppendF(&value, " %s", "World");
+ EXPECT_EQ("Hello World", value);
+
+#if !defined(OS_ANDROID)
+ std::wstring valuew(L"Hello");
+ StringAppendF(&valuew, L" %ls", L"World");
+ EXPECT_EQ(L"Hello World", valuew);
+#endif
+}
+
+TEST(StringPrintfTest, StringAppendfInt) {
+ std::string value("Hello");
+ StringAppendF(&value, " %d", 123);
+ EXPECT_EQ("Hello 123", value);
+
+#if !defined(OS_ANDROID)
+ std::wstring valuew(L"Hello");
+ StringAppendF(&valuew, L" %d", 123);
+ EXPECT_EQ(L"Hello 123", valuew);
+#endif
+}
+
+// Make sure that lengths exactly around the initial buffer size are handled
+// correctly.
+TEST(StringPrintfTest, StringPrintfBounds) {
+ const int kSrcLen = 1026;
+ char src[kSrcLen];
+ for (size_t i = 0; i < arraysize(src); i++)
+ src[i] = 'A';
+
+ wchar_t srcw[kSrcLen];
+ for (size_t i = 0; i < arraysize(srcw); i++)
+ srcw[i] = 'A';
+
+ for (int i = 1; i < 3; i++) {
+ src[kSrcLen - i] = 0;
+ std::string out;
+ SStringPrintf(&out, "%s", src);
+ EXPECT_STREQ(src, out.c_str());
+
+#if !defined(OS_ANDROID)
+ srcw[kSrcLen - i] = 0;
+ std::wstring outw;
+ SStringPrintf(&outw, L"%ls", srcw);
+ EXPECT_STREQ(srcw, outw.c_str());
+#endif
+ }
+}
+
+// Test very large sprintfs that will cause the buffer to grow.
+TEST(StringPrintfTest, Grow) {
+ char src[1026];
+ for (size_t i = 0; i < arraysize(src); i++)
+ src[i] = 'A';
+ src[1025] = 0;
+
+ const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
+
+ std::string out;
+ SStringPrintf(&out, fmt, src, src, src, src, src, src, src);
+
+ const int kRefSize = 320000;
+ char* ref = new char[kRefSize];
+#if defined(OS_WIN)
+ sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src);
+#elif defined(OS_POSIX)
+ snprintf(ref, kRefSize, fmt, src, src, src, src, src, src, src);
+#endif
+
+ EXPECT_STREQ(ref, out.c_str());
+ delete[] ref;
+}
+
+TEST(StringPrintfTest, StringAppendV) {
+ std::string out;
+ StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
+ EXPECT_EQ("1 foo bar", out);
+}
+
+// Test the boundary condition for the size of StringAppendVT's internal
+// stack buffer.
+TEST(StringPrintfTest, GrowBoundary) {
+ const int string_util_buf_len = 1024;
+ // Our buffer should be one larger than the size of StringAppendVT's stack
+ // buffer.
+ const int buf_len = string_util_buf_len + 1;
+ char src[buf_len + 1]; // Need extra one for the NUL terminator.
+ for (int i = 0; i < buf_len; ++i)
+ src[i] = 'a';
+ src[buf_len] = 0;
+
+ std::string out;
+ SStringPrintf(&out, "%s", src);
+
+ EXPECT_STREQ(src, out.c_str());
+}
+
+// TODO(evanm): what's the proper cross-platform test here?
+#if defined(OS_WIN)
+// sprintf in Visual Studio fails when given U+FFFF. This tests that the
+// failure case is gracefully handled.
+TEST(StringPrintfTest, Invalid) {
+ wchar_t invalid[2];
+ invalid[0] = 0xffff;
+ invalid[1] = 0;
+
+ std::wstring out;
+ SStringPrintf(&out, L"%ls", invalid);
+ EXPECT_STREQ(L"", out.c_str());
+}
+#endif
+
+// Test that the positional parameters work.
+TEST(StringPrintfTest, PositionalParameters) {
+ std::string out;
+ SStringPrintf(&out, "%1$s %1$s", "test");
+ EXPECT_STREQ("test test", out.c_str());
+
+#if defined(OS_WIN)
+ std::wstring wout;
+ SStringPrintf(&wout, L"%1$ls %1$ls", L"test");
+ EXPECT_STREQ(L"test test", wout.c_str());
+#endif
+}
+
+// Test that StringPrintf and StringAppendV do not change errno.
+TEST(StringPrintfTest, StringPrintfErrno) {
+ errno = 1;
+ EXPECT_EQ("", StringPrintf("%s", ""));
+ EXPECT_EQ(1, errno);
+ std::string out;
+ StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
+ EXPECT_EQ(1, errno);
+}
+
+} // namespace base