summaryrefslogtreecommitdiffstats
path: root/base/string_util_unittest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'base/string_util_unittest.cc')
-rw-r--r--base/string_util_unittest.cc848
1 files changed, 848 insertions, 0 deletions
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
new file mode 100644
index 0000000..c6ff622
--- /dev/null
+++ b/base/string_util_unittest.cc
@@ -0,0 +1,848 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <sstream>
+#include <stdarg.h>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// NOTE(review): this anonymous namespace is empty; the test tables in this
+// file get internal linkage from `static` instead.
+namespace {
+}
+
+// One wide-string TrimWhitespace() scenario: the text to trim, which ends to
+// trim, the text expected afterwards, and the expected return value.
+struct trim_case {
+  const wchar_t* input;
+  const TrimPositions positions;
+  const wchar_t* output;
+  const TrimPositions return_value;
+};
+
+static const trim_case trim_cases[] = {
+  // Plain spaces on both ends, trimmed from each side in turn.
+  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
+  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
+  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
+  // Nothing to trim.
+  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
+  {L"", TRIM_ALL, L"", TRIM_NONE},
+  // Input consisting only of whitespace.
+  {L" ", TRIM_LEADING, L"", TRIM_LEADING},
+  {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
+  {L" ", TRIM_ALL, L"", TRIM_ALL},
+  // Control-character whitespace.
+  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
+  // Non-ASCII whitespace: U+2002, U+00A0 and U+3000.
+  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
+};
+
+// Narrow-string counterpart of trim_case: input, ends to trim, expected text
+// and expected return value for the std::string overload of TrimWhitespace().
+struct trim_case_ascii {
+  const char* input;
+  const TrimPositions positions;
+  const char* output;
+  const TrimPositions return_value;
+};
+
+static const trim_case_ascii trim_cases_ascii[] = {
+  // Plain spaces on both ends, trimmed from each side in turn.
+  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
+  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
+  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
+  // Nothing to trim.
+  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
+  {"", TRIM_ALL, "", TRIM_NONE},
+  // Input consisting only of whitespace.
+  {" ", TRIM_LEADING, "", TRIM_LEADING},
+  {" ", TRIM_TRAILING, "", TRIM_TRAILING},
+  {" ", TRIM_ALL, "", TRIM_ALL},
+  // Control-character whitespace.
+  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
+  // Bytes 0x85 and 0xA0 are expected to be trimmed as well.
+  {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL},
+};
+
+// Runs TrimWhitespace() over the wide and narrow tables and checks that the
+// input string may also be used as the output string.
+TEST(StringUtilTest, TrimWhitespace) {
+  // |output| is reused on purpose so that contents left over from one case
+  // carry over into the next call.
+  std::wstring output;
+  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
+    const trim_case& value = trim_cases[i];
+    EXPECT_EQ(value.return_value,
+              TrimWhitespace(value.input, value.positions, &output));
+    EXPECT_EQ(value.output, output);
+  }
+
+  // TrimWhitespace() can take the same string for input and output.
+  output = L" This is a test \r\n";
+  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
+  EXPECT_EQ(L"This is a test", output);
+
+  // Once more, but with a string made only of whitespace.
+  output = L" \r\n";
+  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
+  EXPECT_EQ(L"", output);
+
+  // Narrow-string overload.
+  std::string output_ascii;
+  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
+    const trim_case_ascii& value = trim_cases_ascii[i];
+    EXPECT_EQ(value.return_value,
+              TrimWhitespace(value.input, value.positions, &output_ascii));
+    EXPECT_EQ(value.output, output_ascii);
+  }
+}
+
+// One CollapseWhitespace() scenario: the input text, the flag passed as the
+// function's second argument, and the expected result.
+struct collapse_case {
+  const wchar_t* input;
+  const bool trim;
+  const wchar_t* output;
+};
+
+static const collapse_case collapse_cases[] = {
+  // Flag cleared.
+  {L" Google Video ", false, L"Google Video"},
+  {L"Google Video", false, L"Google Video"},
+  {L"", false, L""},
+  {L" ", false, L""},
+  {L"\t\rTest String\n", false, L"Test String"},
+  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
+  {L" Test \n \t String ", false, L"Test String"},
+  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
+  {L" Test String", false, L"Test String"},
+  {L"Test String ", false, L"Test String"},
+  {L"Test String", false, L"Test String"},
+  // Flag set.
+  {L"", true, L""},
+  {L"\n", true, L""},
+  {L" \r ", true, L""},
+  {L"\nFoo", true, L"Foo"},
+  {L"\r Foo ", true, L"Foo"},
+  {L" Foo bar ", true, L"Foo bar"},
+  {L" \tFoo bar \n", true, L"Foo bar"},
+  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
+};
+
+// Checks CollapseWhitespace() against every row of collapse_cases.
+TEST(StringUtilTest, CollapseWhitespace) {
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
+    const collapse_case& value = collapse_cases[i];
+    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
+  }
+}
+
+// Wide strings used by the conversion tests; each one is expected to survive
+// a wide -> UTF-8 -> wide round trip unchanged.
+static const wchar_t* const kConvertRoundtripCases[] = {
+  L"Google Video",
+  // "网页 图片 资讯更多 »"
+  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
+  // "Παγκόσμιος Ιστός"
+  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
+  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
+  // "Поиск страниц на русском"
+  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
+  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
+  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
+  // "전체서비스"
+  L"\xc804\xccb4\xc11c\xbe44\xc2a4",
+
+  // Test characters that take more than 16 bits. How they are spelled depends
+  // on whether wchar_t is 16 bits (surrogate pairs) or 32 bits (one element
+  // per code point), so both entries live inside the conditional.
+#ifdef WIN32
+  // U+11D40 - U+11D44 as surrogate pairs.
+  L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
+  // U+10300 as a surrogate pair.
+  L"\xd800\xdf00",
+#else
+  // U+11D40 - U+11D44.
+  L"\x11d40\x11d41\x11d42\x11d43\x11d44",
+  // U+10300.
+  L"\x10300",
+#endif
+};
+
+// Round-trips every string in kConvertRoundtripCases through UTF-8 and back.
+TEST(StringUtilTest, ConvertUTF8AndWide) {
+  // We round-trip all the wide strings through UTF-8 to make sure everything
+  // agrees on the conversion. The results go through the stream operators so
+  // that those are tested at the same time.
+  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
+    std::ostringstream utf8;
+    utf8 << WideToUTF8(kConvertRoundtripCases[i]);
+    std::wostringstream wide;
+    wide << UTF8ToWide(utf8.str());
+
+    EXPECT_EQ(kConvertRoundtripCases[i], wide.str());
+  }
+}
+
+// Empty input must convert to empty output in both directions.
+TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) {
+  const std::wstring empty_wide;
+  const std::string empty_utf8;
+
+  // Wide -> UTF-8.
+  EXPECT_EQ(empty_utf8, WideToUTF8(empty_wide));
+  // UTF-8 -> wide.
+  EXPECT_EQ(empty_wide, UTF8ToWide(empty_utf8));
+}
+
+// Converts a multi-string (several NUL-terminated strings back to back, ended
+// by one extra NUL) and checks that the embedded NULs are preserved.
+TEST(StringUtilTest, ConvertMultiString) {
+  // Each literal's implicit terminator supplies the final NUL, so both arrays
+  // hold 13 elements: "foo\0", "bar\0", "baz\0" and a closing '\0'.
+  static wchar_t wide_source[] = L"foo\0bar\0baz\0";
+  static char narrow_source[] = "foo\0bar\0baz\0";
+
+  // Build the wide input, embedded NULs included.
+  std::wstring wide_input;
+  memcpy(WriteInto(&wide_input, arraysize(wide_source)),
+         wide_source,
+         sizeof(wide_source));
+  EXPECT_EQ(arraysize(wide_source) - 1, wide_input.length());
+
+  // Build the expected narrow result the same way.
+  std::string narrow_expected;
+  memcpy(WriteInto(&narrow_expected, arraysize(narrow_source)),
+         narrow_source,
+         sizeof(narrow_source));
+  EXPECT_EQ(arraysize(narrow_source) - 1, narrow_expected.length());
+
+  const std::string narrow_actual = WideToUTF8(wide_input);
+  EXPECT_EQ(arraysize(narrow_source) - 1, narrow_actual.length());
+  EXPECT_EQ(narrow_expected, narrow_actual);
+}
+
+// WideToCodepage() with the UTF-8 codepage must agree with WideToUTF8().
+TEST(StringUtilTest, ConvertCodepageUTF8) {
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
+    std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
+    std::string utf8;
+    EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
+                               OnStringUtilConversionError::SKIP, &utf8));
+    EXPECT_EQ(expected, utf8);
+  }
+}
+
+// Decodes sample text from a range of legacy codepages with CodepageToWide(),
+// re-encodes it with WideToCodepage(), and then checks the error handling of
+// WideToCodepage() on its own.
+TEST(StringUtilTest, ConvertBetweenCodepageAndWide) {
+  static const struct {
+    const char* codepage_name;
+    const char* encoded;
+    OnStringUtilConversionError::Type on_error;
+    bool success;
+    const wchar_t* wide;
+  } kConvertCodepageCases[] = {
+    // Test a case where the input cannot be decoded, using both SKIP and FAIL
+    // error handling rules. "A7 41" is valid, but "A6" isn't.
+    {"big5",
+     "\xA7\x41\xA6",
+     OnStringUtilConversionError::FAIL,
+     false,
+     L""},
+    {"big5",
+     "\xA7\x41\xA6",
+     OnStringUtilConversionError::SKIP,
+     true,
+     L"\x4F60"},
+    // Arabic (ISO-8859)
+    {"iso-8859-6",
+     "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " "
+     "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" "
+     L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652"},
+    // Chinese Simplified (GB2312)
+    {"gb2312",
+     "\xC4\xE3\xBA\xC3",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x4F60\x597D"},
+    // Chinese Traditional (BIG5)
+    {"big5",
+     "\xA7\x41\xA6\x6E",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x4F60\x597D"},
+    // Greek (ISO-8859)
+    {"iso-8859-7",
+     "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"},
+    // Hebrew (Windows)
+    {"windows-1255", /* to be replaced with "iso-8859-8-I"? */
+     "\xF9\xD1\xC8\xEC\xE5\xC9\xED",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD"},
+    // Hindi Devanagari (ISCII)
+    {"iscii-dev",
+     "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930"},
+    // Korean (EUC)
+    {"euc-kr",
+     "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\xC548\xB155\xD558\xC138\xC694"},
+    // Japanese (EUC)
+    {"euc-jp",
+     "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x3053\x3093\x306B\x3061\x306F"},
+    // Japanese (ISO-2022)
+    {"iso-2022-jp",
+     "\x1B\x24\x42" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F" "\x1B\x28\x42",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x3053\x3093\x306B\x3061\x306F"},
+    // Japanese (Shift-JIS)
+    {"sjis",
+     "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x3053\x3093\x306B\x3061\x306F"},
+    // Russian (KOI8)
+    {"koi8-r",
+     "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
+     L"\x0443\x0439\x0442\x0435"},
+    // Thai (ISO-8859)
+    {"windows-874", /* to be replaced with "iso-8859-11". */
+     "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
+     OnStringUtilConversionError::FAIL,
+     true,
+     L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
+     L"\x0E04\x0E23\x0e31\x0E1A"},
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {
+    std::wstring wide;
+    bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
+                                  kConvertCodepageCases[i].codepage_name,
+                                  kConvertCodepageCases[i].on_error,
+                                  &wide);
+    EXPECT_EQ(kConvertCodepageCases[i].success, success);
+    EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
+
+    // When decoding was successful and nothing was skipped, we also check the
+    // reverse conversion.
+    if (success &&
+        kConvertCodepageCases[i].on_error ==
+            OnStringUtilConversionError::FAIL) {
+      std::string encoded;
+      success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,
+                               kConvertCodepageCases[i].on_error, &encoded);
+      EXPECT_EQ(kConvertCodepageCases[i].success, success);
+      EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
+    }
+  }
+
+  // The above cases handled codepage->wide errors, but not wide->codepage.
+  // Test that here.
+  std::string encoded("Temp data");  // Make sure the string gets cleared.
+
+  // First test going to an encoding that can not represent that character.
+  EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
+                              OnStringUtilConversionError::FAIL, &encoded));
+  EXPECT_TRUE(encoded.empty());
+  EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
+                             OnStringUtilConversionError::SKIP, &encoded));
+  EXPECT_STREQ("Chinese", encoded.c_str());
+
+#ifdef WIN32
+  // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.
+  EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",
+                              OnStringUtilConversionError::FAIL, &encoded));
+  EXPECT_TRUE(encoded.empty());
+  EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",
+                             OnStringUtilConversionError::SKIP, &encoded));
+  EXPECT_STREQ("az", encoded.c_str());
+#endif
+
+  // With SKIP, an invalid character is dropped and the conversion succeeds.
+  EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
+                             OnStringUtilConversionError::SKIP, &encoded));
+  EXPECT_STREQ("az", encoded.c_str());
+
+  // Invalid codepages should fail.
+  EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
+                              OnStringUtilConversionError::SKIP, &encoded));
+}
+
+// Checks IsStringASCII() and the ASCII <-> wide conversions on matching
+// narrow and wide inputs.
+TEST(StringUtilTest, ConvertASCII) {
+  // The two tables are parallel: entry i of one is the widened or narrowed
+  // form of entry i of the other.
+  static const char* const char_cases[] = {
+    "Google Video",
+    "Hello, world\n",
+    "0123ABCDwxyz \a\b\t\r\n!+,.~"
+  };
+
+  static const wchar_t* const wchar_cases[] = {
+    L"Google Video",
+    L"Hello, world\n",
+    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(char_cases); ++i) {
+    EXPECT_TRUE(IsStringASCII(char_cases[i]));
+    std::wstring wide = ASCIIToWide(char_cases[i]);
+    EXPECT_EQ(wchar_cases[i], wide);
+
+    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
+    std::string ascii = WideToASCII(wchar_cases[i]);
+    EXPECT_EQ(char_cases[i], ascii);
+  }
+
+  // A 0x80 character makes the string non-ASCII.
+  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
+  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
+
+  // Convert empty strings.
+  std::wstring wempty;
+  std::string empty;
+  EXPECT_EQ(empty, WideToASCII(wempty));
+  EXPECT_EQ(wempty, ASCIIToWide(empty));
+}
+
+// Inputs for LowerCaseEqualsASCII(): the same source text as a wide and as a
+// narrow string, plus the lowercase text it is compared against.
+struct lowercase_case {
+  const wchar_t* src_w;
+  const char* src_a;
+  const char* dst;
+};
+
+static const lowercase_case lowercase_cases[] = {
+  {L"FoO", "FoO", "foo"},  // mixed case
+  {L"foo", "foo", "foo"},  // already lowercase
+  {L"FOO", "FOO", "foo"},  // all uppercase
+};
+
+// Every row of lowercase_cases must compare equal to its lowercase form, for
+// both the wide and the narrow source string.
+TEST(StringUtilTest, LowerCaseEqualsASCII) {
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {
+    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
+                                     lowercase_cases[i].dst));
+    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
+                                     lowercase_cases[i].dst));
+  }
+}
+
+// Checks which display unit GetByteDisplayUnits() picks for a byte count.
+TEST(StringUtilTest, GetByteDisplayUnits) {
+  static const struct {
+    int64 bytes;
+    DataUnits expected;
+  } cases[] = {
+    {0, DATA_UNITS_BYTE},
+    {512, DATA_UNITS_BYTE},
+    {10*1024, DATA_UNITS_KILOBYTE},
+    {10*1024*1024, DATA_UNITS_MEGABYTE},
+    {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE},
+    // Largest int64 value, written as a literal so that no signed shift into
+    // the sign bit (1LL << 63) is needed to build it.
+    {0x7FFFFFFFFFFFFFFFLL, DATA_UNITS_GIGABYTE},
+#ifdef NDEBUG
+    // Negative input is only exercised when NDEBUG is defined.
+    {-1, DATA_UNITS_BYTE},
+#endif
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(cases); ++i)
+    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
+}
+
+// Checks the text FormatBytes() produces for a byte count in a given unit,
+// both without and with the unit suffix.
+TEST(StringUtilTest, FormatBytes) {
+  static const struct {
+    int64 bytes;
+    DataUnits units;
+    const wchar_t* expected;
+    const wchar_t* expected_with_units;
+  } cases[] = {
+    {0, DATA_UNITS_BYTE, L"0", L"0 B"},
+    {512, DATA_UNITS_BYTE, L"512", L"512 B"},
+    {512, DATA_UNITS_KILOBYTE, L"0.5", L"0.5 kB"},
+    {1024*1024, DATA_UNITS_KILOBYTE, L"1024", L"1024 kB"},
+    {1024*1024, DATA_UNITS_MEGABYTE, L"1", L"1 MB"},
+    {1024*1024*1024, DATA_UNITS_GIGABYTE, L"1", L"1 GB"},
+    {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"},
+    // Largest int64 value, written as a literal so that no signed shift into
+    // the sign bit (1LL << 63) is needed to build it.
+    {0x7FFFFFFFFFFFFFFFLL, DATA_UNITS_GIGABYTE,
+     L"8589934592", L"8589934592 GB"},
+    // Make sure the first digit of the fractional part works.
+    {1024*1024 + 103, DATA_UNITS_KILOBYTE, L"1024.1", L"1024.1 kB"},
+    {1024*1024 + 205 * 1024, DATA_UNITS_MEGABYTE, L"1.2", L"1.2 MB"},
+    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIGABYTE,
+     L"1.9", L"1.9 GB"},
+    {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"},
+#ifdef NDEBUG
+    // Negative input is only exercised when NDEBUG is defined.
+    {-1, DATA_UNITS_BYTE, L"", L""},
+#endif
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(cases); ++i) {
+    EXPECT_EQ(cases[i].expected,
+              FormatBytes(cases[i].bytes, cases[i].units, false));
+    EXPECT_EQ(cases[i].expected_with_units,
+              FormatBytes(cases[i].bytes, cases[i].units, true));
+  }
+}
+
+// Applies ReplaceSubstringsAfterOffset() to each row and compares the edited
+// string with the expected text.
+TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
+  // The string members point at wide string literals, so they must be
+  // pointers to const.
+  static const struct {
+    const wchar_t* str;
+    std::wstring::size_type start_offset;
+    const wchar_t* find_this;
+    const wchar_t* replace_with;
+    const wchar_t* expected;
+  } cases[] = {
+    {L"aaa", 0, L"a", L"b", L"bbb"},
+    {L"abb", 0, L"ab", L"a", L"ab"},
+    {L"Removing some substrings inging", 0, L"ing", L"", L"Remov some substrs "},
+    {L"Not found", 0, L"x", L"0", L"Not found"},
+    {L"Not found again", 5, L"x", L"0", L"Not found again"},
+    {L" Making it much longer ", 0, L" ", L"Four score and seven years ago",
+     L"Four score and seven years agoMakingFour score and seven years agoit"
+     L"Four score and seven years agomuchFour score and seven years agolonger"
+     L"Four score and seven years ago"},
+    // Offset beyond the end of the string: nothing is replaced.
+    {L"Invalid offset", 9999, L"t", L"foobar", L"Invalid offset"},
+    // Matches before the start offset are left alone.
+    {L"Replace me only me once", 9, L"me ", L"", L"Replace me only once"},
+    {L"abababab", 2, L"ab", L"c", L"abccc"},
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    std::wstring str(cases[i].str);
+    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
+                                 cases[i].find_this, cases[i].replace_with);
+    EXPECT_EQ(cases[i].expected, str);
+  }
+}
+
+// Checks IntToString() on zero, positive, negative and extreme int values.
+TEST(StringUtilTest, IntToString) {
+  static const struct {
+    int input;
+    std::string output;
+  } cases[] = {
+    {0, "0"},
+    {42, "42"},
+    {-42, "-42"},
+    {INT_MAX, "2147483647"},
+    {INT_MIN, "-2147483648"},
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(cases); ++i)
+    EXPECT_EQ(cases[i].output, IntToString(cases[i].input));
+}
+
+// Checks Uint64ToString() on zero, small values and the largest uint64.
+TEST(StringUtilTest, Uint64ToString) {
+  static const struct {
+    uint64 input;
+    std::string output;
+  } cases[] = {
+    {0, "0"},
+    {42, "42"},
+    {INT_MAX, "2147483647"},
+    {kuint64max, "18446744073709551615"},
+  };
+
+  // size_t matches the unsigned count produced by arraysize().
+  for (size_t i = 0; i < arraysize(cases); ++i)
+    EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
+}
+
+// This checks whether we can use the assignment operator for a va_list. We
+// need a way to do this since Visual C doesn't support va_copy, but assignment
+// on va_list is not guaranteed to be a copy. See StringAppendVT which uses
+// this capability.
+//
+// The caller must pass, after |format|: int, int, const char*, double.
+static void VariableArgsFunc(const char* format, ...) {
+  va_list org;
+  va_start(org, format);
+
+  // The plain assignment below is the behavior under test.
+  va_list dup = org;
+
+  // Read every argument through the original list...
+  int i1 = va_arg(org, int);
+  int j1 = va_arg(org, int);
+  const char* s1 = va_arg(org, const char*);
+  double d1 = va_arg(org, double);
+  va_end(org);
+
+  // ...then again through the assigned copy.
+  int i2 = va_arg(dup, int);
+  int j2 = va_arg(dup, int);
+  const char* s2 = va_arg(dup, const char*);
+  double d2 = va_arg(dup, double);
+
+  // Both passes must have seen the same values.
+  EXPECT_EQ(i1, i2);
+  EXPECT_EQ(j1, j2);
+  EXPECT_STREQ(s1, s2);
+  EXPECT_EQ(d1, d2);
+
+  va_end(dup);
+}
+
+// Drives VariableArgsFunc() with the int, int, string, double argument list
+// it reads back.
+TEST(StringUtilTest, VAList) {
+  VariableArgsFunc("%d %d %s %lf",
+                   45,
+                   92,
+                   "This is interesting",
+                   9.21);
+}
+
+// StringPrintf() must return an empty string for an empty format and for a
+// format that expands to nothing.
+TEST(StringUtilTest, StringPrintfEmptyFormat) {
+  // The empty format is passed through a variable rather than as a literal.
+  const char* no_format = "";
+  EXPECT_EQ("", StringPrintf(no_format));
+
+  // A "%s" fed an empty argument also yields nothing.
+  EXPECT_EQ("", StringPrintf("%s", ""));
+}
+
+// Mixes width-qualified int, string and char conversions in one format, for
+// both the narrow and the wide overload of StringPrintf().
+TEST(StringUtilTest, StringPrintfMisc) {
+  EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
+  // "%ls" is used for the wide-string argument: in a wide format "%s" means a
+  // wide string only in Microsoft's CRT, whereas "%ls" means it everywhere.
+  EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1c", 123, L"hello", 'w'));
+}
+
+// Appending with an empty format must leave the destination unchanged.
+TEST(StringUtilTest, StringAppendfStringEmptyParam) {
+  // Narrow overload.
+  std::string narrow("Hello");
+  StringAppendF(&narrow, "");
+  EXPECT_EQ("Hello", narrow);
+
+  // Wide overload.
+  std::wstring wide(L"Hello");
+  StringAppendF(&wide, L"");
+  EXPECT_EQ(L"Hello", wide);
+}
+
+// Appending an empty string argument must leave the destination unchanged.
+TEST(StringUtilTest, StringAppendfEmptyString) {
+  std::string value("Hello");
+  StringAppendF(&value, "%s", "");
+  EXPECT_EQ("Hello", value);
+
+  // "%ls" is used for the wide-string argument: in a wide format "%s" means a
+  // wide string only in Microsoft's CRT, whereas "%ls" means it everywhere.
+  std::wstring valuew(L"Hello");
+  StringAppendF(&valuew, L"%ls", L"");
+  EXPECT_EQ(L"Hello", valuew);
+}
+
+// Appends a formatted string argument to an existing string.
+TEST(StringUtilTest, StringAppendfString) {
+  std::string value("Hello");
+  StringAppendF(&value, " %s", "World");
+  EXPECT_EQ("Hello World", value);
+
+  // "%ls" is used for the wide-string argument: in a wide format "%s" means a
+  // wide string only in Microsoft's CRT, whereas "%ls" means it everywhere.
+  std::wstring valuew(L"Hello");
+  StringAppendF(&valuew, L" %ls", L"World");
+  EXPECT_EQ(L"Hello World", valuew);
+}
+
+// Appends a formatted integer to an existing string.
+TEST(StringUtilTest, StringAppendfInt) {
+  // Narrow overload.
+  std::string narrow("Hello");
+  StringAppendF(&narrow, " %d", 123);
+  EXPECT_EQ("Hello 123", narrow);
+
+  // Wide overload.
+  std::wstring wide(L"Hello");
+  StringAppendF(&wide, L" %d", 123);
+  EXPECT_EQ(L"Hello 123", wide);
+}
+
+// Make sure that lengths exactly around the initial buffer size are handled
+// correctly.
+TEST(StringUtilTest, StringPrintfBounds) {
+  const int src_len = 1026;
+  char src[src_len];
+  for (size_t i = 0; i < arraysize(src); i++)
+    src[i] = 'A';
+
+  wchar_t srcw[src_len];
+  for (size_t i = 0; i < arraysize(srcw); i++)
+    srcw[i] = 'A';
+
+  // Terminate the sources one element earlier on each pass, giving strings of
+  // 1025 and then 1024 characters.
+  for (int i = 1; i < 3; i++) {
+    src[src_len - i] = 0;
+    std::string out;
+    SStringPrintf(&out, "%s", src);
+    EXPECT_STREQ(src, out.c_str());
+
+    // "%ls" is used for the wide-string argument: in a wide format "%s" means
+    // a wide string only in Microsoft's CRT.
+    srcw[src_len - i] = 0;
+    std::wstring outw;
+    SStringPrintf(&outw, L"%ls", srcw);
+    EXPECT_STREQ(srcw, outw.c_str());
+  }
+}
+
+// Test very large sprintfs that will cause the buffer to grow.
+TEST(StringUtilTest, Grow) {
+  char src[1026];
+  for (size_t i = 0; i < arraysize(src); i++)
+    src[i] = 'A';
+  src[1025] = 0;
+
+  // A string literal must be bound to a pointer to const.
+  const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
+
+  std::string out;
+  SStringPrintf(&out, fmt, src, src, src, src, src, src, src);
+
+  // Reference output, formatted directly by the C runtime.
+  const size_t kRefSize = 320000;
+  char* ref = new char[kRefSize];
+  sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src);
+
+  EXPECT_STREQ(ref, out.c_str());
+  delete[] ref;  // Allocated with new[], so it must be released with delete[].
+}
+
+// Formats a string exactly one character longer than string_util's internal
+// buffer, to exercise the boundary where that buffer stops being big enough.
+TEST(StringUtilTest, GrowBoundary) {
+  // Size of StringAppendVT's stack buffer.
+  const int string_util_buf_len = 1024;
+  // The payload is one character longer than that buffer.
+  const int buf_len = string_util_buf_len + 1;
+
+  // One extra element holds the NUL terminator.
+  char src[buf_len + 1];
+  int pos = 0;
+  while (pos < buf_len) {
+    src[pos] = 'a';
+    ++pos;
+  }
+  src[buf_len] = 0;
+
+  std::string formatted;
+  SStringPrintf(&formatted, "%s", src);
+
+  EXPECT_STREQ(src, formatted.c_str());
+}
+
+// sprintf in Visual Studio fails when given U+FFFF. This tests that the
+// failure case is gracefully handled.
+TEST(StringUtilTest, Invalid) {
+  wchar_t invalid[2];
+  invalid[0] = 0xffff;
+  invalid[1] = 0;
+
+  // "%ls" is used for the wide-string argument: in a wide format "%s" means a
+  // wide string only in Microsoft's CRT, whereas "%ls" means it everywhere.
+  std::wstring out;
+  SStringPrintf(&out, L"%ls", invalid);
+  EXPECT_STREQ(L"", out.c_str());
+}
+
+// Tests SplitString() and SplitStringDontTrim(). Assertions are written as
+// EXPECT_EQ(expected, actual), like the rest of this file, and sizes are
+// compared against unsigned literals to match the vector's size type.
+TEST(StringUtilTest, SplitString) {
+  std::vector<std::wstring> r;
+
+  SplitString(L"a,b,c", L',', &r);
+  EXPECT_EQ(3U, r.size());
+  EXPECT_EQ(L"a", r[0]);
+  EXPECT_EQ(L"b", r[1]);
+  EXPECT_EQ(L"c", r[2]);
+  r.clear();
+
+  // Whitespace around each piece is expected to be removed.
+  SplitString(L"a, b, c", L',', &r);
+  EXPECT_EQ(3U, r.size());
+  EXPECT_EQ(L"a", r[0]);
+  EXPECT_EQ(L"b", r[1]);
+  EXPECT_EQ(L"c", r[2]);
+  r.clear();
+
+  // Adjacent delimiters produce an empty piece.
+  SplitString(L"a,,c", L',', &r);
+  EXPECT_EQ(3U, r.size());
+  EXPECT_EQ(L"a", r[0]);
+  EXPECT_EQ(L"", r[1]);
+  EXPECT_EQ(L"c", r[2]);
+  r.clear();
+
+  // Empty input yields a single empty piece.
+  SplitString(L"", L'*', &r);
+  EXPECT_EQ(1U, r.size());
+  EXPECT_EQ(L"", r[0]);
+  r.clear();
+
+  // No delimiter present.
+  SplitString(L"foo", L'*', &r);
+  EXPECT_EQ(1U, r.size());
+  EXPECT_EQ(L"foo", r[0]);
+  r.clear();
+
+  // A trailing delimiter produces a trailing empty piece.
+  SplitString(L"foo ,", L',', &r);
+  EXPECT_EQ(2U, r.size());
+  EXPECT_EQ(L"foo", r[0]);
+  EXPECT_EQ(L"", r[1]);
+  r.clear();
+
+  SplitString(L",", L',', &r);
+  EXPECT_EQ(2U, r.size());
+  EXPECT_EQ(L"", r[0]);
+  EXPECT_EQ(L"", r[1]);
+  r.clear();
+
+  // The delimiter itself may be a whitespace character.
+  SplitString(L"\t\ta\t", L'\t', &r);
+  EXPECT_EQ(4U, r.size());
+  EXPECT_EQ(L"", r[0]);
+  EXPECT_EQ(L"", r[1]);
+  EXPECT_EQ(L"a", r[2]);
+  EXPECT_EQ(L"", r[3]);
+  r.clear();
+
+  SplitStringDontTrim(L"\t\ta\t", L'\t', &r);
+  EXPECT_EQ(4U, r.size());
+  EXPECT_EQ(L"", r[0]);
+  EXPECT_EQ(L"", r[1]);
+  EXPECT_EQ(L"a", r[2]);
+  EXPECT_EQ(L"", r[3]);
+  r.clear();
+
+  // SplitString() strips the tabs around "a"; SplitStringDontTrim() keeps
+  // them.
+  SplitString(L"\ta\t\nb\tcc", L'\n', &r);
+  EXPECT_EQ(2U, r.size());
+  EXPECT_EQ(L"a", r[0]);
+  EXPECT_EQ(L"b\tcc", r[1]);
+  r.clear();
+
+  SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r);
+  EXPECT_EQ(2U, r.size());
+  EXPECT_EQ(L"\ta\t", r[0]);
+  EXPECT_EQ(L"b\tcc", r[1]);
+  r.clear();
+}
+
+// Checks StartsWithASCII() with both values of its third argument.
+TEST(StringUtilTest, StartsWith) {
+  // The prefix is present in exactly the same case.
+  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
+  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
+
+  // The prefix differs only by case; this matches when the flag is false.
+  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
+
+  // The string is shorter than the prefix, so it can never match.
+  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
+  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
+}
+
+// Checks the offsets that ReplaceStringPlaceholders() reports for the $1 and
+// $2 placeholders. Expected values are unsigned literals so they compare
+// cleanly against the size_t values in |offsets|.
+TEST(StringUtilTest, GetStringFWithOffsets) {
+  std::vector<size_t> offsets;
+
+  // Placeholders appear in numeric order.
+  ReplaceStringPlaceholders(L"Hello, $1. Your number is $2.", L"1", L"2",
+                            &offsets);
+  EXPECT_EQ(2U, offsets.size());
+  EXPECT_EQ(7U, offsets[0]);
+  EXPECT_EQ(25U, offsets[1]);
+  offsets.clear();
+
+  // Placeholders appear in reverse order; offsets[0] still belongs to $1.
+  ReplaceStringPlaceholders(L"Hello, $2. Your number is $1.", L"1", L"2",
+                            &offsets);
+  EXPECT_EQ(2U, offsets.size());
+  EXPECT_EQ(25U, offsets[0]);
+  EXPECT_EQ(7U, offsets[1]);
+  offsets.clear();
+}
+
+// Splits each input with SplitStringAlongWhitespace() and checks the number
+// of pieces and the first two of them.
+TEST(StringUtilTest, SplitStringAlongWhitespace) {
+  struct TestData {
+    const std::wstring input;
+    // size_t so that it compares against results.size() without mixing
+    // signed and unsigned values.
+    const size_t expected_result_count;
+    const std::wstring output1;
+    const std::wstring output2;
+  } data[] = {
+    { L"a", 1, L"a", L"" },
+    { L" ", 0, L"", L"" },
+    { L" a", 1, L"a", L"" },
+    { L" ab ", 1, L"ab", L"" },
+    { L" ab c", 2, L"ab", L"c" },
+    { L" ab c ", 2, L"ab", L"c" },
+    { L" ab cd", 2, L"ab", L"cd" },
+    { L" ab cd ", 2, L"ab", L"cd" },
+    { L" \ta\t", 1, L"a", L"" },
+    { L" b\ta\t", 2, L"b", L"a" },
+    { L" b\tat", 2, L"b", L"at" },
+    { L"b\tat", 2, L"b", L"at" },
+    { L"b\t at", 2, L"b", L"at" },
+  };
+  for (size_t i = 0; i < arraysize(data); ++i) {
+    std::vector<std::wstring> results;
+    SplitStringAlongWhitespace(data[i].input, &results);
+    // ASSERT rather than EXPECT: the element checks below index |results|.
+    ASSERT_EQ(data[i].expected_result_count, results.size());
+    if (data[i].expected_result_count > 0)
+      ASSERT_EQ(data[i].output1, results[0]);
+    if (data[i].expected_result_count > 1)
+      ASSERT_EQ(data[i].output2, results[1]);
+  }
+}
+
+// Checks MatchPattern() with the '*' and '?' wildcards and with an escaped
+// '*'. Boolean results use EXPECT_TRUE/EXPECT_FALSE rather than
+// EXPECT_EQ(actual, expected), whose argument order was reversed.
+TEST(StringUtilTest, MatchPatternTest) {
+  EXPECT_TRUE(MatchPattern(L"www.google.com", L"*.com"));
+  EXPECT_TRUE(MatchPattern(L"www.google.com", L"*"));
+  EXPECT_FALSE(MatchPattern(L"www.google.com", L"www*.g*.org"));
+  EXPECT_TRUE(MatchPattern(L"Hello", L"H?l?o"));
+  EXPECT_FALSE(MatchPattern(L"www.google.com", L"http://*)"));
+  // Matching is case-sensitive.
+  EXPECT_FALSE(MatchPattern(L"www.msn.com", L"*.COM"));
+  // "\\*" in the pattern matches a literal '*'.
+  EXPECT_TRUE(MatchPattern(L"Hello*1234", L"He??o\\*1*"));
+  // Empty input.
+  EXPECT_FALSE(MatchPattern(L"", L"*.*"));
+  EXPECT_TRUE(MatchPattern(L"", L"*"));
+  EXPECT_TRUE(MatchPattern(L"", L"?"));
+  EXPECT_TRUE(MatchPattern(L"", L""));
+  // Empty pattern against non-empty input.
+  EXPECT_FALSE(MatchPattern(L"Hello", L""));
+  EXPECT_TRUE(MatchPattern(L"Hello*", L"Hello*"));
+  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));  // narrow string
+}
+
+