diff options
Diffstat (limited to 'base/string_util_unittest.cc')
-rw-r--r-- | base/string_util_unittest.cc | 848 |
1 files changed, 848 insertions, 0 deletions
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc new file mode 100644 index 0000000..c6ff622 --- /dev/null +++ b/base/string_util_unittest.cc @@ -0,0 +1,848 @@ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <sstream> +#include <stdarg.h> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/string_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { +} + +static const struct trim_case { + const wchar_t* input; + const TrimPositions positions; + const wchar_t* output; + const TrimPositions return_value; +} trim_cases[] = { + {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, + {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, + {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, + {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, + {L"", TRIM_ALL, L"", TRIM_NONE}, + {L" ", TRIM_LEADING, L"", TRIM_LEADING}, + {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, + {L" ", TRIM_ALL, L"", TRIM_ALL}, + {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, + {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, +}; + +static const struct trim_case_ascii { + const char* input; + const TrimPositions positions; + const char* output; + const TrimPositions return_value; +} trim_cases_ascii[] = { + {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, + {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, + {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, + {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, + {"", TRIM_ALL, "", TRIM_NONE}, + {" ", TRIM_LEADING, "", TRIM_LEADING}, + {" ", TRIM_TRAILING, "", TRIM_TRAILING}, + {" ", TRIM_ALL, "", TRIM_ALL}, + {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, + {"\x85Test String\xa0\x20", TRIM_ALL, "Test String", TRIM_ALL}, +}; + +TEST(StringUtilTest, TrimWhitespace) { + std::wstring output; // Allow contents to carry over to next testcase + for (int i = 0; i < arraysize(trim_cases); ++i) { + const trim_case& value = trim_cases[i]; + EXPECT_EQ(value.return_value, + TrimWhitespace(value.input, value.positions, &output)); + EXPECT_EQ(value.output, output); + } + + // Test that TrimWhitespace() can take the same string for input and output + output = L" This is a test \r\n"; + EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); + EXPECT_EQ(L"This is a test", output); + + // Once more, but with a string of whitespace + output = L" \r\n"; + EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); + EXPECT_EQ(L"", output); + + std::string output_ascii; + for (int i = 0; i < arraysize(trim_cases_ascii); ++i) { + const trim_case_ascii& value = trim_cases_ascii[i]; + EXPECT_EQ(value.return_value, + TrimWhitespace(value.input, value.positions, &output_ascii)); + EXPECT_EQ(value.output, output_ascii); + } +} + +static const struct collapse_case { + const wchar_t* input; + const bool trim; + const wchar_t* output; +} collapse_cases[] = { + {L" Google Video ", false, L"Google Video"}, + {L"Google Video", false, L"Google Video"}, + {L"", false, L""}, + {L" ", false, L""}, + {L"\t\rTest String\n", false, L"Test String"}, + {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, + {L" Test \n \t String ", false, L"Test String"}, + {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, + {L" Test String", false, L"Test String"}, + {L"Test String ", false, L"Test String"}, + {L"Test String", false, L"Test String"}, + {L"", true, L""}, + {L"\n", true, L""}, + {L" \r ", true, L""}, + {L"\nFoo", true, L"Foo"}, + {L"\r Foo ", true, L"Foo"}, + {L" Foo bar ", true, L"Foo bar"}, + {L" \tFoo bar \n", true, L"Foo bar"}, + {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, +}; + +TEST(StringUtilTest, CollapseWhitespace) { + for (int i = 0; i < arraysize(collapse_cases); ++i) { + const collapse_case& value = collapse_cases[i]; + EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); + } +} + +static const wchar_t* const kConvertRoundtripCases[] = { + L"Google Video", + // "网页 图片 资讯更多 »" + L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", + // "Παγκόσμιος Ιστός" + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", + // "Поиск страниц на русском" + L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" + L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" + L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", + // "전체서비스" + L"\xc804\xccb4\xc11c\xbe44\xc2a4", + // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) + L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", + + // Test a character that takes more than 16-bits. This will depend on whether + // wchar_t is 16 or 32 bits. + #ifdef WIN32 + L"\xd800\xdf00", + #else + "\x10300, + #endif +}; + +TEST(StringUtilTest, ConvertUTF8AndWide) { + // we round-trip all the wide strings through UTF-8 to make sure everything + // agrees on the conversion. This uses the stream operators to test them + // simultaneously. + for (int i = 0; i < arraysize(kConvertRoundtripCases); ++i) { + std::ostringstream utf8; + utf8 << WideToUTF8(kConvertRoundtripCases[i]); + std::wostringstream wide; + wide << UTF8ToWide(utf8.str()); + + EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); + } +} + +TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) { + // An empty std::wstring should be converted to an empty std::string, + // and vice versa. + std::wstring wempty; + std::string empty; + EXPECT_EQ(empty, WideToUTF8(wempty)); + EXPECT_EQ(wempty, UTF8ToWide(empty)); +} + +TEST(StringUtilTest, ConvertMultiString) { + static wchar_t wmulti[] = { + L'f', L'o', L'o', L'\0', + L'b', L'a', L'r', L'\0', + L'b', L'a', L'z', L'\0', + L'\0' + }; + static char multi[] = { + 'f', 'o', 'o', '\0', + 'b', 'a', 'r', '\0', + 'b', 'a', 'z', '\0', + '\0' + }; + std::wstring wmultistring; + memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti)); + EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); + std::string expected; + memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); + EXPECT_EQ(arraysize(multi) - 1, expected.length()); + const std::string& converted = WideToUTF8(wmultistring); + EXPECT_EQ(arraysize(multi) - 1, converted.length()); + EXPECT_EQ(expected, converted); +} + +TEST(StringUtilTest, ConvertCodepageUTF8) { + // Make sure WideToCodepage works like WideToUTF8. + for (int i = 0; i < arraysize(kConvertRoundtripCases); ++i) { + std::string expected(WideToUTF8(kConvertRoundtripCases[i])); + std::string utf8; + EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, + OnStringUtilConversionError::SKIP, &utf8)); + EXPECT_EQ(expected, utf8); + } +} + +TEST(StringUtilTest, ConvertBetweenCodepageAndWide) { + static const struct { + const char* codepage_name; + const char* encoded; + OnStringUtilConversionError::Type on_error; + bool success; + const wchar_t* wide; + } kConvertCodepageCases[] = { + // Test a case where the input can no be decoded, using both SKIP and FAIL + // error handling rules. "A7 41" is valid, but "A6" isn't. + {"big5", + "\xA7\x41\xA6", + OnStringUtilConversionError::FAIL, + false, + L""}, + {"big5", + "\xA7\x41\xA6", + OnStringUtilConversionError::SKIP, + true, + L"\x4F60"}, + // Arabic (ISO-8859) + {"iso-8859-6", + "\xC7\xEE\xE4\xD3\xF1\xEE\xE4\xC7\xE5\xEF" " " + "\xD9\xEE\xE4\xEE\xEA\xF2\xE3\xEF\xE5\xF2", + OnStringUtilConversionError::FAIL, + true, + L"\x0627\x064E\x0644\x0633\x0651\x064E\x0644\x0627\x0645\x064F" L" " + L"\x0639\x064E\x0644\x064E\x064A\x0652\x0643\x064F\x0645\x0652"}, + // Chinese Simplified (GB2312) + {"gb2312", + "\xC4\xE3\xBA\xC3", + OnStringUtilConversionError::FAIL, + true, + L"\x4F60\x597D"}, + // Chinese Traditional (BIG5) + {"big5", + "\xA7\x41\xA6\x6E", + OnStringUtilConversionError::FAIL, + true, + L"\x4F60\x597D"}, + // Greek (ISO-8859) + {"iso-8859-7", + "\xE3\xE5\xE9\xDC" " " "\xF3\xEF\xF5", + OnStringUtilConversionError::FAIL, + true, + L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"}, + // Hebrew (Windows) + {"windows-1255", /* to be replaced with "iso-8859-8-I"? */ + "\xF9\xD1\xC8\xEC\xE5\xC9\xED", + OnStringUtilConversionError::FAIL, + true, + L"\x05E9\x05C1\x05B8\x05DC\x05D5\x05B9\x05DD"}, + // Hindi Devanagari (ISCII) + {"iscii-dev", + "\xEF\x42" "\xC6\xCC\xD7\xE8\xB3\xDA\xCF", + OnStringUtilConversionError::FAIL, + true, + L"\x0928\x092E\x0938\x094D\x0915\x093E\x0930"}, + // Korean (EUC) + {"euc-kr", + "\xBE\xC8\xB3\xE7\xC7\xCF\xBC\xBC\xBF\xE4", + OnStringUtilConversionError::FAIL, + true, + L"\xC548\xB155\xD558\xC138\xC694"}, + // Japanese (EUC) + {"euc-jp", + "\xA4\xB3\xA4\xF3\xA4\xCB\xA4\xC1\xA4\xCF", + OnStringUtilConversionError::FAIL, + true, + L"\x3053\x3093\x306B\x3061\x306F"}, + // Japanese (ISO-2022) + {"iso-2022-jp", + "\x1B\x24\x42" "\x24\x33\x24\x73\x24\x4B\x24\x41\x24\x4F" "\x1B\x28\x42", + OnStringUtilConversionError::FAIL, + true, + L"\x3053\x3093\x306B\x3061\x306F"}, + // Japanese (Shift-JIS) + {"sjis", + "\x82\xB1\x82\xF1\x82\xC9\x82\xBF\x82\xCD", + OnStringUtilConversionError::FAIL, + true, + L"\x3053\x3093\x306B\x3061\x306F"}, + // Russian (KOI8) + {"koi8-r", + "\xDA\xC4\xD2\xC1\xD7\xD3\xD4\xD7\xD5\xCA\xD4\xC5", + OnStringUtilConversionError::FAIL, + true, + L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432" + L"\x0443\x0439\x0442\x0435"}, + // Thai (ISO-8859) + {"windows-874", /* to be replaced with "iso-8859-11". */ + "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", + OnStringUtilConversionError::FAIL, + true, + L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" + L"\x0E04\x0E23\x0e31\x0E1A"}, + }; + + for (int i = 0; i < arraysize(kConvertCodepageCases); ++i) { + std::wstring wide; + bool success = CodepageToWide(kConvertCodepageCases[i].encoded, + kConvertCodepageCases[i].codepage_name, + kConvertCodepageCases[i].on_error, + &wide); + EXPECT_EQ(kConvertCodepageCases[i].success, success); + EXPECT_EQ(kConvertCodepageCases[i].wide, wide); + + // When decoding was successful and nothing was skipped, we also check the + // reverse conversion. + if (success && + kConvertCodepageCases[i].on_error == + OnStringUtilConversionError::FAIL) { + std::string encoded; + success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name, + kConvertCodepageCases[i].on_error, &encoded); + EXPECT_EQ(kConvertCodepageCases[i].success, success); + EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); + } + } + + // The above cases handled codepage->wide errors, but not wide->codepage. + // Test that here. + std::string encoded("Temp data"); // Make sure the string gets cleared. + + // First test going to an encoding that can not represent that character. + EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", + OnStringUtilConversionError::FAIL, &encoded)); + EXPECT_TRUE(encoded.empty()); + EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", + OnStringUtilConversionError::SKIP, &encoded)); + EXPECT_STREQ("Chinese", encoded.c_str()); + +#ifdef WIN32 + // When we're in UTF-16 mode, test an invalid UTF-16 character in the input. + EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1", + OnStringUtilConversionError::FAIL, &encoded)); + EXPECT_TRUE(encoded.empty()); + EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1", + OnStringUtilConversionError::SKIP, &encoded)); + EXPECT_STREQ("az", encoded.c_str()); +#endif + + // Invalid characters should fail. + EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1", + OnStringUtilConversionError::SKIP, &encoded)); + EXPECT_STREQ("az", encoded.c_str()); + + // Invalid codepages should fail. + EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", + OnStringUtilConversionError::SKIP, &encoded)); +} + +TEST(StringUtilTest, ConvertASCII) { + static const char* char_cases[] = { + "Google Video", + "Hello, world\n", + "0123ABCDwxyz \a\b\t\r\n!+,.~" + }; + + static const wchar_t* const wchar_cases[] = { + L"Google Video", + L"Hello, world\n", + L"0123ABCDwxyz \a\b\t\r\n!+,.~" + }; + + for (int i = 0; i < arraysize(char_cases); ++i) { + EXPECT_TRUE(IsStringASCII(char_cases[i])); + std::wstring wide = ASCIIToWide(char_cases[i]); + EXPECT_EQ(wchar_cases[i], wide); + + EXPECT_TRUE(IsStringASCII(wchar_cases[i])); + std::string ascii = WideToASCII(wchar_cases[i]); + EXPECT_EQ(char_cases[i], ascii); + } + + EXPECT_FALSE(IsStringASCII("Google \x80Video")); + EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); + + // Convert empty strings. + std::wstring wempty; + std::string empty; + EXPECT_EQ(empty, WideToASCII(wempty)); + EXPECT_EQ(wempty, ASCIIToWide(empty)); +} + +static const struct { + const wchar_t* src_w; + const char* src_a; + const char* dst; +} lowercase_cases[] = { + {L"FoO", "FoO", "foo"}, + {L"foo", "foo", "foo"}, + {L"FOO", "FOO", "foo"}, +}; + +TEST(StringUtilTest, LowerCaseEqualsASCII) { + for (int i = 0; i < arraysize(lowercase_cases); ++i) { + EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, + lowercase_cases[i].dst)); + EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, + lowercase_cases[i].dst)); + } +} + +TEST(StringUtilTest, GetByteDisplayUnits) { + static const struct { + int64 bytes; + DataUnits expected; + } cases[] = { + {0, DATA_UNITS_BYTE}, + {512, DATA_UNITS_BYTE}, + {10*1024, DATA_UNITS_KILOBYTE}, + {10*1024*1024, DATA_UNITS_MEGABYTE}, + {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE}, + {~(1LL<<63), DATA_UNITS_GIGABYTE}, +#ifdef NDEBUG + {-1, DATA_UNITS_BYTE}, +#endif + }; + + for (int i = 0; i < arraysize(cases); ++i) + EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); +} + +TEST(StringUtilTest, FormatBytes) { + static const struct { + int64 bytes; + DataUnits units; + const wchar_t* expected; + const wchar_t* expected_with_units; + } cases[] = { + {0, DATA_UNITS_BYTE, L"0", L"0 B"}, + {512, DATA_UNITS_BYTE, L"512", L"512 B"}, + {512, DATA_UNITS_KILOBYTE, L"0.5", L"0.5 kB"}, + {1024*1024, DATA_UNITS_KILOBYTE, L"1024", L"1024 kB"}, + {1024*1024, DATA_UNITS_MEGABYTE, L"1", L"1 MB"}, + {1024*1024*1024, DATA_UNITS_GIGABYTE, L"1", L"1 GB"}, + {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, + {~(1LL<<63), DATA_UNITS_GIGABYTE, L"8589934592", L"8589934592 GB"}, + // Make sure the first digit of the fractional part works. + {1024*1024 + 103, DATA_UNITS_KILOBYTE, L"1024.1", L"1024.1 kB"}, + {1024*1024 + 205 * 1024, DATA_UNITS_MEGABYTE, L"1.2", L"1.2 MB"}, + {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIGABYTE, + L"1.9", L"1.9 GB"}, + {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, +#ifdef NDEBUG + {-1, DATA_UNITS_BYTE, L"", L""}, +#endif + }; + + for (int i = 0; i < arraysize(cases); ++i) { + EXPECT_EQ(cases[i].expected, + FormatBytes(cases[i].bytes, cases[i].units, false)); + EXPECT_EQ(cases[i].expected_with_units, + FormatBytes(cases[i].bytes, cases[i].units, true)); + } +} + +TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { + static const struct { + wchar_t* str; + std::wstring::size_type start_offset; + wchar_t* find_this; + wchar_t* replace_with; + wchar_t* expected; + } cases[] = { + {L"aaa", 0, L"a", L"b", L"bbb"}, + {L"abb", 0, L"ab", L"a", L"ab"}, + {L"Removing some substrings inging", 0, L"ing", L"", L"Remov some substrs "}, + {L"Not found", 0, L"x", L"0", L"Not found"}, + {L"Not found again", 5, L"x", L"0", L"Not found again"}, + {L" Making it much longer ", 0, L" ", L"Four score and seven years ago", + L"Four score and seven years agoMakingFour score and seven years agoit" + L"Four score and seven years agomuchFour score and seven years agolonger" + L"Four score and seven years ago"}, + {L"Invalid offset", 9999, L"t", L"foobar", L"Invalid offset"}, + {L"Replace me only me once", 9, L"me ", L"", L"Replace me only once"}, + {L"abababab", 2, L"ab", L"c", L"abccc"}, + }; + + for (int i = 0; i < arraysize(cases); i++) { + std::wstring str(cases[i].str); + ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, + cases[i].find_this, cases[i].replace_with); + EXPECT_EQ(cases[i].expected, str); + } +} + +TEST(StringUtilTest, IntToString) { + static const struct { + int input; + std::string output; + } cases[] = { + {0, "0"}, + {42, "42"}, + {-42, "-42"}, + {INT_MAX, "2147483647"}, + {INT_MIN, "-2147483648"}, + }; + + for (int i = 0; i < arraysize(cases); ++i) + EXPECT_EQ(cases[i].output, IntToString(cases[i].input)); +} + +TEST(StringUtilTest, Uint64ToString) { + static const struct { + uint64 input; + std::string output; + } cases[] = { + {0, "0"}, + {42, "42"}, + {INT_MAX, "2147483647"}, + {kuint64max, "18446744073709551615"}, + }; + + for (int i = 0; i < arraysize(cases); ++i) + EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input)); +} + +// This checks where we can use the assignment operator for a va_list. We need +// a way to do this since Visual C doesn't support va_copy, but assignment on +// va_list is not guaranteed to be a copy. See StringAppendVT which uses this +// capability. +static void VariableArgsFunc(const char* format, ...) { + va_list org; + va_start(org, format); + + va_list dup = org; + int i1 = va_arg(org, int); + int j1 = va_arg(org, int); + char* s1 = va_arg(org, char*); + double d1 = va_arg(org, double); + va_end(org); + + int i2 = va_arg(dup, int); + int j2 = va_arg(dup, int); + char* s2 = va_arg(dup, char*); + double d2 = va_arg(dup, double); + + EXPECT_EQ(i1, i2); + EXPECT_EQ(j1, j2); + EXPECT_STREQ(s1, s2); + EXPECT_EQ(d1, d2); + + va_end(dup); +} + +TEST(StringUtilTest, VAList) { + VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); +} + +TEST(StringUtilTest, StringPrintfEmptyFormat) { + const char* empty = ""; + EXPECT_EQ("", StringPrintf(empty)); + EXPECT_EQ("", StringPrintf("%s", "")); +} + +TEST(StringUtilTest, StringPrintfMisc) { + EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w')); + EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2s %1c", 123, L"hello", 'w')); +} + +TEST(StringUtilTest, StringAppendfStringEmptyParam) { + std::string value("Hello"); + StringAppendF(&value, ""); + EXPECT_EQ("Hello", value); + + std::wstring valuew(L"Hello"); + StringAppendF(&valuew, L""); + EXPECT_EQ(L"Hello", valuew); +} + +TEST(StringUtilTest, StringAppendfEmptyString) { + std::string value("Hello"); + StringAppendF(&value, "%s", ""); + EXPECT_EQ("Hello", value); + + std::wstring valuew(L"Hello"); + StringAppendF(&valuew, L"%s", L""); + EXPECT_EQ(L"Hello", valuew); +} + +TEST(StringUtilTest, StringAppendfString) { + std::string value("Hello"); + StringAppendF(&value, " %s", "World"); + EXPECT_EQ("Hello World", value); + + std::wstring valuew(L"Hello"); + StringAppendF(&valuew, L" %s", L"World"); + EXPECT_EQ(L"Hello World", valuew); +} + +TEST(StringUtilTest, StringAppendfInt) { + std::string value("Hello"); + StringAppendF(&value, " %d", 123); + EXPECT_EQ("Hello 123", value); + + std::wstring valuew(L"Hello"); + StringAppendF(&valuew, L" %d", 123); + EXPECT_EQ(L"Hello 123", valuew); +} + +// Make sure that lengths exactly around the initial buffer size are handled +// correctly. +TEST(StringUtilTest, StringPrintfBounds) { + const int src_len = 1026; + char src[src_len]; + for (int i = 0; i < arraysize(src); i++) + src[i] = 'A'; + + wchar_t srcw[src_len]; + for (int i = 0; i < arraysize(srcw); i++) + srcw[i] = 'A'; + + for (int i = 1; i < 3; i++) { + src[src_len - i] = 0; + std::string out; + SStringPrintf(&out, "%s", src); + EXPECT_STREQ(src, out.c_str()); + + srcw[src_len - i] = 0; + std::wstring outw; + SStringPrintf(&outw, L"%s", srcw); + EXPECT_STREQ(srcw, outw.c_str()); + } +} + +// Test very large sprintfs that will cause the buffer to grow. +TEST(StringUtilTest, Grow) { + char src[1026]; + for (int i = 0; i < arraysize(src); i++) + src[i] = 'A'; + src[1025] = 0; + + char* fmt = "%sB%sB%sB%sB%sB%sB%s"; + + std::string out; + SStringPrintf(&out, fmt, src, src, src, src, src, src, src); + + char* ref = new char[320000]; + sprintf_s(ref, 320000, fmt, src, src, src, src, src, src, src); + + EXPECT_STREQ(ref, out.c_str()); + delete ref; +} + +// Test the boundary condition for the size of the string_util's +// internal buffer. +TEST(StringUtilTest, GrowBoundary) { + const int string_util_buf_len = 1024; + // Our buffer should be one larger than the size of StringAppendVT's stack + // buffer. + const int buf_len = string_util_buf_len + 1; + char src[buf_len + 1]; // Need extra one for NULL-terminator. + for (int i = 0; i < buf_len; ++i) + src[i] = 'a'; + src[buf_len] = 0; + + std::string out; + SStringPrintf(&out, "%s", src); + + EXPECT_STREQ(src, out.c_str()); +} + +// sprintf in Visual Studio fails when given U+FFFF. This tests that the +// failure case is gracefuly handled. +TEST(StringUtilTest, Invalid) { + wchar_t invalid[2]; + invalid[0] = 0xffff; + invalid[1] = 0; + + std::wstring out; + SStringPrintf(&out, L"%s", invalid); + EXPECT_STREQ(L"", out.c_str()); +} + +// Test for SplitString +TEST(StringUtilTest, SplitString) { + std::vector<std::wstring> r; + + SplitString(L"a,b,c", L',', &r); + EXPECT_EQ(r.size(), 3); + EXPECT_EQ(r[0], L"a"); + EXPECT_EQ(r[1], L"b"); + EXPECT_EQ(r[2], L"c"); + r.clear(); + + SplitString(L"a, b, c", L',', &r); + EXPECT_EQ(r.size(), 3); + EXPECT_EQ(r[0], L"a"); + EXPECT_EQ(r[1], L"b"); + EXPECT_EQ(r[2], L"c"); + r.clear(); + + SplitString(L"a,,c", L',', &r); + EXPECT_EQ(r.size(), 3); + EXPECT_EQ(r[0], L"a"); + EXPECT_EQ(r[1], L""); + EXPECT_EQ(r[2], L"c"); + r.clear(); + + SplitString(L"", L'*', &r); + EXPECT_EQ(r.size(), 1); + EXPECT_EQ(r[0], L""); + r.clear(); + + SplitString(L"foo", L'*', &r); + EXPECT_EQ(r.size(), 1); + EXPECT_EQ(r[0], L"foo"); + r.clear(); + + SplitString(L"foo ,", L',', &r); + EXPECT_EQ(r.size(), 2); + EXPECT_EQ(r[0], L"foo"); + EXPECT_EQ(r[1], L""); + r.clear(); + + SplitString(L",", L',', &r); + EXPECT_EQ(r.size(), 2); + EXPECT_EQ(r[0], L""); + EXPECT_EQ(r[1], L""); + r.clear(); + + SplitString(L"\t\ta\t", L'\t', &r); + EXPECT_EQ(r.size(), 4); + EXPECT_EQ(r[0], L""); + EXPECT_EQ(r[1], L""); + EXPECT_EQ(r[2], L"a"); + EXPECT_EQ(r[3], L""); + r.clear(); + + SplitStringDontTrim(L"\t\ta\t", L'\t', &r); + EXPECT_EQ(r.size(), 4); + EXPECT_EQ(r[0], L""); + EXPECT_EQ(r[1], L""); + EXPECT_EQ(r[2], L"a"); + EXPECT_EQ(r[3], L""); + r.clear(); + + SplitString(L"\ta\t\nb\tcc", L'\n', &r); + EXPECT_EQ(r.size(), 2); + EXPECT_EQ(r[0], L"a"); + EXPECT_EQ(r[1], L"b\tcc"); + r.clear(); + + SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r); + EXPECT_EQ(r.size(), 2); + EXPECT_EQ(r[0], L"\ta\t"); + EXPECT_EQ(r[1], L"b\tcc"); + r.clear(); +} + +TEST(StringUtilTest, StartsWith) { + EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", true)); + EXPECT_EQ(true, StartsWithASCII("javascript:url", "javascript", false)); + EXPECT_EQ(true, StartsWithASCII("JavaScript:url", "javascript", false)); + EXPECT_EQ(false, StartsWithASCII("java", "javascript", true)); + EXPECT_EQ(false, StartsWithASCII("java", "javascript", false)); +} + +TEST(StringUtilTest, GetStringFWithOffsets) { + std::vector<size_t> offsets; + + ReplaceStringPlaceholders(L"Hello, $1. Your number is $2.", L"1", L"2", + &offsets); + EXPECT_EQ(2, offsets.size()); + EXPECT_EQ(7, offsets[0]); + EXPECT_EQ(25, offsets[1]); + offsets.clear(); + + ReplaceStringPlaceholders(L"Hello, $2. Your number is $1.", L"1", L"2", + &offsets); + EXPECT_EQ(2, offsets.size()); + EXPECT_EQ(25, offsets[0]); + EXPECT_EQ(7, offsets[1]); + offsets.clear(); +} + +TEST(StringUtilTest, SplitStringAlongWhitespace) { + struct TestData { + const std::wstring input; + const int expected_result_count; + const std::wstring output1; + const std::wstring output2; + } data[] = { + { L"a", 1, L"a", L"" }, + { L" ", 0, L"", L"" }, + { L" a", 1, L"a", L"" }, + { L" ab ", 1, L"ab", L"" }, + { L" ab c", 2, L"ab", L"c" }, + { L" ab c ", 2, L"ab", L"c" }, + { L" ab cd", 2, L"ab", L"cd" }, + { L" ab cd ", 2, L"ab", L"cd" }, + { L" \ta\t", 1, L"a", L"" }, + { L" b\ta\t", 2, L"b", L"a" }, + { L" b\tat", 2, L"b", L"at" }, + { L"b\tat", 2, L"b", L"at" }, + { L"b\t at", 2, L"b", L"at" }, + }; + for (size_t i = 0; i < arraysize(data); ++i) { + std::vector<std::wstring> results; + SplitStringAlongWhitespace(data[i].input, &results); + ASSERT_EQ(data[i].expected_result_count, results.size()); + if (data[i].expected_result_count > 0) + ASSERT_EQ(data[i].output1, results[0]); + if (data[i].expected_result_count > 1) + ASSERT_EQ(data[i].output2, results[1]); + } +} + +TEST(StringUtilTest, MatchPatternTest) { + EXPECT_EQ(MatchPattern(L"www.google.com", L"*.com"), true); + EXPECT_EQ(MatchPattern(L"www.google.com", L"*"), true); + EXPECT_EQ(MatchPattern(L"www.google.com", L"www*.g*.org"), false); + EXPECT_EQ(MatchPattern(L"Hello", L"H?l?o"), true); + EXPECT_EQ(MatchPattern(L"www.google.com", L"http://*)"), false); + EXPECT_EQ(MatchPattern(L"www.msn.com", L"*.COM"), false); + EXPECT_EQ(MatchPattern(L"Hello*1234", L"He??o\\*1*"), true); + EXPECT_EQ(MatchPattern(L"", L"*.*"), false); + EXPECT_EQ(MatchPattern(L"", L"*"), true); + EXPECT_EQ(MatchPattern(L"", L"?"), true); + EXPECT_EQ(MatchPattern(L"", L""), true); + EXPECT_EQ(MatchPattern(L"Hello", L""), false); + EXPECT_EQ(MatchPattern(L"Hello*", L"Hello*"), true); + EXPECT_EQ(MatchPattern("Hello*", "Hello*"), true); // narrow string +} + + |