diff options
author | jhawkins@chromium.org <jhawkins@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-04 20:27:25 +0000 |
---|---|---|
committer | jhawkins@chromium.org <jhawkins@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-04 20:27:25 +0000 |
commit | 6c17851db5843c959c073b606f474434cddf5c19 (patch) | |
tree | ca18cefae8e06d37b648dd36a28e3dd2ecddfc5a /base | |
parent | 5ed7c01ee895b37c12252bc63d769af26f9093ec (diff) | |
download | chromium_src-6c17851db5843c959c073b606f474434cddf5c19.zip chromium_src-6c17851db5843c959c073b606f474434cddf5c19.tar.gz chromium_src-6c17851db5843c959c073b606f474434cddf5c19.tar.bz2 |
Implement ContactInfo, the FormGroup that stores contact information.
BUG=none
TEST=StringUtilTest.Tokenizer
Review URL: http://codereview.chromium.org/502103
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@35475 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/string_util.cc | 54 | ||||
-rw-r--r-- | base/string_util.h | 15 | ||||
-rw-r--r-- | base/string_util_unittest.cc | 181 |
3 files changed, 197 insertions, 53 deletions
diff --git a/base/string_util.cc b/base/string_util.cc index b38e182..abbfd57 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -293,7 +293,6 @@ namespace base { bool IsWprintfFormatPortable(const wchar_t* format) { for (const wchar_t* position = format; *position != '\0'; ++position) { - if (*position == '%') { bool in_specification = true; bool modifier_l = false; @@ -322,7 +321,6 @@ bool IsWprintfFormatPortable(const wchar_t* format) { } } } - } return true; @@ -816,9 +814,9 @@ bool StartsWithASCII(const std::string& str, template <typename STR> bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { - if (case_sensitive) + if (case_sensitive) { return str.compare(0, search.length(), search) == 0; - else { + } else { if (search.size() > str.size()) return false; return std::equal(search.begin(), search.end(), str.begin(), @@ -1105,7 +1103,6 @@ namespace { template <typename STR, typename INT, typename UINT, bool NEG> struct IntToStringT { - // This is to avoid a compiler warning about unary minus on unsigned type. // For example, say you had the following code: // template <typename INT> @@ -1349,6 +1346,47 @@ void SplitStringDontTrim(const std::string& str, } template<typename STR> +static size_t TokenizeT(const STR& str, + const STR& delimiters, + std::vector<STR>* tokens) { + tokens->clear(); + + typename STR::size_type start = str.find_first_not_of(delimiters); + while (start != STR::npos) { + typename STR::size_type end = str.find_first_of(delimiters, start + 1); + if (end == STR::npos) { + tokens->push_back(str.substr(start)); + break; + } else { + tokens->push_back(str.substr(start, end - start)); + start = str.find_first_not_of(delimiters, end + 1); + } + } + + return tokens->size(); +} + +size_t Tokenize(const std::wstring& str, + const std::wstring& delimiters, + std::vector<std::wstring>* tokens) { + return TokenizeT(str, delimiters, tokens); +} + +#if !defined(WCHAR_T_IS_UTF16) +size_t Tokenize(const string16& str, + const string16& delimiters, + std::vector<string16>* tokens) { + return TokenizeT(str, delimiters, tokens); +} +#endif + +size_t Tokenize(const std::string& str, + const std::string& delimiters, + std::vector<std::string>* tokens) { + return TokenizeT(str, delimiters, tokens); +} + +template<typename STR> static STR JoinStringT(const std::vector<STR>& parts, typename STR::value_type sep) { if (parts.size() == 0) return STR(); @@ -1388,7 +1426,7 @@ void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { bool last_was_ws = false; size_t last_non_ws_start = 0; for (size_t i = 0; i < length; ++i) { - switch(str[i]) { + switch (str[i]) { // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. case L' ': case L'\t': @@ -1559,7 +1597,7 @@ static void EatSameChars(const CHAR** pattern, const CHAR** string) { template <class CHAR> static void EatWildcard(const CHAR** pattern) { - while(**pattern) { + while (**pattern) { if (!IsWildcard(**pattern)) return; (*pattern)++; diff --git a/base/string_util.h b/base/string_util.h index 723242e..f0fa6da 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // @@ -547,6 +547,19 @@ void SplitStringDontTrim(const std::string& str, char s, std::vector<std::string>* r); +// Splits a string into its fields delimited by any of the characters in +// |delimiters|. Each field is added to the |tokens| vector. Returns the +// number of tokens found. +size_t Tokenize(const std::wstring& str, + const std::wstring& delimiters, + std::vector<std::wstring>* tokens); +size_t Tokenize(const string16& str, + const string16& delimiters, + std::vector<string16>* tokens); +size_t Tokenize(const std::string& str, + const std::string& delimiters, + std::vector<std::string>* tokens); + // Does the opposite of SplitString(). std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t s); string16 JoinString(const std::vector<string16>& parts, char16 s); diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc index 76dafa3..7e3062a 100644 --- a/base/string_util_unittest.cc +++ b/base/string_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -172,7 +172,7 @@ TEST(StringUtilTest, IsStringUTF8) { EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); - EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM + EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM // surrogate code points EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); @@ -180,22 +180,22 @@ TEST(StringUtilTest, IsStringUTF8) { EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); // overlong sequences - EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 - EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" - EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 - EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 - EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff - EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D - EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 - EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 - EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) - EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F - EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 + EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 + EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" + EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 + EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 + EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff + EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D + EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 + EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 + EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) + EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F + EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 // Beyond U+10FFFF (the upper limit of Unicode codespace) - EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 - EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes - EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes + EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 + EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes + EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) EXPECT_FALSE(IsStringUTF8("\xfe\xff")); @@ -204,23 +204,23 @@ TEST(StringUtilTest, IsStringUTF8) { EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> - EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) - EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE - EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF + EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) + EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE + EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF // This should also be false, but currently we pass them through. // Disable them for now. #if 0 - EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 - EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF + EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 + EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF #endif // Strings in legacy encodings. We can certainly make up strings // in a legacy encoding that are valid in UTF-8, but in real data, // most of them are invalid as UTF-8. - EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 - EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR - EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 + EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 + EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR + EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 // "abc" with U+201[CD] in windows-125[0-8] EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); // U+0639 U+064E U+0644 U+064E in ISO-8859-6 @@ -446,7 +446,6 @@ struct IntToStringTest { } TEST(StringUtilTest, IntToString) { - static const IntToStringTest<int> int_tests[] = { { 0, "0", "0" }, { -1, "-1", "4294967295" }, @@ -869,22 +868,22 @@ TEST(StringUtilTest, StringAppendfInt) { // Make sure that lengths exactly around the initial buffer size are handled // correctly. TEST(StringUtilTest, StringPrintfBounds) { - const int src_len = 1026; - char src[src_len]; + const int kSrcLen = 1026; + char src[kSrcLen]; for (size_t i = 0; i < arraysize(src); i++) src[i] = 'A'; - wchar_t srcw[src_len]; + wchar_t srcw[kSrcLen]; for (size_t i = 0; i < arraysize(srcw); i++) srcw[i] = 'A'; for (int i = 1; i < 3; i++) { - src[src_len - i] = 0; + src[kSrcLen - i] = 0; std::string out; SStringPrintf(&out, "%s", src); EXPECT_STREQ(src, out.c_str()); - srcw[src_len - i] = 0; + srcw[kSrcLen - i] = 0; std::wstring outw; SStringPrintf(&outw, L"%ls", srcw); EXPECT_STREQ(srcw, outw.c_str()); @@ -903,11 +902,12 @@ TEST(StringUtilTest, Grow) { std::string out; SStringPrintf(&out, fmt, src, src, src, src, src, src, src); - char* ref = new char[320000]; + const int kRefSize = 320000; + char* ref = new char[kRefSize]; #if defined(OS_WIN) - sprintf_s(ref, 320000, fmt, src, src, src, src, src, src, src); + sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src); #elif defined(OS_POSIX) - snprintf(ref, 320000, fmt, src, src, src, src, src, src, src); + snprintf(ref, kRefSize, fmt, src, src, src, src, src, src, src); #endif EXPECT_STREQ(ref, out.c_str()); @@ -971,50 +971,50 @@ TEST(StringUtilTest, SplitString) { std::vector<std::wstring> r; SplitString(L"a,b,c", L',', &r); - EXPECT_EQ(3U, r.size()); + ASSERT_EQ(3U, r.size()); EXPECT_EQ(r[0], L"a"); EXPECT_EQ(r[1], L"b"); EXPECT_EQ(r[2], L"c"); r.clear(); SplitString(L"a, b, c", L',', &r); - EXPECT_EQ(3U, r.size()); + ASSERT_EQ(3U, r.size()); EXPECT_EQ(r[0], L"a"); EXPECT_EQ(r[1], L"b"); EXPECT_EQ(r[2], L"c"); r.clear(); SplitString(L"a,,c", L',', &r); - EXPECT_EQ(3U, r.size()); + ASSERT_EQ(3U, r.size()); EXPECT_EQ(r[0], L"a"); EXPECT_EQ(r[1], L""); EXPECT_EQ(r[2], L"c"); r.clear(); SplitString(L"", L'*', &r); - EXPECT_EQ(1U, r.size()); + ASSERT_EQ(1U, r.size()); EXPECT_EQ(r[0], L""); r.clear(); SplitString(L"foo", L'*', &r); - EXPECT_EQ(1U, r.size()); + ASSERT_EQ(1U, r.size()); EXPECT_EQ(r[0], L"foo"); r.clear(); SplitString(L"foo ,", L',', &r); - EXPECT_EQ(2U, r.size()); + ASSERT_EQ(2U, r.size()); EXPECT_EQ(r[0], L"foo"); EXPECT_EQ(r[1], L""); r.clear(); SplitString(L",", L',', &r); - EXPECT_EQ(2U, r.size()); + ASSERT_EQ(2U, r.size()); EXPECT_EQ(r[0], L""); EXPECT_EQ(r[1], L""); r.clear(); SplitString(L"\t\ta\t", L'\t', &r); - EXPECT_EQ(4U, r.size()); + ASSERT_EQ(4U, r.size()); EXPECT_EQ(r[0], L""); EXPECT_EQ(r[1], L""); EXPECT_EQ(r[2], L"a"); @@ -1022,7 +1022,7 @@ TEST(StringUtilTest, SplitString) { r.clear(); SplitStringDontTrim(L"\t\ta\t", L'\t', &r); - EXPECT_EQ(4U, r.size()); + ASSERT_EQ(4U, r.size()); EXPECT_EQ(r[0], L""); EXPECT_EQ(r[1], L""); EXPECT_EQ(r[2], L"a"); @@ -1030,18 +1030,111 @@ TEST(StringUtilTest, SplitString) { r.clear(); SplitString(L"\ta\t\nb\tcc", L'\n', &r); - EXPECT_EQ(2U, r.size()); + ASSERT_EQ(2U, r.size()); EXPECT_EQ(r[0], L"a"); EXPECT_EQ(r[1], L"b\tcc"); r.clear(); SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r); - EXPECT_EQ(2U, r.size()); + ASSERT_EQ(2U, r.size()); EXPECT_EQ(r[0], L"\ta\t"); EXPECT_EQ(r[1], L"b\tcc"); r.clear(); } +// Test for Tokenize +TEST(StringUtilTest, Tokenize) { + std::vector<std::string> r; + size_t size; + + size = Tokenize("This is a string", " ", &r); + EXPECT_EQ(4U, size); + ASSERT_EQ(4U, r.size()); + EXPECT_EQ(r[0], "This"); + EXPECT_EQ(r[1], "is"); + EXPECT_EQ(r[2], "a"); + EXPECT_EQ(r[3], "string"); + r.clear(); + + size = Tokenize("one,two,three", ",", &r); + EXPECT_EQ(3U, size); + ASSERT_EQ(3U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], "two"); + EXPECT_EQ(r[2], "three"); + r.clear(); + + size = Tokenize("one,two:three;four", ",:", &r); + EXPECT_EQ(3U, size); + ASSERT_EQ(3U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], "two"); + EXPECT_EQ(r[2], "three;four"); + r.clear(); + + size = Tokenize("one,two:three;four", ";,:", &r); + EXPECT_EQ(4U, size); + ASSERT_EQ(4U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], "two"); + EXPECT_EQ(r[2], "three"); + EXPECT_EQ(r[3], "four"); + r.clear(); + + size = Tokenize("one, two, three", ",", &r); + EXPECT_EQ(3U, size); + ASSERT_EQ(3U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], " two"); + EXPECT_EQ(r[2], " three"); + r.clear(); + + size = Tokenize("one, two, three, ", ",", &r); + EXPECT_EQ(4U, size); + ASSERT_EQ(4U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], " two"); + EXPECT_EQ(r[2], " three"); + EXPECT_EQ(r[3], " "); + r.clear(); + + size = Tokenize("one, two, three,", ",", &r); + EXPECT_EQ(3U, size); + ASSERT_EQ(3U, r.size()); + EXPECT_EQ(r[0], "one"); + EXPECT_EQ(r[1], " two"); + EXPECT_EQ(r[2], " three"); + r.clear(); + + size = Tokenize("", ",", &r); + EXPECT_EQ(0U, size); + ASSERT_EQ(0U, r.size()); + r.clear(); + + size = Tokenize(",", ",", &r); + EXPECT_EQ(0U, size); + ASSERT_EQ(0U, r.size()); + r.clear(); + + size = Tokenize(",;:.", ".:;,", &r); + EXPECT_EQ(0U, size); + ASSERT_EQ(0U, r.size()); + r.clear(); + + size = Tokenize("\t\ta\t", "\t", &r); + EXPECT_EQ(1U, size); + ASSERT_EQ(1U, r.size()); + EXPECT_EQ(r[0], "a"); + r.clear(); + + size = Tokenize("\ta\t\nb\tcc", "\n", &r); + EXPECT_EQ(2U, size); + ASSERT_EQ(2U, r.size()); + EXPECT_EQ(r[0], "\ta\t"); + EXPECT_EQ(r[1], "b\tcc"); + r.clear(); +} + // Test for JoinString TEST(StringUtilTest, JoinString) { std::vector<std::string> in; |