From c5a7248b45656b1ca63f9ce35b60ca337f83d3d1 Mon Sep 17 00:00:00 2001 From: "tony@chromium.org" Date: Thu, 3 Dec 2009 23:32:57 +0000 Subject: Fixes to the string MatchPattern functions: 1) Make it explicit that it only supports ASCII (since it iterates character by character). 2) Limit the recursion to 16 levels. We could allow more, but in the case of a ?, it has exponential complexity, so I figured 16 was a good stopping point. It seems rare that someone would have more than 16 '?' and '*'s. BUG=28645 Review URL: http://codereview.chromium.org/460047 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33748 0039d316-1c4b-4281-b951-d872f2087c98 --- base/string_util.cc | 21 +++++++++++++-------- base/string_util.h | 7 +++---- base/string_util_unittest.cc | 30 ++++++++++++++++-------------- 3 files changed, 32 insertions(+), 26 deletions(-) (limited to 'base') diff --git a/base/string_util.cc b/base/string_util.cc index 01a6907..9db0f03 100644 --- a/base/string_util.cc +++ b/base/string_util.cc @@ -1561,7 +1561,11 @@ static void EatWildcard(const CHAR** pattern) { } template -static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) { +static bool MatchPatternT(const CHAR* eval, const CHAR* pattern, int depth) { + const int kMaxDepth = 16; + if (depth > kMaxDepth) + return false; + // Eat all the matching chars. EatSameChars(&pattern, &eval); @@ -1581,8 +1585,8 @@ static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) { // If this is a question mark, then we need to compare the rest with // the current string or the string with one character eaten. if (pattern[0] == '?') { - if (MatchPatternT(eval, pattern + 1) || - MatchPatternT(eval + 1, pattern + 1)) + if (MatchPatternT(eval, pattern + 1, depth + 1) || + MatchPatternT(eval + 1, pattern + 1, depth + 1)) return true; } @@ -1590,7 +1594,7 @@ static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) { // of the pattern. if (pattern[0] == '*') { while (*eval) { - if (MatchPatternT(eval, pattern + 1)) + if (MatchPatternT(eval, pattern + 1, depth + 1)) return true; eval++; } @@ -1608,12 +1612,13 @@ static bool MatchPatternT(const CHAR* eval, const CHAR* pattern) { return false; } -bool MatchPattern(const std::wstring& eval, const std::wstring& pattern) { - return MatchPatternT(eval.c_str(), pattern.c_str()); +bool MatchPatternWide(const std::wstring& eval, const std::wstring& pattern) { + return MatchPatternT(eval.c_str(), pattern.c_str(), 0); } -bool MatchPattern(const std::string& eval, const std::string& pattern) { - return MatchPatternT(eval.c_str(), pattern.c_str()); +bool MatchPatternASCII(const std::string& eval, const std::string& pattern) { + DCHECK(IsStringASCII(eval) && IsStringASCII(pattern)); + return MatchPatternT(eval.c_str(), pattern.c_str(), 0); } bool StringToInt(const std::string& input, int* output) { diff --git a/base/string_util.h b/base/string_util.h index dec39a6..66518e9 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -590,11 +590,10 @@ bool ElideString(const std::wstring& input, int max_len, std::wstring* output); // Returns true if the string passed in matches the pattern. The pattern // string can contain wildcards like * and ? -// TODO(iyengar) This function may not work correctly for CJK strings as -// it does individual character matches. // The backslash character (\) is an escape character for * and ? -bool MatchPattern(const std::wstring& string, const std::wstring& pattern); -bool MatchPattern(const std::string& string, const std::string& pattern); +// We limit the patterns to having a max of 16 * or ? characters. +bool MatchPatternWide(const std::wstring& string, const std::wstring& pattern); +bool MatchPatternASCII(const std::string& string, const std::string& pattern); // Returns a hex string representation of a binary buffer. // The returned hex string will be in upper case. diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc index c586ff4..76dafa3 100644 --- a/base/string_util_unittest.cc +++ b/base/string_util_unittest.cc @@ -1212,20 +1212,22 @@ TEST(StringUtilTest, SplitStringAlongWhitespace) { } TEST(StringUtilTest, MatchPatternTest) { - EXPECT_EQ(MatchPattern(L"www.google.com", L"*.com"), true); - EXPECT_EQ(MatchPattern(L"www.google.com", L"*"), true); - EXPECT_EQ(MatchPattern(L"www.google.com", L"www*.g*.org"), false); - EXPECT_EQ(MatchPattern(L"Hello", L"H?l?o"), true); - EXPECT_EQ(MatchPattern(L"www.google.com", L"http://*)"), false); - EXPECT_EQ(MatchPattern(L"www.msn.com", L"*.COM"), false); - EXPECT_EQ(MatchPattern(L"Hello*1234", L"He??o\\*1*"), true); - EXPECT_EQ(MatchPattern(L"", L"*.*"), false); - EXPECT_EQ(MatchPattern(L"", L"*"), true); - EXPECT_EQ(MatchPattern(L"", L"?"), true); - EXPECT_EQ(MatchPattern(L"", L""), true); - EXPECT_EQ(MatchPattern(L"Hello", L""), false); - EXPECT_EQ(MatchPattern(L"Hello*", L"Hello*"), true); - EXPECT_EQ(MatchPattern("Hello*", "Hello*"), true); // narrow string + EXPECT_EQ(MatchPatternASCII("www.google.com", "*.com"), true); + EXPECT_EQ(MatchPatternASCII("www.google.com", "*"), true); + EXPECT_EQ(MatchPatternASCII("www.google.com", "www*.g*.org"), false); + EXPECT_EQ(MatchPatternASCII("Hello", "H?l?o"), true); + EXPECT_EQ(MatchPatternASCII("www.google.com", "http://*)"), false); + EXPECT_EQ(MatchPatternASCII("www.msn.com", "*.COM"), false); + EXPECT_EQ(MatchPatternASCII("Hello*1234", "He??o\\*1*"), true); + EXPECT_EQ(MatchPatternASCII("", "*.*"), false); + EXPECT_EQ(MatchPatternASCII("", "*"), true); + EXPECT_EQ(MatchPatternASCII("", "?"), true); + EXPECT_EQ(MatchPatternASCII("", ""), true); + EXPECT_EQ(MatchPatternASCII("Hello", ""), false); + EXPECT_EQ(MatchPatternASCII("Hello*", "Hello*"), true); + // Stop after a certain recursion depth. + EXPECT_EQ(MatchPatternASCII("12345678901234567890", "???????????????????*"), + false); } TEST(StringUtilTest, LcpyTest) { -- cgit v1.1