diff options
Diffstat (limited to 'chrome/browser/history')
-rw-r--r-- | chrome/browser/history/query_parser.cc | 35 | ||||
-rw-r--r-- | chrome/browser/history/query_parser.h | 9 |
2 files changed, 23 insertions, 21 deletions
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc
index 39754ce..f3c4063 100644
--- a/chrome/browser/history/query_parser.cc
+++ b/chrome/browser/history/query_parser.cc
@@ -56,25 +56,6 @@ void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) {
     CoalesceMatchesFrom(i, matches);
 }
 
-// For CJK ideographs and Korean Hangul, even a single character
-// can be useful in prefix matching, but that may give us too many
-// false positives. Moreover, the current ICU word breaker gives us
-// back every single Chinese character as a word so that there's no
-// point doing anything for them and we only adjust the minimum length
-// to 2 for Korean Hangul while using 3 for others. This is a temporary
-// hack until we have a segmentation support.
-inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word)
-{
-  DCHECK(word.size() > 0);
-  size_t minimum_length = 3;
-  // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
-  // because they 'behave like' Latin letters. Moreover, we should
-  // normalize the former before reaching here.
-  if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
-    minimum_length = 2;
-  return word.size() >= minimum_length;
-}
-
 }  // namespace
 
 // Inheritance structure:
@@ -119,7 +100,7 @@ bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words,
 }
 
 bool QueryNodeWord::Matches(const std::wstring& word, bool exact) const {
-  if (exact || !IsWordLongEnoughForPrefixSearch(word_))
+  if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
     return word == word_;
   return word.size() >= word_.size() &&
          (word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
@@ -133,7 +114,7 @@ int QueryNodeWord::AppendToSQLiteQuery(std::wstring* query) const {
   query->append(word_);
 
   // Use prefix search if we're not literal and long enough.
-  if (!literal_ && IsWordLongEnoughForPrefixSearch(word_))
+  if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
     *query += L'*';
   return 1;
 }
@@ -260,6 +241,18 @@ bool QueryNodePhrase::HasMatchIn(
 QueryParser::QueryParser() {
 }
 
+// static
+bool QueryParser::IsWordLongEnoughForPrefixSearch(const std::wstring& word) {
+  DCHECK(word.size() > 0);
+  size_t minimum_length = 3;
+  // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
+  // because they 'behave like' Latin letters. Moreover, we should
+  // normalize the former before reaching here.
+  if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
+    minimum_length = 2;
+  return word.size() >= minimum_length;
+}
+
 // Returns true if the character is considered a quote.
 static bool IsQueryQuote(wchar_t ch) {
   return ch == '"' ||
diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h
index 1961d6e..a29548a8 100644
--- a/chrome/browser/history/query_parser.h
+++ b/chrome/browser/history/query_parser.h
@@ -58,6 +58,15 @@ class QueryParser {
  public:
   QueryParser();
 
+  // For CJK ideographs and Korean Hangul, even a single character
+  // can be useful in prefix matching, but that may give us too many
+  // false positives. Moreover, the current ICU word breaker gives us
+  // back every single Chinese character as a word so that there's no
+  // point doing anything for them and we only adjust the minimum length
+  // to 2 for Korean Hangul while using 3 for others. This is a temporary
+  // hack until we have a segmentation support.
+  static bool IsWordLongEnoughForPrefixSearch(const std::wstring& word);
+
   // Parse a query into a SQLite query. The resulting query is placed in
   // sqlite_query and the number of words is returned.
   int ParseQuery(const std::wstring& query,