Diffstat (limited to 'chrome/browser/history')
-rw-r--r--  chrome/browser/history/query_parser.cc | 35
-rw-r--r--  chrome/browser/history/query_parser.h  |  9
2 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc
index 39754ce..f3c4063 100644
--- a/chrome/browser/history/query_parser.cc
+++ b/chrome/browser/history/query_parser.cc
@@ -56,25 +56,6 @@ void CoalseAndSortMatchPositions(Snippet::MatchPositions* matches) {
CoalesceMatchesFrom(i, matches);
}
-// For CJK ideographs and Korean Hangul, even a single character
-// can be useful in prefix matching, but that may give us too many
-// false positives. Moreover, the current ICU word breaker gives us
-// back every single Chinese character as a word so that there's no
-// point doing anything for them and we only adjust the minimum length
-// to 2 for Korean Hangul while using 3 for others. This is a temporary
-// hack until we have a segmentation support.
-inline bool IsWordLongEnoughForPrefixSearch(const std::wstring& word)
-{
- DCHECK(word.size() > 0);
- size_t minimum_length = 3;
- // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
- // because they 'behave like' Latin letters. Moreover, we should
- // normalize the former before reaching here.
- if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
- minimum_length = 2;
- return word.size() >= minimum_length;
-}
-
} // namespace
// Inheritance structure:
@@ -119,7 +100,7 @@ bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words,
}
bool QueryNodeWord::Matches(const std::wstring& word, bool exact) const {
- if (exact || !IsWordLongEnoughForPrefixSearch(word_))
+ if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
return word == word_;
return word.size() >= word_.size() &&
(word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
@@ -133,7 +114,7 @@ int QueryNodeWord::AppendToSQLiteQuery(std::wstring* query) const {
query->append(word_);
// Use prefix search if we're not literal and long enough.
- if (!literal_ && IsWordLongEnoughForPrefixSearch(word_))
+ if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
*query += L'*';
return 1;
}
@@ -260,6 +241,18 @@ bool QueryNodePhrase::HasMatchIn(
QueryParser::QueryParser() {
}
+// static
+bool QueryParser::IsWordLongEnoughForPrefixSearch(const std::wstring& word) {
+ DCHECK(word.size() > 0);
+ size_t minimum_length = 3;
+ // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
+ // because they 'behave like' Latin letters. Moreover, we should
+ // normalize the former before reaching here.
+ if (0xAC00 <= word[0] && word[0] <= 0xD7A3)
+ minimum_length = 2;
+ return word.size() >= minimum_length;
+}
+
// Returns true if the character is considered a quote.
static bool IsQueryQuote(wchar_t ch) {
return ch == '"' ||
diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h
index 1961d6e..a29548a8 100644
--- a/chrome/browser/history/query_parser.h
+++ b/chrome/browser/history/query_parser.h
@@ -58,6 +58,15 @@ class QueryParser {
public:
QueryParser();
+ // For CJK ideographs and Korean Hangul, even a single character
+ // can be useful in prefix matching, but that may give us too many
+ // false positives. Moreover, the current ICU word breaker gives us
+ // back every single Chinese character as a word so that there's no
+ // point doing anything for them and we only adjust the minimum length
+ // to 2 for Korean Hangul while using 3 for others. This is a temporary
+ // hack until we have a segmentation support.
+ static bool IsWordLongEnoughForPrefixSearch(const std::wstring& word);
+
// Parse a query into a SQLite query. The resulting query is placed in
// sqlite_query and the number of words is returned.
int ParseQuery(const std::wstring& query,
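
For context on how the relocated helper behaves at its call sites, the sketch below exercises the thresholds described in the moved comment (a minimum of 3 code units for most scripts, dropping to 2 when the first character is a Hangul syllable in U+AC00..U+D7A3). It is illustrative only: the CheckPrefixSearchThresholds harness is not part of this change, and it assumes the QueryParser::IsWordLongEnoughForPrefixSearch declaration added to query_parser.h above.

#include <cassert>
#include <string>

#include "chrome/browser/history/query_parser.h"

// Illustrative harness (not part of this change): checks the length
// thresholds that IsWordLongEnoughForPrefixSearch applies before a word
// is given a trailing '*' in the generated SQLite query.
void CheckPrefixSearchThresholds() {
  // Two Latin letters fall below the default minimum of 3, so the word
  // is matched exactly rather than as a prefix.
  assert(!QueryParser::IsWordLongEnoughForPrefixSearch(L"go"));
  // Three Latin letters are long enough for prefix matching.
  assert(QueryParser::IsWordLongEnoughForPrefixSearch(L"goo"));
  // The first character is a Hangul syllable (U+D55C), so the minimum
  // drops to 2 and this two-syllable word qualifies.
  assert(QueryParser::IsWordLongEnoughForPrefixSearch(L"\xD55C\xAE00"));
  // A single Hangul syllable is still too short for prefix search.
  assert(!QueryParser::IsWordLongEnoughForPrefixSearch(L"\xD55C"));
}

Making the helper a public static member of QueryParser, rather than a file-local function in query_parser.cc's anonymous namespace, lets QueryNodeWord::Matches and QueryNodeWord::AppendToSQLiteQuery keep calling it while also making it reachable from code outside the file.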