summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.cc4
-rw-r--r--chrome/browser/autocomplete/history_quick_provider_unittest.cc104
-rw-r--r--chrome/browser/history/in_memory_url_index.cc327
-rw-r--r--chrome/browser/history/in_memory_url_index.h78
-rw-r--r--chrome/browser/history/in_memory_url_index_unittest.cc408
-rw-r--r--chrome/test/data/History/url_history_provider_test.db.txt2
6 files changed, 519 insertions, 404 deletions
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc
index d678ae45..57c9d11 100644
--- a/chrome/browser/autocomplete/history_quick_provider.cc
+++ b/chrome/browser/autocomplete/history_quick_provider.cc
@@ -93,10 +93,6 @@ void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {}
void HistoryQuickProvider::DoAutocomplete() {
// Get the matching URLs from the DB.
string16 term_string = autocomplete_input_.text();
- // TODO(mrossetti): Temporary workaround for http://crbug.com/88498.
- // Just give up after 50 characters.
- if (term_string.size() > 50)
- return;
term_string = UnescapeURLComponent(term_string,
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
history::InMemoryURLIndex::String16Vector terms(
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
index 6143481..7a8be3c 100644
--- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
@@ -69,7 +69,7 @@ struct TestURLInfo {
{"http://abcxyzdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
{"http://xyzabcdefghijklmnopqrstuvw.com/a", "", 3, 1, 0},
{"http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice",
- "Dogs & Cats & Mice", 1, 1, 0},
+ "Dogs & Cats & Mice & Other Animals", 1, 1, 0},
};
class HistoryQuickProviderTest : public TestingBrowserProcessTest,
@@ -83,16 +83,20 @@ class HistoryQuickProviderTest : public TestingBrowserProcessTest,
virtual void OnProviderUpdate(bool updated_matches);
protected:
- void SetUp() {
- profile_.reset(new TestingProfile());
- profile_->CreateHistoryService(true, false);
- profile_->CreateBookmarkModel(true);
- profile_->BlockUntilBookmarkModelLoaded();
- history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
- EXPECT_TRUE(history_service_);
- provider_ = new HistoryQuickProvider(this, profile_.get());
- FillData();
- }
+ class SetShouldContain : public std::unary_function<const std::string&,
+ std::set<std::string> > {
+ public:
+ explicit SetShouldContain(const ACMatches& matched_urls);
+
+ void operator()(const std::string& expected);
+
+ std::set<std::string> LeftOvers() const { return matches_; }
+
+ private:
+ std::set<std::string> matches_;
+ };
+
+ void SetUp();
void TearDown() {
provider_ = NULL;
@@ -121,6 +125,17 @@ class HistoryQuickProviderTest : public TestingBrowserProcessTest,
scoped_refptr<HistoryQuickProvider> provider_;
};
+void HistoryQuickProviderTest::SetUp() {
+ profile_.reset(new TestingProfile());
+ profile_->CreateHistoryService(true, false);
+ profile_->CreateBookmarkModel(true);
+ profile_->BlockUntilBookmarkModelLoaded();
+ history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
+ EXPECT_TRUE(history_service_);
+ provider_ = new HistoryQuickProvider(this, profile_.get());
+ FillData();
+}
+
void HistoryQuickProviderTest::OnProviderUpdate(bool updated_matches) {
MessageLoop::current()->Quit();
}
@@ -155,29 +170,24 @@ void HistoryQuickProviderTest::FillData() {
provider_->SetIndexForTesting(index);
}
-class SetShouldContain : public std::unary_function<const std::string&,
- std::set<std::string> > {
- public:
- explicit SetShouldContain(const ACMatches& matched_urls) {
- for (ACMatches::const_iterator iter = matched_urls.begin();
- iter != matched_urls.end(); ++iter)
- matches_.insert(iter->destination_url.spec());
- }
-
- void operator()(const std::string& expected) {
- EXPECT_EQ(1U, matches_.erase(expected)) << "Results did not contain '"
- << expected << "' but should have.";
- }
+HistoryQuickProviderTest::SetShouldContain::SetShouldContain(
+ const ACMatches& matched_urls) {
+ for (ACMatches::const_iterator iter = matched_urls.begin();
+ iter != matched_urls.end(); ++iter)
+ matches_.insert(iter->destination_url.spec());
+}
- std::set<std::string> LeftOvers() const { return matches_; }
+void HistoryQuickProviderTest::SetShouldContain::operator()(
+ const std::string& expected) {
+ EXPECT_EQ(1U, matches_.erase(expected))
+ << "Results did not contain '" << expected << "' but should have.";
+}
- private:
- std::set<std::string> matches_;
-};
void HistoryQuickProviderTest::RunTest(const string16 text,
std::vector<std::string> expected_urls,
std::string expected_top_result) {
+ SCOPED_TRACE(text); // Minimal hint to query being run.
std::sort(expected_urls.begin(), expected_urls.end());
MessageLoop::current()->RunAllPending();
@@ -215,23 +225,28 @@ void HistoryQuickProviderTest::RunTest(const string16 text,
}
TEST_F(HistoryQuickProviderTest, SimpleSingleMatch) {
- string16 text(ASCIIToUTF16("slashdot"));
std::string expected_url("http://slashdot.org/favorite_page.html");
std::vector<std::string> expected_urls;
expected_urls.push_back(expected_url);
- RunTest(text, expected_urls, expected_url);
+ RunTest(ASCIIToUTF16("slashdot"), expected_urls, expected_url);
+}
+
+TEST_F(HistoryQuickProviderTest, MultiTermTitleMatch) {
+ std::string expected_url(
+ "http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice");
+ std::vector<std::string> expected_urls;
+ expected_urls.push_back(expected_url);
+ RunTest(ASCIIToUTF16("mice other animals"), expected_urls, expected_url);
}
TEST_F(HistoryQuickProviderTest, NonWordLastCharacterMatch) {
- string16 text(ASCIIToUTF16("slashdot.org/"));
std::string expected_url("http://slashdot.org/favorite_page.html");
std::vector<std::string> expected_urls;
expected_urls.push_back(expected_url);
- RunTest(text, expected_urls, expected_url);
+ RunTest(ASCIIToUTF16("slashdot.org/"), expected_urls, expected_url);
}
TEST_F(HistoryQuickProviderTest, MultiMatch) {
- string16 text(ASCIIToUTF16("foo"));
std::vector<std::string> expected_urls;
// Scores high because of typed_count.
expected_urls.push_back("http://foo.com/");
@@ -239,52 +254,51 @@ TEST_F(HistoryQuickProviderTest, MultiMatch) {
expected_urls.push_back("http://foo.com/dir/another/");
// Scores high because of high visit count.
expected_urls.push_back("http://foo.com/dir/another/again/myfile.html");
- RunTest(text, expected_urls, "http://foo.com/");
+ RunTest(ASCIIToUTF16("foo"), expected_urls, "http://foo.com/");
}
TEST_F(HistoryQuickProviderTest, StartRelativeMatch) {
- string16 text(ASCIIToUTF16("xyz"));
std::vector<std::string> expected_urls;
expected_urls.push_back("http://xyzabcdefghijklmnopqrstuvw.com/a");
expected_urls.push_back("http://abcxyzdefghijklmnopqrstuvw.com/a");
expected_urls.push_back("http://abcdefxyzghijklmnopqrstuvw.com/a");
- RunTest(text, expected_urls, "http://xyzabcdefghijklmnopqrstuvw.com/a");
+ RunTest(ASCIIToUTF16("xyz"), expected_urls,
+ "http://xyzabcdefghijklmnopqrstuvw.com/a");
}
TEST_F(HistoryQuickProviderTest, VisitCountMatches) {
- string16 text(ASCIIToUTF16("visitedest"));
std::vector<std::string> expected_urls;
expected_urls.push_back("http://visitedest.com/y/a");
expected_urls.push_back("http://visitedest.com/y/b");
expected_urls.push_back("http://visitedest.com/x/c");
- RunTest(text, expected_urls, "http://visitedest.com/y/a");
+ RunTest(ASCIIToUTF16("visitedest"), expected_urls,
+ "http://visitedest.com/y/a");
}
TEST_F(HistoryQuickProviderTest, TypedCountMatches) {
- string16 text(ASCIIToUTF16("typeredest"));
std::vector<std::string> expected_urls;
expected_urls.push_back("http://typeredest.com/y/a");
expected_urls.push_back("http://typeredest.com/y/b");
expected_urls.push_back("http://typeredest.com/x/c");
- RunTest(text, expected_urls, "http://typeredest.com/y/a");
+ RunTest(ASCIIToUTF16("typeredest"), expected_urls,
+ "http://typeredest.com/y/a");
}
TEST_F(HistoryQuickProviderTest, DaysAgoMatches) {
- string16 text(ASCIIToUTF16("daysagoest"));
std::vector<std::string> expected_urls;
expected_urls.push_back("http://daysagoest.com/y/a");
expected_urls.push_back("http://daysagoest.com/y/b");
expected_urls.push_back("http://daysagoest.com/x/c");
- RunTest(text, expected_urls, "http://daysagoest.com/y/a");
+ RunTest(ASCIIToUTF16("daysagoest"), expected_urls,
+ "http://daysagoest.com/y/a");
}
TEST_F(HistoryQuickProviderTest, EncodingLimitMatch) {
- string16 text(ASCIIToUTF16("ice"));
std::vector<std::string> expected_urls;
std::string url(
"http://cda.com/Dogs%20Cats%20Gorillas%20Sea%20Slugs%20and%20Mice");
expected_urls.push_back(url);
- RunTest(text, expected_urls, url);
+ RunTest(ASCIIToUTF16("ice"), expected_urls, url);
// Verify that the matches' ACMatchClassifications offsets are in range.
ACMatchClassifications content(ac_matches_[0].contents_class);
// The max offset accounts for 6 occurrences of '%20' plus the 'http://'.
@@ -293,7 +307,7 @@ TEST_F(HistoryQuickProviderTest, EncodingLimitMatch) {
citer != content.end(); ++citer)
EXPECT_LT(citer->offset, max_offset);
ACMatchClassifications description(ac_matches_[0].description_class);
- std::string page_title("Dogs & Cats & Mice");
+ std::string page_title("Dogs & Cats & Mice & Other Animals");
for (ACMatchClassifications::const_iterator diter = description.begin();
diter != description.end(); ++diter)
EXPECT_LT(diter->offset, page_title.length());
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 658dac3..37db2b4 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -76,31 +76,26 @@ bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
return m1.raw_score >= m2.raw_score;
}
-struct InMemoryURLIndex::TermCharWordSet {
- TermCharWordSet() // Required for STL resize().
- : char_(0),
- word_id_set_(),
- used_(false) {}
- TermCharWordSet(const char16& uni_char,
- const WordIDSet& word_id_set,
- bool used)
- : char_(uni_char),
- word_id_set_(word_id_set),
- used_(used) {}
-
- // Predicate for STL algorithm use.
- bool is_not_used() const { return !used_; }
-
- char16 char_;
- WordIDSet word_id_set_;
- bool used_; // true if this set has been used for the current term search.
-};
+InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
+ const WordIDSet& word_id_set,
+ const HistoryIDSet& history_id_set)
+ : word_id_set_(word_id_set),
+ history_id_set_(history_id_set),
+ used_(true) {}
+
+InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()
+ : used_(true) {}
// Comparison function for sorting TermMatches by their offsets.
bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
return m1.offset < m2.offset;
}
+// Comparison function for sorting search terms by descending length.
+bool LengthGreater(const string16& string_a, const string16& string_b) {
+ return string_a.length() > string_b.length();
+}
+
// std::accumulate helper function to add up TermMatches' lengths.
int AccumulateMatchLength(int total, const TermMatch& match) {
return total + match.length;
@@ -249,8 +244,8 @@ void InMemoryURLIndex::ClearPrivateData() {
word_map_.clear();
char_word_map_.clear();
word_id_history_map_.clear();
- term_char_word_set_cache_.clear();
history_info_map_.clear();
+ search_term_cache_.clear();
}
bool InMemoryURLIndex::RestoreFromCacheFile() {
@@ -349,7 +344,7 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
history_info_map_.erase(row_id);
}
// This invalidates the cache.
- term_char_word_set_cache_.clear();
+ search_term_cache_.clear();
// TODO(mrossetti): Record this transaction in the cache.
}
@@ -361,7 +356,7 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) {
// will propagate to the user.
history_info_map_.erase(row_id);
// This invalidates the word cache.
- term_char_word_set_cache_.clear();
+ search_term_cache_.clear();
// TODO(mrossetti): Record this transaction in the cache.
}
@@ -371,9 +366,9 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
const String16Vector& terms) {
ScoredHistoryMatches scored_items;
if (!terms.empty()) {
- // Reset used_ flags for term_char_word_set_cache_. We use a basic mark-
- // and-sweep approach.
- ResetTermCharWordSetCache();
+ // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
+ // approach.
+ ResetSearchTermCache();
// Lowercase the terms.
// TODO(mrossetti): Another opportunity for a transform algorithm.
@@ -382,7 +377,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
term_iter != terms.end(); ++term_iter)
lower_terms.push_back(base::i18n::ToLower(*term_iter));
- String16Vector::value_type all_terms(JoinString(lower_terms, ' '));
+ string16 all_terms(JoinString(lower_terms, ' '));
HistoryIDSet history_id_set = HistoryIDSetFromWords(all_terms);
// Don't perform any scoring (and don't return any matches) if the
@@ -409,23 +404,22 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
}
}
- // Remove any stale TermCharWordSet's.
- term_char_word_set_cache_.erase(
- std::remove_if(term_char_word_set_cache_.begin(),
- term_char_word_set_cache_.end(),
- std::mem_fun_ref(&TermCharWordSet::is_not_used)),
- term_char_word_set_cache_.end());
+ // Remove any stale SearchTermCacheItems.
+ for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
+ cache_iter != search_term_cache_.end(); ) {
+ if (!cache_iter->second.used_)
+ search_term_cache_.erase(cache_iter++);
+ else
+ ++cache_iter;
+ }
+
return scored_items;
}
-void InMemoryURLIndex::ResetTermCharWordSetCache() {
- // TODO(mrossetti): Consider keeping more of the cache around for possible
- // repeat searches, except a 'shortcuts' approach might be better for that.
- // TODO(mrossetti): Change TermCharWordSet to a class and use for_each.
- for (TermCharWordSetVector::iterator iter =
- term_char_word_set_cache_.begin();
- iter != term_char_word_set_cache_.end(); ++iter)
- iter->used_ = false;
+void InMemoryURLIndex::ResetSearchTermCache() {
+ for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
+ iter != search_term_cache_.end(); ++iter)
+ iter->second.used_ = false;
}
InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
@@ -436,49 +430,124 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
// a string like "http://www.somewebsite.com" which, from our perspective,
// is four words: 'http', 'www', 'somewebsite', and 'com'.
HistoryIDSet history_id_set;
- String16Set words = WordSetFromString16(uni_string);
- bool first_word = true;
- for (String16Set::iterator iter = words.begin();
- iter != words.end(); ++iter) {
- String16Set::value_type uni_word = *iter;
- HistoryIDSet term_history_id_set =
- InMemoryURLIndex::HistoryIDsForTerm(uni_word);
- if (first_word) {
- history_id_set.swap(term_history_id_set);
- first_word = false;
- } else {
- HistoryIDSet old_history_id_set(history_id_set);
+ String16Vector terms = WordVectorFromString16(uni_string, true);
+ // Sort the terms into the longest first as such are likely to narrow down
+ // the results quicker. Also, single character terms are the most expensive
+ // to process so save them for last.
+ std::sort(terms.begin(), terms.end(), LengthGreater);
+ for (String16Vector::iterator iter = terms.begin(); iter != terms.end();
+ ++iter) {
+ string16 uni_word = *iter;
+ HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
+ if (term_history_set.empty()) {
history_id_set.clear();
- std::set_intersection(old_history_id_set.begin(),
- old_history_id_set.end(),
- term_history_id_set.begin(),
- term_history_id_set.end(),
- std::inserter(history_id_set,
- history_id_set.begin()));
+ break;
+ }
+ if (iter == terms.begin()) {
+ history_id_set.swap(term_history_set);
+ } else {
+ HistoryIDSet new_history_id_set;
+ std::set_intersection(history_id_set.begin(), history_id_set.end(),
+ term_history_set.begin(), term_history_set.end(),
+ std::inserter(new_history_id_set,
+ new_history_id_set.begin()));
+ history_id_set.swap(new_history_id_set);
}
}
return history_id_set;
}
InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
- const string16& uni_word) {
- InMemoryURLIndex::HistoryIDSet history_id_set;
-
- // For each unique character in the word, in order of first appearance, get
- // the char/word_id map entry and intersect with the set in an incremental
- // manner.
- Char16Vector uni_chars = Char16VectorFromString16(uni_word);
- WordIDSet word_id_set(WordIDSetForTermChars(uni_chars));
-
- // TODO(mrossetti): At this point, as a possible optimization, we could
- // scan through all candidate words and make sure the |uni_word| is a
- // substring within the candidate words, eliminating those which aren't.
- // I'm not sure it would be worth the effort. And remember, we've got to do
- // a final substring match in order to get the highlighting ranges later
- // in the process in any case.
+ const string16& term) {
+ if (term.empty())
+ return HistoryIDSet();
+
+ // TODO(mrossetti): Consider optimizing for very common terms such as
+ // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
+ // occuring words in the user's searches.
+
+ size_t term_length = term.length();
+ InMemoryURLIndex::WordIDSet word_id_set;
+ if (term_length > 1) {
+ // See if this term or a prefix thereof is present in the cache.
+ SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
+ for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
+ cache_iter != search_term_cache_.end(); ++cache_iter) {
+ if (StartsWith(term, cache_iter->first, false) &&
+ (best_prefix == search_term_cache_.end() ||
+ cache_iter->first.length() > best_prefix->first.length()))
+ best_prefix = cache_iter;
+ }
+
+ // If a prefix was found then determine the leftover characters to be used
+ // for further refining the results from that prefix.
+ Char16Set prefix_chars;
+ string16 leftovers(term);
+ if (best_prefix != search_term_cache_.end()) {
+ // If the prefix is an exact match for the term then grab the cached
+ // results and we're done.
+ size_t prefix_length = best_prefix->first.length();
+ if (prefix_length == term_length) {
+ best_prefix->second.used_ = true;
+ return best_prefix->second.history_id_set_;
+ }
+
+ // Otherwise we have a handy starting point.
+ // If there are no history results for this prefix then we can bail early
+ // as there will be no history results for the full term.
+ if (best_prefix->second.history_id_set_.empty()) {
+ search_term_cache_[term] = SearchTermCacheItem();
+ return HistoryIDSet();
+ }
+ word_id_set = best_prefix->second.word_id_set_;
+ prefix_chars = Char16SetFromString16(best_prefix->first);
+ leftovers = term.substr(prefix_length);
+ }
+
+ // Filter for each remaining, unique character in the term.
+ Char16Set leftover_chars = Char16SetFromString16(leftovers);
+ Char16Set unique_chars;
+ std::set_difference(leftover_chars.begin(), leftover_chars.end(),
+ prefix_chars.begin(), prefix_chars.end(),
+ std::inserter(unique_chars, unique_chars.begin()));
+
+ // Reduce the word set with any leftover, unprocessed characters.
+ if (!unique_chars.empty()) {
+ WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
+ // We might come up empty on the leftovers.
+ if (leftover_set.empty()) {
+ search_term_cache_[term] = SearchTermCacheItem();
+ return HistoryIDSet();
+ }
+ // Or there may not have been a prefix from which to start.
+ if (prefix_chars.empty()) {
+ word_id_set.swap(leftover_set);
+ } else {
+ WordIDSet new_word_id_set;
+ std::set_intersection(word_id_set.begin(), word_id_set.end(),
+ leftover_set.begin(), leftover_set.end(),
+ std::inserter(new_word_id_set,
+ new_word_id_set.begin()));
+ word_id_set.swap(new_word_id_set);
+ }
+ }
+
+ // We must filter the word list because the resulting word set surely
+ // contains words which do not have the search term as a proper subset.
+ for (WordIDSet::iterator word_set_iter = word_id_set.begin();
+ word_set_iter != word_id_set.end(); ) {
+ if (word_list_[*word_set_iter].find(term) == string16::npos)
+ word_id_set.erase(word_set_iter++);
+ else
+ ++word_set_iter;
+ }
+ } else {
+ word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
+ }
// If any words resulted then we can compose a set of history IDs by unioning
// the sets from each word.
+ HistoryIDSet history_id_set;
if (!word_id_set.empty()) {
for (WordIDSet::iterator word_id_iter = word_id_set.begin();
word_id_iter != word_id_set.end(); ++word_id_iter) {
@@ -492,6 +561,11 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
}
}
+ // Record a new cache entry for this word if the term is longer than
+ // a single character.
+ if (term_length > 1)
+ search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
+
return history_id_set;
}
@@ -533,37 +607,22 @@ InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
}
// static
-InMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16(
- const string16& uni_word) {
- InMemoryURLIndex::Char16Vector characters;
- InMemoryURLIndex::Char16Set unique_characters;
- for (string16::const_iterator iter = uni_word.begin();
- iter != uni_word.end(); ++iter) {
- if (!unique_characters.count(*iter)) {
- unique_characters.insert(*iter);
- characters.push_back(*iter);
- }
- }
- return characters;
-}
-
-// static
InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
- const string16& uni_word) {
+ const string16& term) {
Char16Set characters;
- for (string16::const_iterator iter = uni_word.begin();
- iter != uni_word.end(); ++iter)
+ for (string16::const_iterator iter = term.begin(); iter != term.end();
+ ++iter)
characters.insert(*iter);
return characters;
}
-void InMemoryURLIndex::AddWordToIndex(const string16& uni_word,
+void InMemoryURLIndex::AddWordToIndex(const string16& term,
HistoryID history_id) {
- WordMap::iterator word_pos = word_map_.find(uni_word);
+ WordMap::iterator word_pos = word_map_.find(term);
if (word_pos != word_map_.end())
UpdateWordHistory(word_pos->second, history_id);
else
- AddWordHistory(uni_word, history_id);
+ AddWordHistory(term, history_id);
}
void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
@@ -575,20 +634,20 @@ void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
// Add a new word to the word list and the word map, and then create a
// new entry in the word/history map.
-void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
+void InMemoryURLIndex::AddWordHistory(const string16& term,
HistoryID history_id) {
- word_list_.push_back(uni_word);
+ word_list_.push_back(term);
WordID word_id = word_list_.size() - 1;
- word_map_[uni_word] = word_id;
+ word_map_[term] = word_id;
HistoryIDSet history_id_set;
history_id_set.insert(history_id);
word_id_history_map_[word_id] = history_id_set;
// For each character in the newly added word (i.e. a word that is not
// already in the word index), add the word to the character index.
- Char16Set characters = Char16SetFromString16(uni_word);
+ Char16Set characters = Char16SetFromString16(term);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
- Char16Set::value_type uni_char = *uni_char_iter;
+ char16 uni_char = *uni_char_iter;
CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
if (char_iter != char_word_map_.end()) {
// Update existing entry in the char/word index.
@@ -604,29 +663,11 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
}
InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
- const InMemoryURLIndex::Char16Vector& uni_chars) {
- size_t index = CachedResultsIndexForTerm(uni_chars);
-
- // If there were no unprocessed characters in the search term |uni_chars|
- // then we can use the cached one as-is as the results with no further
- // filtering.
- if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1)
- return term_char_word_set_cache_[index].word_id_set_;
-
- // Some or all of the characters remain to be indexed so trim the cache.
- if (index + 1 < term_char_word_set_cache_.size())
- term_char_word_set_cache_.resize(index + 1);
+ const Char16Set& term_chars) {
WordIDSet word_id_set;
- // Take advantage of our cached starting point, if any.
- Char16Vector::const_iterator c_iter = uni_chars.begin();
- if (index != kNoCachedResultForTerm) {
- word_id_set = term_char_word_set_cache_[index].word_id_set_;
- c_iter += index + 1;
- }
- // Now process the remaining characters in the search term.
- for (; c_iter != uni_chars.end(); ++c_iter) {
- Char16Vector::value_type uni_char = *c_iter;
- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
+ for (Char16Set::const_iterator c_iter = term_chars.begin();
+ c_iter != term_chars.end(); ++c_iter) {
+ CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
if (char_iter == char_word_map_.end()) {
// A character was not found so there are no matching results: bail.
word_id_set.clear();
@@ -640,37 +681,22 @@ InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
break;
}
- if (word_id_set.empty()) {
+ if (c_iter == term_chars.begin()) {
+ // First character results becomes base set of results.
word_id_set = char_word_id_set;
} else {
- WordIDSet old_word_id_set(word_id_set);
- word_id_set.clear();
- std::set_intersection(old_word_id_set.begin(),
- old_word_id_set.end(),
- char_word_id_set.begin(),
- char_word_id_set.end(),
- std::inserter(word_id_set,
- word_id_set.begin()));
+ // Subsequent character results get intersected in.
+ WordIDSet new_word_id_set;
+ std::set_intersection(word_id_set.begin(), word_id_set.end(),
+ char_word_id_set.begin(), char_word_id_set.end(),
+ std::inserter(new_word_id_set,
+ new_word_id_set.begin()));
+ word_id_set.swap(new_word_id_set);
}
- // Add this new char/set instance to the cache.
- term_char_word_set_cache_.push_back(TermCharWordSet(
- uni_char, word_id_set, true));
}
return word_id_set;
}
-size_t InMemoryURLIndex::CachedResultsIndexForTerm(
- const InMemoryURLIndex::Char16Vector& uni_chars) {
- TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin();
- for (Char16Vector::const_iterator c_iter = uni_chars.begin();
- (c_iter != uni_chars.end()) &&
- (t_iter != term_char_word_set_cache_.end()) &&
- (*c_iter == t_iter->char_);
- ++c_iter, ++t_iter)
- t_iter->used_ = true; // Update the cache.
- return t_iter - term_char_word_set_cache_.begin() - 1;
-}
-
// static
TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
const string16& string,
@@ -681,7 +707,7 @@ TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
TermMatches matches;
for (size_t location = short_string.find(term); location != string16::npos;
location = short_string.find(term, location + 1)) {
- matches.push_back(TermMatch(term_num, location, term.size()));
+ matches.push_back(TermMatch(term_num, location, term.length()));
}
return matches;
}
@@ -691,8 +717,7 @@ TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
if (matches.empty())
return matches;
TermMatches sorted_matches = matches;
- std::sort(sorted_matches.begin(), sorted_matches.end(),
- MatchOffsetLess);
+ std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
TermMatches clean_matches;
TermMatch last_match = sorted_matches[0];
clean_matches.push_back(last_match);
@@ -775,10 +800,10 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
// Get partial scores based on term matching. Note that the score for
// each of the URL and title are adjusted by the fraction of the
// terms appearing in each.
- int url_score = ScoreComponentForMatches(match.url_matches, url.size()) *
+ int url_score = ScoreComponentForMatches(match.url_matches, url.length()) *
match.url_matches.size() / terms.size();
int title_score =
- ScoreComponentForMatches(match.title_matches, title.size()) *
+ ScoreComponentForMatches(match.title_matches, title.length()) *
match.title_matches.size() / terms.size();
// Arbitrarily pick the best.
// TODO(mrossetti): It might make sense that a term which appears in both the
@@ -1086,7 +1111,7 @@ void InMemoryURLIndex::SaveHistoryInfoMap(
map_entry->set_history_id(iter->first);
const URLRow& url_row(iter->second);
// Note: We only save information that contributes to the index so there
- // is no need to save term_char_word_set_cache_ (not persistent),
+ // is no need to save search_term_cache_ (not persistent),
// languages_, etc.
map_entry->set_visit_count(url_row.visit_count());
map_entry->set_typed_count(url_row.typed_count());
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index 44ad146..b7ff33d 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -176,10 +176,12 @@ class InMemoryURLIndex {
private:
friend class AddHistoryMatch;
+ friend class InMemoryURLIndexTest;
FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities);
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
@@ -214,18 +216,35 @@ class InMemoryURLIndex {
typedef std::set<HistoryID> HistoryIDSet;
typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
- // Support caching of term character results so that we can optimize
- // searches which build upon a previous search. Each entry in this vector
- // represents a progressive reduction of the result set for each unique
- // character found in the search term, with each character being taken as
- // initially encountered. For example, once the results for the search
- // term of "mextexarkana" have been fully determined, this vector will
- // contain one entry for the characters: m, e, x, t, a, r, k, & n, in
- // that order. The result sets will naturally shrink in size for each
- // subsequent character as the sets intersections are taken in an
- // incremental manner.
- struct TermCharWordSet;
- typedef std::vector<TermCharWordSet> TermCharWordSetVector;
+
+ // Support caching of term results so that we can optimize searches which
+ // build upon a previous search. Each entry in this map represents one
+ // search term from the most recent search. For example, if the user had
+ // typed "google blog trans" and then typed an additional 'l' (at the end,
+ // of course) then there would be four items in the cache: 'blog', 'google',
+ // 'trans', and 'transl'. All would be marked as being in use except for the
+ // 'trans' item; its cached data would have been used when optimizing the
+ // construction of the search results candidates for 'transl' but then would
+ // no longer needed.
+ //
+ // Items stored in the search term cache. If a search term exactly matches one
+ // in the cache then we can quickly supply the proper |history_id_set_| (and
+ // marking the cache item as being |used_|. If we find a prefix for a search
+ // term in the cache (which is very likely to occur as the user types each
+ // term into the omnibox) then we can short-circuit the index search for those
+ // characters in the prefix by returning the |word_id_set|. In that case we do
+ // not mark the item as being |used_|.
+ struct SearchTermCacheItem {
+ SearchTermCacheItem(const WordIDSet& word_id_set,
+ const HistoryIDSet& history_id_set);
+ // Creates a cache item for a term which has no results.
+ SearchTermCacheItem();
+
+ WordIDSet word_id_set_;
+ HistoryIDSet history_id_set_;
+ bool used_; // True if this item has been used for the current term search.
+ };
+ typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap;
// TODO(rohitrao): Probably replace this with QueryResults.
typedef std::vector<URLRow> URLRowVector;
@@ -262,19 +281,8 @@ class InMemoryURLIndex {
// Breaks a string down into individual words.
static String16Set WordSetFromString16(const string16& uni_string);
- // Given a vector of Char16s, representing the characters the user has typed
- // into the omnibox, finds words containing those characters. If any
- // existing, cached set is a proper subset then starts with that cached
- // set. Updates the previously-typed-character cache.
- WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars);
-
- // Given a vector of Char16s in |uni_chars|, compare those characters, in
- // order, with the previously searched term, returning the index of the
- // cached results in the term_char_word_set_cache_ of the set with best
- // matching number of characters. Returns kNoCachedResultForTerm if there
- // was no match at all (i.e. the first character of |uni-chars| is not the
- // same as the character of the first entry in term_char_word_set_cache_).
- size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars);
+ // Given a set of Char16s, finds words containing those characters.
+ WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
// Creates a TermMatches which has an entry for each occurrence of the string
// |term| found in the string |string|. Mark each match with |term_num| so
@@ -289,10 +297,6 @@ class InMemoryURLIndex {
// Indexes one URL history item.
bool IndexRow(const URLRow& row);
- // Breaks a string down into unique, individual characters in the order
- // in which the characters are first encountered in the |uni_word| string.
- static Char16Vector Char16VectorFromString16(const string16& uni_word);
-
// Breaks the |uni_word| string down into its individual characters.
// Note that this is temporarily intended to work on a single word, but
// _will_ work on a string of words, perhaps with unexpected results.
@@ -315,18 +319,16 @@ class InMemoryURLIndex {
// |history_id| as the initial element of the word's set.
void AddWordHistory(const string16& uni_word, HistoryID history_id);
- // Clears the search term cache. This cache holds on to the intermediate
- // word results for each previously typed character to eliminate the need
- // to re-perform set operations for previously typed characters.
- void ResetTermCharWordSetCache();
+ // Clears |used_| for each item in the search term cache.
+ void ResetSearchTermCache();
// Composes a set of history item IDs by intersecting the set for each word
// in |uni_string|.
HistoryIDSet HistoryIDSetFromWords(const string16& uni_string);
// Helper function to HistoryIDSetFromWords which composes a set of history
- // ids for the given term given in |uni_word|.
- HistoryIDSet HistoryIDsForTerm(const string16& uni_word);
+ // ids for the given term given in |term|.
+ HistoryIDSet HistoryIDsForTerm(const string16& term);
// Calculates a raw score for this history item by first determining
// if all of the terms in |terms_vector| occur in |row| and, if so,
@@ -405,8 +407,12 @@ class InMemoryURLIndex {
WordMap word_map_;
CharWordIDMap char_word_map_;
WordIDHistoryMap word_id_history_map_;
- TermCharWordSetVector term_char_word_set_cache_;
HistoryInfoMap history_info_map_;
+
+ // Cache of search terms.
+ SearchTermCacheMap search_term_cache_;
+
+ // Languages used during the word-breaking process during indexing.
std::string languages_;
// Only URLs with a whitelisted scheme are indexed.
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index 776e18f..cc10215 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -43,122 +43,165 @@ class InMemoryURLIndexTest : public testing::Test,
protected:
// Test setup.
- virtual void SetUp() {
- // Create and populate a working copy of the URL history database.
- FilePath history_proto_path;
- PathService::Get(chrome::DIR_TEST_DATA, &history_proto_path);
- history_proto_path = history_proto_path.Append(
- FILE_PATH_LITERAL("History"));
- history_proto_path = history_proto_path.Append(TestDBName());
- EXPECT_TRUE(file_util::PathExists(history_proto_path));
-
- std::ifstream proto_file(history_proto_path.value().c_str());
- static const size_t kCommandBufferMaxSize = 2048;
- char sql_cmd_line[kCommandBufferMaxSize];
-
- sql::Connection& db(GetDB());
- {
- sql::Transaction transaction(&db);
- transaction.Begin();
- while (!proto_file.eof()) {
- proto_file.getline(sql_cmd_line, kCommandBufferMaxSize);
- if (!proto_file.eof()) {
- // We only process lines which begin with a upper-case letter.
- // TODO(mrossetti): Can iswupper() be used here?
- if (sql_cmd_line[0] >= 'A' && sql_cmd_line[0] <= 'Z') {
- std::string sql_cmd(sql_cmd_line);
- sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line));
- EXPECT_TRUE(sql_stmt.Run());
- }
- }
- }
- transaction.Commit();
- }
- proto_file.close();
-
- // Update the last_visit_time table column
- // such that it represents a time relative to 'now'.
- sql::Statement statement(db.GetUniqueStatement(
- "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
- EXPECT_TRUE(statement);
- base::Time time_right_now = base::Time::NowFromSystemTime();
- base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
- {
- sql::Transaction transaction(&db);
- transaction.Begin();
- while (statement.Step()) {
- URLRow row;
- FillURLRow(statement, &row);
- base::Time last_visit = time_right_now;
- for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i)
- last_visit -= day_delta;
- row.set_last_visit(last_visit);
- UpdateURLRow(row.id(), row);
- }
- transaction.Commit();
- }
- }
+ virtual void SetUp();
- virtual FilePath::StringType TestDBName() const {
- return FILE_PATH_LITERAL("url_history_provider_test.db.txt");
- }
+ // Allows the database containing the test data to be customized by
+ // subclasses.
+ virtual FilePath::StringType TestDBName() const;
+ // Convenience function to create a URLRow with basic data for |url|, |title|,
+ // |visit_count|, and |typed_count|. |last_visit_ago| gives the number of
+ // days from now to set the URL's last_visit.
URLRow MakeURLRow(const char* url,
const char* title,
int visit_count,
int last_visit_ago,
- int typed_count) {
- URLRow row(GURL(url), 0);
- row.set_title(UTF8ToUTF16(title));
- row.set_visit_count(visit_count);
- row.set_typed_count(typed_count);
- row.set_last_visit(base::Time::NowFromSystemTime() -
- base::TimeDelta::FromDays(last_visit_ago));
- return row;
- }
-
- InMemoryURLIndex::String16Vector Make1Term(const char* term) {
- InMemoryURLIndex::String16Vector terms;
- terms.push_back(UTF8ToUTF16(term));
- return terms;
- }
+ int typed_count);
+ // Convenience functions for easily creating vectors of search terms.
+ InMemoryURLIndex::String16Vector Make1Term(const char* term) const;
InMemoryURLIndex::String16Vector Make2Terms(const char* term_1,
- const char* term_2) {
- InMemoryURLIndex::String16Vector terms;
- terms.push_back(UTF8ToUTF16(term_1));
- terms.push_back(UTF8ToUTF16(term_2));
- return terms;
- }
+ const char* term_2) const;
+
+ // Validates that the given |term| is contained in |cache| and that it is
+ // marked as in-use.
+ void CheckTerm(const InMemoryURLIndex::SearchTermCacheMap& cache,
+ string16 term) const;
scoped_ptr<InMemoryURLIndex> url_index_;
};
+void InMemoryURLIndexTest::SetUp() {
+ // Create and populate a working copy of the URL history database.
+ FilePath history_proto_path;
+ PathService::Get(chrome::DIR_TEST_DATA, &history_proto_path);
+ history_proto_path = history_proto_path.Append(
+ FILE_PATH_LITERAL("History"));
+ history_proto_path = history_proto_path.Append(TestDBName());
+ EXPECT_TRUE(file_util::PathExists(history_proto_path));
+
+ std::ifstream proto_file(history_proto_path.value().c_str());
+ static const size_t kCommandBufferMaxSize = 2048;
+ char sql_cmd_line[kCommandBufferMaxSize];
+
+ sql::Connection& db(GetDB());
+ {
+ sql::Transaction transaction(&db);
+ transaction.Begin();
+ while (!proto_file.eof()) {
+ proto_file.getline(sql_cmd_line, kCommandBufferMaxSize);
+ if (!proto_file.eof()) {
+ // We only process lines which begin with a upper-case letter.
+ // TODO(mrossetti): Can iswupper() be used here?
+ if (sql_cmd_line[0] >= 'A' && sql_cmd_line[0] <= 'Z') {
+ std::string sql_cmd(sql_cmd_line);
+ sql::Statement sql_stmt(db.GetUniqueStatement(sql_cmd_line));
+ EXPECT_TRUE(sql_stmt.Run());
+ }
+ }
+ }
+ transaction.Commit();
+ }
+ proto_file.close();
+
+ // Update the last_visit_time table column
+ // such that it represents a time relative to 'now'.
+ sql::Statement statement(db.GetUniqueStatement(
+ "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
+ EXPECT_TRUE(statement);
+ base::Time time_right_now = base::Time::NowFromSystemTime();
+ base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
+ {
+ sql::Transaction transaction(&db);
+ transaction.Begin();
+ while (statement.Step()) {
+ URLRow row;
+ FillURLRow(statement, &row);
+ base::Time last_visit = time_right_now;
+ for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i)
+ last_visit -= day_delta;
+ row.set_last_visit(last_visit);
+ UpdateURLRow(row.id(), row);
+ }
+ transaction.Commit();
+ }
+}
+
+FilePath::StringType InMemoryURLIndexTest::TestDBName() const {
+ return FILE_PATH_LITERAL("url_history_provider_test.db.txt");
+}
+
+URLRow InMemoryURLIndexTest::MakeURLRow(const char* url,
+ const char* title,
+ int visit_count,
+ int last_visit_ago,
+ int typed_count) {
+ URLRow row(GURL(url), 0);
+ row.set_title(UTF8ToUTF16(title));
+ row.set_visit_count(visit_count);
+ row.set_typed_count(typed_count);
+ row.set_last_visit(base::Time::NowFromSystemTime() -
+ base::TimeDelta::FromDays(last_visit_ago));
+ return row;
+}
+
+InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make1Term(
+ const char* term) const {
+ InMemoryURLIndex::String16Vector terms;
+ terms.push_back(UTF8ToUTF16(term));
+ return terms;
+}
+
+InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make2Terms(
+ const char* term_1,
+ const char* term_2) const {
+ InMemoryURLIndex::String16Vector terms;
+ terms.push_back(UTF8ToUTF16(term_1));
+ terms.push_back(UTF8ToUTF16(term_2));
+ return terms;
+}
+
+void InMemoryURLIndexTest::CheckTerm(
+ const InMemoryURLIndex::SearchTermCacheMap& cache,
+ string16 term) const {
+ InMemoryURLIndex::SearchTermCacheMap::const_iterator cache_iter(
+ cache.find(term));
+ ASSERT_NE(cache.end(), cache_iter)
+ << "Cache does not contain '" << term << "' but should.";
+ InMemoryURLIndex::SearchTermCacheItem cache_item = cache_iter->second;
+ EXPECT_TRUE(cache_item.used_)
+ << "Cache item '" << term << "' should be marked as being in use.";
+}
+
class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest {
protected:
- FilePath::StringType TestDBName() const {
- return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
- }
+ FilePath::StringType TestDBName() const;
};
+FilePath::StringType LimitedInMemoryURLIndexTest::TestDBName() const {
+ return FILE_PATH_LITERAL("url_history_provider_test_limited.db.txt");
+}
+
class ExpandedInMemoryURLIndexTest : public InMemoryURLIndexTest {
protected:
- virtual void SetUp() {
- InMemoryURLIndexTest::SetUp();
- // Add 600 more history items.
- // NOTE: Keep the string length constant at least the length of the format
- // string plus 5 to account for a 3 digit number and terminator.
- char url_format[] = "http://www.google.com/%d";
- const size_t kMaxLen = arraysize(url_format) + 5;
- char url_string[kMaxLen + 1];
- for (int i = 0; i < 600; ++i) {
- base::snprintf(url_string, kMaxLen, url_format, i);
- URLRow row(MakeURLRow(url_string, "Google Search", 20, 0, 20));
- AddURL(row);
- }
- }
+ virtual void SetUp();
};
+void ExpandedInMemoryURLIndexTest::SetUp() {
+ InMemoryURLIndexTest::SetUp();
+ // Add 600 more history items.
+ // NOTE: Keep the string length constant at least the length of the format
+ // string plus 5 to account for a 3 digit number and terminator.
+ char url_format[] = "http://www.google.com/%d";
+ const size_t kMaxLen = arraysize(url_format) + 5;
+ char url_string[kMaxLen + 1];
+ for (int i = 0; i < 600; ++i) {
+ base::snprintf(url_string, kMaxLen, url_format, i);
+ URLRow row(MakeURLRow(url_string, "Google Search", 20, 0, 20));
+ AddURL(row);
+ }
+}
+
TEST_F(InMemoryURLIndexTest, Construction) {
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
EXPECT_TRUE(url_index_.get());
@@ -185,13 +228,12 @@ TEST_F(LimitedInMemoryURLIndexTest, Initialization) {
TEST_F(InMemoryURLIndexTest, Retrieval) {
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
// The term will be lowercased by the search.
// See if a very specific term gives a single result.
- terms.push_back(ASCIIToUTF16("DrudgeReport"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- EXPECT_EQ(1U, matches.size());
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(Make1Term("DrudgeReport"));
+ ASSERT_EQ(1U, matches.size());
// Verify that we got back the result we expected.
EXPECT_EQ(5, matches[0].url_info.id());
@@ -199,18 +241,14 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
EXPECT_EQ(ASCIIToUTF16("DRUDGE REPORT 2010"), matches[0].url_info.title());
// Search which should result in multiple results.
- terms.clear();
- terms.push_back(ASCIIToUTF16("drudge"));
- matches = url_index_->HistoryItemsForTerms(terms);
+ matches = url_index_->HistoryItemsForTerms(Make1Term("drudge"));
ASSERT_EQ(2U, matches.size());
// The results should be in descending score order.
EXPECT_GE(matches[0].raw_score, matches[1].raw_score);
// Search which should result in nearly perfect result.
- terms.clear();
- terms.push_back(ASCIIToUTF16("https"));
- terms.push_back(ASCIIToUTF16("NearlyPerfectResult"));
- matches = url_index_->HistoryItemsForTerms(terms);
+ matches = url_index_->HistoryItemsForTerms(Make2Terms("https",
+ "NearlyPerfectResult"));
ASSERT_EQ(1U, matches.size());
// The results should have a very high score.
EXPECT_GT(matches[0].raw_score, 900);
@@ -221,7 +259,7 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
matches[0].url_info.title());
// Search which should result in very poor result.
- terms.clear();
+ InMemoryURLIndex::String16Vector terms;
terms.push_back(ASCIIToUTF16("z"));
terms.push_back(ASCIIToUTF16("y"));
terms.push_back(ASCIIToUTF16("x"));
@@ -236,26 +274,21 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
matches[0].url_info.title());
// Search which will match at the end of an URL with encoded characters.
- terms.clear();
- terms.push_back(ASCIIToUTF16("ice"));
- matches = url_index_->HistoryItemsForTerms(terms);
+ matches = url_index_->HistoryItemsForTerms(Make1Term("ice"));
ASSERT_EQ(1U, matches.size());
}
TEST_F(ExpandedInMemoryURLIndexTest, ShortCircuit) {
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
// A search for 'w' should short-circuit and not return any matches.
- terms.push_back(ASCIIToUTF16("w"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(Make1Term("w"));
EXPECT_TRUE(matches.empty());
// A search for 'working' should not short-circuit.
- terms.clear();
- terms.push_back(ASCIIToUTF16("working"));
- matches = url_index_->HistoryItemsForTerms(terms);
+ matches = url_index_->HistoryItemsForTerms(Make1Term("working"));
EXPECT_EQ(1U, matches.size());
}
@@ -271,7 +304,7 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
terms.push_back(ASCIIToUTF16("RATE"));
terms.push_back(ASCIIToUTF16("DROPS"));
ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- EXPECT_EQ(1U, matches.size());
+ ASSERT_EQ(1U, matches.size());
// Verify that we got back the result we expected.
EXPECT_EQ(1, matches[0].url_info.id());
@@ -282,19 +315,34 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
matches[0].url_info.title());
}
-TEST_F(InMemoryURLIndexTest, Char16Utilities) {
- string16 term = ASCIIToUTF16("drudgereport");
- string16 expected = ASCIIToUTF16("drugepot");
- EXPECT_EQ(expected.size(),
- InMemoryURLIndex::Char16SetFromString16(term).size());
- InMemoryURLIndex::Char16Vector c_vector =
- InMemoryURLIndex::Char16VectorFromString16(term);
- ASSERT_EQ(expected.size(), c_vector.size());
-
- InMemoryURLIndex::Char16Vector::const_iterator c_iter = c_vector.begin();
- for (string16::const_iterator s_iter = expected.begin();
- s_iter != expected.end(); ++s_iter, ++c_iter)
- EXPECT_EQ(*s_iter, *c_iter);
+TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) {
+ url_index_.reset(new InMemoryURLIndex());
+ url_index_->Init(this, "en,ja,hi,zh");
+
+ // The presence of duplicate characters should succeed. Exercise by cycling
+ // through a string with several duplicate characters.
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(Make1Term("ABRA"));
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(28, matches[0].url_info.id());
+ EXPECT_EQ("http://www.ddj.com/windows/184416623",
+ matches[0].url_info.url().spec());
+
+ matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACAD"));
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(28, matches[0].url_info.id());
+
+ matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABRA"));
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(28, matches[0].url_info.id());
+
+ matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACADABR"));
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(28, matches[0].url_info.id());
+
+ matches = url_index_->HistoryItemsForTerms(Make1Term("ABRACA"));
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(28, matches[0].url_info.id());
}
TEST_F(InMemoryURLIndexTest, StaticFunctions) {
@@ -396,48 +444,74 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
// Verify that match results for previously typed characters are retained
// (in the term_char_word_set_cache_) and reused, if possible, in future
// autocompletes.
+ typedef InMemoryURLIndex::SearchTermCacheMap::iterator CacheIter;
+ typedef InMemoryURLIndex::SearchTermCacheItem CacheItem;
+
url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
url_index_->Init(this, "en,ja,hi,zh");
- // Verify that we can find something that already exists.
- InMemoryURLIndex::String16Vector terms;
- string16 term = ASCIIToUTF16("drudgerepo");
- terms.push_back(term);
- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
+ InMemoryURLIndex::SearchTermCacheMap& cache(url_index_->search_term_cache_);
- {
- // Exercise the term matching cache with the same term.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(term);
- EXPECT_EQ(uni_chars.size(), 7U); // Equivalent to 'degopru'
- EXPECT_EQ(6U, url_index_->CachedResultsIndexForTerm(uni_chars));
- }
-
- {
- // Back off a character.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("drudgerep"));
- EXPECT_EQ(6U, uni_chars.size()); // Equivalent to 'degpru'
- EXPECT_EQ(5U, url_index_->CachedResultsIndexForTerm(uni_chars));
- }
+ // The cache should be empty at this point.
+ EXPECT_EQ(0U, cache.size());
- {
- // Add a couple of characters.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(
- ASCIIToUTF16("drudgereporta"));
- EXPECT_EQ(9U, uni_chars.size()); // Equivalent to 'adegoprtu'
- EXPECT_EQ(6U, url_index_->CachedResultsIndexForTerm(uni_chars));
- }
+ // Now simulate typing search terms into the omnibox and check the state of
+ // the cache as each item is 'typed'.
- {
- // Use different string.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("abcde"));
- EXPECT_EQ(5U, uni_chars.size());
- EXPECT_EQ(static_cast<size_t>(-1),
- url_index_->CachedResultsIndexForTerm(uni_chars));
- }
+ // Simulate typing "r" giving "r" in the simulated omnibox. The results for
+ // 'r' will be not cached because it is only 1 character long.
+ InMemoryURLIndex::String16Vector terms;
+ string16 term_r = ASCIIToUTF16("r");
+ terms.push_back(term_r);
+ url_index_->HistoryItemsForTerms(terms);
+ EXPECT_EQ(0U, cache.size());
+
+ // Simulate typing "re" giving "r re" in the simulated omnibox.
+ string16 term_re = ASCIIToUTF16("re");
+ terms.push_back(term_re);
+ // 're' should be cached at this point but not 'r' as it is a single
+ // character.
+ ASSERT_EQ(2U, terms.size());
+ url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(1U, cache.size());
+ CheckTerm(cache, term_re);
+
+ // Simulate typing "reco" giving "r re reco" in the simulated omnibox.
+ string16 term_reco = ASCIIToUTF16("reco");
+ terms.push_back(term_reco);
+ // 're' and 'reco' should be cached at this point but not 'r' as it is a
+ // single character.
+ url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, cache.size());
+ CheckTerm(cache, term_re);
+ CheckTerm(cache, term_reco);
+
+ terms.clear(); // Simulate pressing <ESC>.
+
+ // Simulate typing "mort".
+ string16 term_mort = ASCIIToUTF16("mort");
+ terms.push_back(term_mort);
+ // Since we now have only one search term, the cached results for 're' and
+ // 'reco' should be purged, giving us only 1 item in the cache (for 'mort').
+ url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(1U, cache.size());
+ CheckTerm(cache, term_mort);
+
+ // Simulate typing "reco" giving "mort reco" in the simulated omnibox.
+ terms.push_back(term_reco);
+ url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, cache.size());
+ CheckTerm(cache, term_mort);
+ CheckTerm(cache, term_reco);
+
+ // Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.
+ terms.resize(terms.size() - 1);
+ string16 term_rec = ASCIIToUTF16("rec");
+ terms.push_back(term_rec);
+ url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, cache.size());
+ CheckTerm(cache, term_mort);
+ CheckTerm(cache, term_rec);
}
TEST_F(InMemoryURLIndexTest, Scoring) {
diff --git a/chrome/test/data/History/url_history_provider_test.db.txt b/chrome/test/data/History/url_history_provider_test.db.txt
index 5fd7821..9db0ca9 100644
--- a/chrome/test/data/History/url_history_provider_test.db.txt
+++ b/chrome/test/data/History/url_history_provider_test.db.txt
@@ -50,7 +50,7 @@ INSERT INTO "urls" VALUES(24,'http://www.danilatos.com/event-test/ExperimentTest
INSERT INTO "urls" VALUES(25,'http://www.codeguru.com/','CodeGuru : codeguru',6,6,0,0,110); // Qualifies
INSERT INTO "urls" VALUES(26,'http://www.codeproject.com/','Your Development Resource - CodeProject',6,6,0,0,369); // Qualifies
INSERT INTO "urls" VALUES(27,'http://www.tomshardware.com/us/#redir','Tom''s Hardware: Hardware News, Tests and Reviews',6,6,0,0,65); // Qualifies
-INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. Dobb''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0); // Qualifies
+INSERT INTO "urls" VALUES(28,'http://www.ddj.com/windows/184416623','Dr. ABRACADABRA''s | Avoiding the Visual C++ Runtime Library | 2 1, 2003',6,6,0,0,0); // Qualifies
INSERT INTO "urls" VALUES(29,'http://svcs.cnn.com/weather/getForecast?time=34&mode=json_html&zipCode=336736767676&locCode=EGLL&celcius=true&csiID=csi2','',6,6,0,1,0); // Qualifies
INSERT INTO "urls" VALUES(30,'http://www.drudgery.com/Dogs%20and%20Mice','Life in the Slow Lane',8,2,2,0,0); // Qualifies
INSERT INTO "urls" VALUES(31,'http://www.redrudgerydo.com/','Music of the Wild Landscape',0,0,6,0,0);