diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-30 03:13:06 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-10-30 03:13:06 +0000 |
commit | 0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce (patch) | |
tree | 507a523bad332280a11599d0e782bde7da396abb /chrome | |
parent | de614d301995efdb349e998cea13c316ae3add7d (diff) | |
download | chromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.zip chromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.tar.gz chromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.tar.bz2 |
Create Private Data for InMemoryURLIndex (in Preparation for SQLite Cache)
1. Encapsulate the private, persistent data for the InMemoryURLIndex in a new class, URLIndexPrivateData (found in in_memory_url_index_types.h).
2. Move most of the support types, including the new URLIndexPrivateData class, into in_memory_url_index_types.h.
3. Correctly handle the adding and removing of page title words when a URL change is detected.
4. Replace static class member functions with non-friend, non-class functions for better flexibility.
5. Move convenience types out from InMemoryURLIndex class up into history namespace.
6. Rename convenience types to generalize their intent.
BUG=92718
TEST=Enhanced unit tests.
Review URL: http://codereview.chromium.org/8359019
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@107893 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider.cc | 19 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider.h | 5 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_quick_provider_unittest.cc | 19 | ||||
-rw-r--r-- | chrome/browser/history/history_types.h | 12 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.cc | 458 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.h | 147 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_types.cc | 198 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_types.h | 208 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_types_unittest.cc | 107 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_unittest.cc | 386 | ||||
-rw-r--r-- | chrome/chrome_browser.gypi | 2 | ||||
-rw-r--r-- | chrome/chrome_tests.gypi | 1 |
12 files changed, 918 insertions, 644 deletions
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc index d3f26d4..7dba1cc 100644 --- a/chrome/browser/autocomplete/history_quick_provider.cc +++ b/chrome/browser/autocomplete/history_quick_provider.cc @@ -16,6 +16,7 @@ #include "base/utf_string_conversions.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/net/url_fixer_upper.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -81,8 +82,7 @@ void HistoryQuickProvider::Start(const AutocompleteInput& input, } } -// HistoryQuickProvider matches are currently not deletable. -// TODO(mrossetti): Determine when a match should be deletable. +// TODO(mrossetti): Implement this function. (Will happen in next CL.) void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {} void HistoryQuickProvider::DoAutocomplete() { @@ -90,8 +90,8 @@ void HistoryQuickProvider::DoAutocomplete() { string16 term_string = autocomplete_input_.text(); term_string = net::UnescapeURLComponent(term_string, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - history::InMemoryURLIndex::String16Vector terms( - InMemoryURLIndex::WordVectorFromString16(term_string, false)); + history::String16Vector terms( + history::String16VectorFromString16(term_string, false)); ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(terms); if (matches.empty()) return; @@ -131,13 +131,12 @@ AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( // Format the URL autocomplete presentation. std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches); + OffsetsFromTermMatches(history_match.url_matches); match.contents = net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll, UnescapeRule::SPACES, NULL, NULL, &offsets); history::TermMatches new_matches = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches, - offsets); + ReplaceOffsetsInTermMatches(history_match.url_matches, offsets); match.contents_class = SpansFromTermMatch(new_matches, match.contents.length(), true); match.fill_into_edit = match.contents; @@ -170,12 +169,6 @@ history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() { return history_service->InMemoryIndex(); } -void HistoryQuickProvider::SetIndexForTesting( - history::InMemoryURLIndex* index) { - DCHECK(index); - index_for_testing_.reset(index); -} - // static int HistoryQuickProvider::CalculateRelevance( const ScoredHistoryMatch& history_match, diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h index b14fda4..c8dadf9 100644 --- a/chrome/browser/autocomplete/history_quick_provider.h +++ b/chrome/browser/autocomplete/history_quick_provider.h @@ -81,7 +81,10 @@ class HistoryQuickProvider : public HistoryProvider { bool is_url); // Only for use in unittests. Takes ownership of |index|. - void SetIndexForTesting(history::InMemoryURLIndex* index); + void set_index(history::InMemoryURLIndex* index) { + index_for_testing_.reset(index); + } + AutocompleteInput autocomplete_input_; std::string languages_; diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index c1fd137..76a17dc 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -142,6 +142,12 @@ void HistoryQuickProviderTest::OnProviderUpdate(bool updated_matches) { void HistoryQuickProviderTest::FillData() { history::URLDatabase* db = history_service_->InMemoryDatabase(); ASSERT_TRUE(db != NULL); + + history::InMemoryURLIndex* index = + new history::InMemoryURLIndex(FilePath()); + PrefService* prefs = profile_->GetPrefs(); + std::string languages(prefs->GetString(prefs::kAcceptLanguages)); + index->Init(db, languages); for (size_t i = 0; i < arraysize(quick_test_db); ++i) { const TestURLInfo& cur = quick_test_db[i]; const GURL current_url(cur.url); @@ -153,19 +159,10 @@ void HistoryQuickProviderTest::FillData() { url_info.set_typed_count(cur.typed_count); url_info.set_last_visit(visit_time); url_info.set_hidden(false); - EXPECT_TRUE(db->AddURL(url_info)); - - history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), - cur.visit_count, cur.typed_count, - visit_time, false, - history::SOURCE_BROWSED); + index->UpdateURL(i, url_info); } - history::InMemoryURLIndex* index = new history::InMemoryURLIndex(FilePath()); - PrefService* prefs = profile_->GetPrefs(); - std::string languages(prefs->GetString(prefs::kAcceptLanguages)); - index->Init(db, languages); - provider_->SetIndexForTesting(index); + provider_->set_index(index); } HistoryQuickProviderTest::SetShouldContain::SetShouldContain( diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index 001e4ca..5567052 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -74,6 +74,13 @@ class URLRow { URLRow& operator=(const URLRow& other); URLID id() const { return id_; } + + // Sets the id of the row. The id should only be manually set when a row has + // been retrieved from the history database or other dataset based on criteria + // other than its id (i.e. by URL) and when the id has not yet been set in the + // row. + void set_id(URLID id) { id_ = id; } + const GURL& url() const { return url_; } const string16& title() const { @@ -140,8 +147,9 @@ class URLRow { // This excludes objects which autoinitialize such as strings. void Initialize(); - // The row ID of this URL. Immutable except for the database which sets it - // when it pulls them out. + // The row ID of this URL from the history database. This is immutable except + // when retrieving the row from the database or when determining if the URL + // referenced by the URLRow already exists in the database. URLID id_; // The URL of this row. Immutable except for the database which sets it diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 6388686..b92a062 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -11,7 +11,6 @@ #include <numeric> #include "base/file_util.h" -#include "base/i18n/break_iterator.h" #include "base/i18n/case_conversion.h" #include "base/metrics/histogram.h" #include "base/string_util.h" @@ -59,23 +58,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; // each of these scores for each factor. const int kScoreRank[] = { 1425, 1200, 900, 400 }; -ScoredHistoryMatch::ScoredHistoryMatch() - : raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) - : HistoryMatch(url_info, 0, false, false), - raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::~ScoredHistoryMatch() {} - -// Comparison function for sorting ScoredMatches by their scores. -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2) { - return m1.raw_score >= m2.raw_score; -} - InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( const WordIDSet& word_id_set, const HistoryIDSet& history_id_set) @@ -88,11 +70,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} -// Comparison function for sorting TermMatches by their offsets. -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { - return m1.offset < m2.offset; -} - // Comparison function for sorting search terms by descending length. bool LengthGreater(const string16& string_a, const string16& string_b) { return string_a.length() > string_b.length(); @@ -137,14 +114,14 @@ int ScoreForValue(int value, const int* value_ranks) { InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) : history_dir_(history_dir), - history_item_count_(0), + private_data_(new URLIndexPrivateData), cached_at_shutdown_(false) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } // Called only by unit tests. InMemoryURLIndex::InMemoryURLIndex() - : history_item_count_(0), + : private_data_(new URLIndexPrivateData), cached_at_shutdown_(false) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } @@ -170,7 +147,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist( // Indexing -bool InMemoryURLIndex::Init(history::URLDatabase* history_db, +bool InMemoryURLIndex::Init(URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; @@ -183,33 +160,47 @@ void InMemoryURLIndex::ShutDown() { cached_at_shutdown_ = true; } -bool InMemoryURLIndex::IndexRow(const URLRow& row) { +void InMemoryURLIndex::IndexRow(const URLRow& row) { const GURL& gurl(row.url()); // Index only URLs with a whitelisted scheme. if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) - return true; + return; + URLID row_id = row.id(); + // Strip out username and password before saving and indexing. string16 url(net::FormatUrl(gurl, languages_, net::kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, NULL, NULL, NULL)); - HistoryID history_id = static_cast<HistoryID>(row.id()); - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); + HistoryID history_id = static_cast<HistoryID>(row_id); + DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); // Add the row for quick lookup in the history info store. - URLRow new_row(GURL(url), row.id()); + URLRow new_row(GURL(url), row_id); new_row.set_visit_count(row.visit_count()); new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_[history_id] = new_row; + private_data_->history_info_map_[history_id] = new_row; + + // Index the words contained in the URL and title of the row. + AddRowWordsToIndex(new_row); + return; +} +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { + HistoryID history_id = static_cast<HistoryID>(row.id()); // Split URL into individual, unique words then add in the title words. + const GURL& gurl(row.url()); + string16 url(net::FormatUrl(gurl, languages_, + net::kFormatUrlOmitUsernamePassword, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + NULL, NULL, NULL)); url = base::i18n::ToLower(url); - String16Set url_words = WordSetFromString16(url); - String16Set title_words = WordSetFromString16(row.title()); + String16Set url_words = String16SetFromString16(url); + String16Set title_words = String16SetFromString16(row.title()); String16Set words; std::set_union(url_words.begin(), url_words.end(), title_words.begin(), title_words.end(), @@ -218,8 +209,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { word_iter != words.end(); ++word_iter) AddWordToIndex(*word_iter, history_id); - ++history_item_count_; - return true; + search_term_cache_.clear(); // Invalidate the term cache. +} + +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { + RemoveRowWordsFromIndex(row); + HistoryID history_id = static_cast<HistoryID>(row.id()); + private_data_->history_info_map_.erase(history_id); +} + +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { + // Remove the entries in history_id_word_map_ and word_id_history_map_ for + // this row. + URLIndexPrivateData& private_data(*(private_data_.get())); + HistoryID history_id = static_cast<HistoryID>(row.id()); + WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; + private_data.history_id_word_map_.erase(history_id); + + // Reconcile any changes to word usage. + for (WordIDSet::iterator word_id_iter = word_id_set.begin(); + word_id_iter != word_id_set.end(); ++word_id_iter) { + WordID word_id = *word_id_iter; + private_data.word_id_history_map_[word_id].erase(history_id); + if (!private_data.word_id_history_map_[word_id].empty()) + continue; // The word is still in use. + + // The word is no longer in use. Reconcile any changes to character usage. + string16 word = private_data.word_list_[word_id]; + Char16Set characters = Char16SetFromString16(word); + for (Char16Set::iterator uni_char_iter = characters.begin(); + uni_char_iter != characters.end(); ++uni_char_iter) { + char16 uni_char = *uni_char_iter; + private_data.char_word_map_[uni_char].erase(word_id); + if (private_data.char_word_map_[uni_char].empty()) + private_data.char_word_map_.erase(uni_char); // No longer in use. + } + + // Complete the removal of references to the word. + private_data.word_id_history_map_.erase(word_id); + private_data.word_map_.erase(word); + private_data.word_list_[word_id] = string16(); + private_data.available_words_.insert(word_id); + } } bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, @@ -236,10 +267,8 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) return false; URLRow row; - while (history_enum.GetNextURL(&row)) { - if (!IndexRow(row)) - return false; - } + while (history_enum.GetNextURL(&row)) + IndexRow(row); UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", base::TimeTicks::Now() - beginning_time); SaveToCacheFile(); @@ -248,12 +277,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, } void InMemoryURLIndex::ClearPrivateData() { - history_item_count_ = 0; - word_list_.clear(); - word_map_.clear(); - char_word_map_.clear(); - word_id_history_map_.clear(); - history_info_map_.clear(); + private_data_->Clear(); search_term_cache_.clear(); } @@ -289,10 +313,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() { UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", base::TimeTicks::Now() - beginning_time); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", + private_data_->history_id_word_map_.size()); UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", + private_data_->word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", + private_data_->char_word_map_.size()); return true; } @@ -327,29 +354,39 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { // indexed and it qualifies then it gets indexed. If it is already // indexed and still qualifies then it gets updated, otherwise it // is deleted from the index. - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); - if (row_pos == history_info_map_.end()) { + HistoryInfoMap::iterator row_pos = + private_data_->history_info_map_.find(row_id); + if (row_pos == private_data_->history_info_map_.end()) { // This new row should be indexed if it qualifies. - if (RowQualifiesAsSignificant(row, base::Time())) - IndexRow(row); + URLRow new_row(row); + new_row.set_id(row_id); + if (RowQualifiesAsSignificant(new_row, base::Time())) + IndexRow(new_row); } else if (RowQualifiesAsSignificant(row, base::Time())) { // This indexed row still qualifies and will be re-indexed. // The url won't have changed but the title, visit count, etc. // might have changed. - URLRow& old_row = row_pos->second; - old_row.set_visit_count(row.visit_count()); - old_row.set_typed_count(row.typed_count()); - old_row.set_last_visit(row.last_visit()); - // TODO(mrossetti): When we start indexing the title the next line - // will need attention. - old_row.set_title(row.title()); + URLRow& updated_row = row_pos->second; + updated_row.set_visit_count(row.visit_count()); + updated_row.set_typed_count(row.typed_count()); + updated_row.set_last_visit(row.last_visit()); + // While the URL is guaranteed to remain stable, the title may have changed. + // If so, then we need to update the index with the changed words. + if (updated_row.title() != row.title()) { + // Clear all words associated with this row and re-index both the + // URL and title. + RemoveRowWordsFromIndex(updated_row); + updated_row.set_title(row.title()); + AddRowWordsToIndex(updated_row); + } } else { - // This indexed row no longer qualifies and will be de-indexed. - history_info_map_.erase(row_id); + // This indexed row no longer qualifies and will be de-indexed by + // clearing all words associated with this row. + URLRow& removed_row = row_pos->second; + RemoveRowFromIndex(removed_row); } // This invalidates the cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } void InMemoryURLIndex::DeleteURL(URLID row_id) { @@ -358,10 +395,9 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { // hits against this row until that map is rebuilt, but since the // history_info_map_ no longer references the row no erroneous results // will propagate to the user. - history_info_map_.erase(row_id); + private_data_->history_info_map_.erase(row_id); // This invalidates the word cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } // Searching @@ -369,6 +405,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( const String16Vector& terms) { ScoredHistoryMatches scored_items; + + // Do nothing if we have indexed no words (probably because we've not been + // initialized yet). + if (private_data_->word_list_.empty()) + return scored_items; + if (!terms.empty()) { // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep // approach. @@ -426,7 +468,7 @@ void InMemoryURLIndex::ResetSearchTermCache() { iter->second.used_ = false; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( const string16& uni_string) { // Break the terms down into individual terms (words), get the candidate // set for each term, and intersect each to get a final candidate list. @@ -434,7 +476,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( // a string like "http://www.somewebsite.com" which, from our perspective, // is four words: 'http', 'www', 'somewebsite', and 'com'. HistoryIDSet history_id_set; - String16Vector terms = WordVectorFromString16(uni_string, true); + String16Vector terms = String16VectorFromString16(uni_string, true); // Sort the terms into the longest first as such are likely to narrow down // the results quicker. Also, single character terms are the most expensive // to process so save them for last. @@ -461,7 +503,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( return history_id_set; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& term) { if (term.empty()) return HistoryIDSet(); @@ -471,7 +513,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // occuring words in the user's searches. size_t term_length = term.length(); - InMemoryURLIndex::WordIDSet word_id_set; + WordIDSet word_id_set; if (term_length > 1) { // See if this term or a prefix thereof is present in the cache. SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); @@ -517,7 +559,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Reduce the word set with any leftover, unprocessed characters. if (!unique_chars.empty()) { - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); + WordIDSet leftover_set( + private_data_->WordIDSetForTermChars(unique_chars)); // We might come up empty on the leftovers. if (leftover_set.empty()) { search_term_cache_[term] = SearchTermCacheItem(); @@ -540,13 +583,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // contains words which do not have the search term as a proper subset. for (WordIDSet::iterator word_set_iter = word_id_set.begin(); word_set_iter != word_id_set.end(); ) { - if (word_list_[*word_set_iter].find(term) == string16::npos) + if (private_data_->word_list_[*word_set_iter].find(term) == + string16::npos) word_id_set.erase(word_set_iter++); else ++word_set_iter; } } else { - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); + word_id_set = + private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); } // If any words resulted then we can compose a set of history IDs by unioning @@ -556,8 +601,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( for (WordIDSet::iterator word_id_iter = word_id_set.begin(); word_id_iter != word_id_set.end(); ++word_id_iter) { WordID word_id = *word_id_iter; - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); - if (word_iter != word_id_history_map_.end()) { + WordIDHistoryMap::iterator word_iter = + private_data_->word_id_history_map_.find(word_id); + if (word_iter != private_data_->word_id_history_map_.end()) { HistoryIDSet& word_history_id_set(word_iter->second); history_id_set.insert(word_history_id_set.begin(), word_history_id_set.end()); @@ -575,85 +621,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Utility Functions -// static -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( - const string16& uni_string) { - const size_t kMaxWordLength = 64; - String16Vector words = WordVectorFromString16(uni_string, false); - String16Set word_set; - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); - ++iter) - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); - return word_set; -} - -// static -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( - const string16& uni_string, - bool break_on_space) { - base::i18n::BreakIterator iter( - uni_string, - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE - : base::i18n::BreakIterator::BREAK_WORD); - String16Vector words; - if (!iter.Init()) - return words; - while (iter.Advance()) { - if (break_on_space || iter.IsWord()) { - string16 word = iter.GetString(); - if (break_on_space) - TrimWhitespace(word, TRIM_ALL, &word); - if (!word.empty()) - words.push_back(word); - } - } - return words; -} - -// static -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( - const string16& term) { - Char16Set characters; - for (string16::const_iterator iter = term.begin(); iter != term.end(); - ++iter) - characters.insert(*iter); - return characters; -} - void InMemoryURLIndex::AddWordToIndex(const string16& term, HistoryID history_id) { - WordMap::iterator word_pos = word_map_.find(term); - if (word_pos != word_map_.end()) + WordMap::iterator word_pos = private_data_->word_map_.find(term); + if (word_pos != private_data_->word_map_.end()) UpdateWordHistory(word_pos->second, history_id); else AddWordHistory(term, history_id); } void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); - DCHECK(history_pos != word_id_history_map_.end()); - HistoryIDSet& history_id_set(history_pos->second); - history_id_set.insert(history_id); + WordIDHistoryMap::iterator history_pos = + private_data_->word_id_history_map_.find(word_id); + DCHECK(history_pos != private_data_->word_id_history_map_.end()); + HistoryIDSet& history_id_set(history_pos->second); + history_id_set.insert(history_id); + private_data_->AddToHistoryIDWordMap(history_id, word_id); } // Add a new word to the word list and the word map, and then create a // new entry in the word/history map. void InMemoryURLIndex::AddWordHistory(const string16& term, HistoryID history_id) { - word_list_.push_back(term); - WordID word_id = word_list_.size() - 1; - word_map_[term] = word_id; + URLIndexPrivateData& private_data(*(private_data_.get())); + WordID word_id = private_data.word_list_.size(); + if (private_data.available_words_.empty()) { + private_data.word_list_.push_back(term); + } else { + word_id = *(private_data.available_words_.begin()); + private_data.word_list_[word_id] = term; + private_data.available_words_.erase(word_id); + } + private_data.word_map_[term] = word_id; + HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_[word_id] = history_id_set; + private_data.word_id_history_map_[word_id] = history_id_set; + private_data.AddToHistoryIDWordMap(history_id, word_id); + // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. Char16Set characters = Char16SetFromString16(term); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { char16 uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter != char_word_map_.end()) { + CharWordIDMap::iterator char_iter = + private_data.char_word_map_.find(uni_char); + if (char_iter != private_data.char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); word_id_set.insert(word_id); @@ -661,108 +675,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_[uni_char] = word_id_set; - } - } -} - -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - const Char16Set& term_chars) { - WordIDSet word_id_set; - for (Char16Set::const_iterator c_iter = term_chars.begin(); - c_iter != term_chars.end(); ++c_iter) { - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); - if (char_iter == char_word_map_.end()) { - // A character was not found so there are no matching results: bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - // It is possible for there to no longer be any words associated with - // a particular character. Give up in that case. - if (char_word_id_set.empty()) { - word_id_set.clear(); - break; - } - - if (c_iter == term_chars.begin()) { - // First character results becomes base set of results. - word_id_set = char_word_id_set; - } else { - // Subsequent character results get intersected in. - WordIDSet new_word_id_set; - std::set_intersection(word_id_set.begin(), word_id_set.end(), - char_word_id_set.begin(), char_word_id_set.end(), - std::inserter(new_word_id_set, - new_word_id_set.begin())); - word_id_set.swap(new_word_id_set); + private_data.char_word_map_[uni_char] = word_id_set; } } - return word_id_set; -} - -// static -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, - const string16& string, - int term_num) { - const size_t kMaxCompareLength = 2048; - const string16& short_string = (string.length() > kMaxCompareLength) ? - string.substr(0, kMaxCompareLength) : string; - TermMatches matches; - for (size_t location = short_string.find(term); location != string16::npos; - location = short_string.find(term, location + 1)) { - matches.push_back(TermMatch(term_num, location, term.length())); - } - return matches; -} - -// static -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { - if (matches.empty()) - return matches; - TermMatches sorted_matches = matches; - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); - TermMatches clean_matches; - TermMatch last_match = sorted_matches[0]; - clean_matches.push_back(last_match); - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; - iter != sorted_matches.end(); ++iter) { - if (iter->offset >= last_match.offset + last_match.length) { - last_match = *iter; - clean_matches.push_back(last_match); - } - } - return clean_matches; -} - -// static -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( - const TermMatches& matches) { - std::vector<size_t> offsets; - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) - offsets.push_back(i->offset); - return offsets; -} - -// static -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets) { - DCHECK_EQ(matches.size(), offsets.size()); - TermMatches new_matches; - std::vector<size_t>::const_iterator offset_iter = offsets.begin(); - for (TermMatches::const_iterator term_iter = matches.begin(); - term_iter != matches.end(); ++term_iter, ++offset_iter) { - if (*offset_iter != string16::npos) { - TermMatch new_match(*term_iter); - new_match.offset = *offset_iter; - new_matches.push_back(new_match); - } - } - return new_matches; } // static +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( const URLRow& row, const String16Vector& terms) { @@ -791,8 +710,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( } // Sort matches by offset and eliminate any which overlap. - match.url_matches = SortAndDeoverlap(match.url_matches); - match.title_matches = SortAndDeoverlap(match.title_matches); + match.url_matches = SortAndDeoverlapMatches(match.url_matches); + match.title_matches = SortAndDeoverlapMatches(match.title_matches); // We should not (currently) inline autocomplete a result unless both of the // following are true: @@ -906,19 +825,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( const InMemoryURLIndex& index, const String16Vector& lower_terms) : index_(index), - lower_terms_(lower_terms) { -} + lower_terms_(lower_terms) {} InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} -void InMemoryURLIndex::AddHistoryMatch::operator()( - const InMemoryURLIndex::HistoryID history_id) { +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = - index_.history_info_map_.find(history_id); + index_.private_data_->history_info_map_.find(history_id); // Note that a history_id may be present in the word_id_history_map_ yet not // be found in the history_info_map_. This occurs when an item has been // deleted by the user or the item no longer qualifies as a quick result. - if (hist_pos != index_.history_info_map_.end()) { + if (hist_pos != index_.private_data_->history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); if (match.raw_score > 0) @@ -940,7 +857,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { DCHECK(cache); cache->set_timestamp(base::Time::Now().ToInternalValue()); - cache->set_history_item_count(history_item_count_); + // history_item_count_ is no longer used but rather than change the protobuf + // definition use a placeholder. This will go away with the switch to SQLite. + cache->set_history_item_count(0); SaveWordList(cache); SaveWordMap(cache); SaveCharWordMap(cache); @@ -951,20 +870,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { bool InMemoryURLIndex::RestorePrivateData( const InMemoryURLIndexCacheItem& cache) { last_saved_ = base::Time::FromInternalValue(cache.timestamp()); - history_item_count_ = cache.history_item_count(); - return (history_item_count_ == 0) || (RestoreWordList(cache) && - RestoreWordMap(cache) && RestoreCharWordMap(cache) && - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); + return RestoreWordList(cache) && RestoreWordMap(cache) && + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && + RestoreHistoryInfoMap(cache); } - void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { - if (word_list_.empty()) + if (private_data_->word_list_.empty()) return; WordListItem* list_item = cache->mutable_word_list(); - list_item->set_word_count(word_list_.size()); - for (String16Vector::const_iterator iter = word_list_.begin(); - iter != word_list_.end(); ++iter) + list_item->set_word_count(private_data_->word_list_.size()); + for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); + iter != private_data_->word_list_.end(); ++iter) list_item->add_word(UTF16ToUTF8(*iter)); } @@ -979,17 +896,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<std::string>& words(list_item.word()); for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); iter != words.end(); ++iter) - word_list_.push_back(UTF8ToUTF16(*iter)); + private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); return true; } void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { - if (word_map_.empty()) + if (private_data_->word_map_.empty()) return; WordMapItem* map_item = cache->mutable_word_map(); - map_item->set_item_count(word_map_.size()); - for (WordMap::const_iterator iter = word_map_.begin(); - iter != word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_map_.size()); + for (WordMap::const_iterator iter = private_data_->word_map_.begin(); + iter != private_data_->word_map_.end(); ++iter) { WordMapEntry* map_entry = map_item->add_word_map_entry(); map_entry->set_word(UTF16ToUTF8(iter->first)); map_entry->set_word_id(iter->second); @@ -1007,17 +924,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); iter != entries.end(); ++iter) - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); return true; } void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { - if (char_word_map_.empty()) + if (private_data_->char_word_map_.empty()) return; CharWordMapItem* map_item = cache->mutable_char_word_map(); - map_item->set_item_count(char_word_map_.size()); - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); - iter != char_word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->char_word_map_.size()); + for (CharWordIDMap::const_iterator iter = + private_data_->char_word_map_.begin(); + iter != private_data_->char_word_map_.end(); ++iter) { CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); map_entry->set_char_16(iter->first); const WordIDSet& word_id_set(iter->second); @@ -1051,19 +969,20 @@ bool InMemoryURLIndex::RestoreCharWordMap( for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); jiter != word_ids.end(); ++jiter) word_id_set.insert(*jiter); - char_word_map_[uni_char] = word_id_set; + private_data_->char_word_map_[uni_char] = word_id_set; } return true; } void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) const { - if (word_id_history_map_.empty()) + if (private_data_->word_id_history_map_.empty()) return; WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); - map_item->set_item_count(word_id_history_map_.size()); - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); - iter != word_id_history_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_id_history_map_.size()); + for (WordIDHistoryMap::const_iterator iter = + private_data_->word_id_history_map_.begin(); + iter != private_data_->word_id_history_map_.end(); ++iter) { WordIDHistoryMapEntry* map_entry = map_item->add_word_id_history_map_entry(); map_entry->set_word_id(iter->first); @@ -1096,21 +1015,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap( HistoryIDSet history_id_set; const RepeatedField<int64>& history_ids(iter->history_id()); for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); - jiter != history_ids.end(); ++jiter) + jiter != history_ids.end(); ++jiter) { history_id_set.insert(*jiter); - word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(*jiter, word_id); + } + private_data_->word_id_history_map_[word_id] = history_id_set; } return true; } void InMemoryURLIndex::SaveHistoryInfoMap( InMemoryURLIndexCacheItem* cache) const { - if (history_info_map_.empty()) + if (private_data_->history_info_map_.empty()) return; HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); - map_item->set_item_count(history_info_map_.size()); - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); - iter != history_info_map_.end(); ++iter) { + map_item->set_item_count(private_data_->history_info_map_.size()); + for (HistoryInfoMap::const_iterator iter = + private_data_->history_info_map_.begin(); + iter != private_data_->history_info_map_.end(); ++iter) { HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); map_entry->set_history_id(iter->first); const URLRow& url_row(iter->second); @@ -1148,7 +1070,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap( string16 title(UTF8ToUTF16(iter->title())); url_row.set_title(title); } - history_info_map_[history_id] = url_row; + private_data_->history_info_map_[history_id] = url_row; } return true; } diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index 5de5604..dfae2d4 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -21,6 +21,7 @@ #include "chrome/browser/autocomplete/autocomplete_match.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/browser/history/in_memory_url_index_cache.pb.h" #include "sql/connection.h" #include "testing/gtest/include/gtest/gtest_prod.h" @@ -41,39 +42,6 @@ namespace imui = in_memory_url_index; class URLDatabase; -// Specifies where an omnibox term occurs within a string. Used for specifying -// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in -// scoring a result. -struct TermMatch { - TermMatch(int term_num, size_t offset, size_t length) - : term_num(term_num), - offset(offset), - length(length) {} - - int term_num; // The index of the term in the original search string. - size_t offset; // The starting offset of the substring match. - size_t length; // The length of the substring match. -}; -typedef std::vector<TermMatch> TermMatches; - -// Used for intermediate history result operations. -struct ScoredHistoryMatch : public HistoryMatch { - ScoredHistoryMatch(); // Required by STL. - explicit ScoredHistoryMatch(const URLRow& url_info); - ~ScoredHistoryMatch(); - - static bool MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2); - - // An interim score taking into consideration location and completeness - // of the match. - int raw_score; - TermMatches url_matches; // Term matches within the URL. - TermMatches title_matches; // Term matches within the page title. - bool can_inline; // True if this is a candidate for in-line autocompletion. -}; -typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; - // The URL history source. // Holds portions of the URL database in memory in an indexed form. Used to // quickly look up matching URLs for a given query string. Used by @@ -99,10 +67,7 @@ class InMemoryURLIndex { // within the profile wherein the cache and transaction journals will be // stored. explicit InMemoryURLIndex(const FilePath& history_dir); - ~InMemoryURLIndex(); - - // Convenience types - typedef std::vector<string16> String16Vector; + virtual ~InMemoryURLIndex(); // Opens and indexes the URL history database. // |languages| gives a list of language encodings with which the history @@ -150,30 +115,6 @@ class InMemoryURLIndex { // 'quick' criteria). void DeleteURL(URLID row_id); - // Breaks the |uni_string| string down into individual words and return - // a vector with the individual words in their original order. If - // |break_on_space| is false then the resulting list will contain only words - // containing alpha-numeric characters. If |break_on_space| is true then the - // resulting list will contain strings broken at whitespace. - // - // Example: - // Given: |uni_string|: "http://www.google.com/ harry the rabbit." - // With |break_on_space| false the returned list will contain: - // "http", "www", "google", "com", "harry", "the", "rabbit" - // With |break_on_space| true the returned list will contain: - // "http://", "www.google.com/", "harry", "the", "rabbit." - static String16Vector WordVectorFromString16(const string16& uni_string, - bool break_on_space); - - // Extract and return the offsets from |matches|. - static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); - - // Replace the offsets in |matches| with those given in |offsets|, deleting - // any which are npos, and return the updated list of matches. - static TermMatches ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets); - private: friend class AddHistoryMatch; friend class InMemoryURLIndexTest; @@ -185,6 +126,7 @@ class InMemoryURLIndex { FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); @@ -194,29 +136,6 @@ class InMemoryURLIndex { // Creating one of me without a history path is not allowed (tests excepted). InMemoryURLIndex(); - // Convenience types. - typedef std::set<string16> String16Set; - typedef std::set<char16> Char16Set; - typedef std::vector<char16> Char16Vector; - - // An index into list of all of the words we have indexed. - typedef int WordID; - - // A map allowing a WordID to be determined given a word. - typedef std::map<string16, WordID> WordMap; - - // A map from character to word_ids. - typedef std::set<WordID> WordIDSet; // An index into the WordList. - typedef std::map<char16, WordIDSet> CharWordIDMap; - - // A map from word_id to history item. - // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit. - // Consider using a smaller type. - typedef URLID HistoryID; - typedef std::set<HistoryID> HistoryIDSet; - typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; - - // Support caching of term results so that we can optimize searches which // build upon a previous search. Each entry in this map represents one // search term from the most recent search. For example, if the user had @@ -248,12 +167,6 @@ class InMemoryURLIndex { }; typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap; - // TODO(rohitrao): Probably replace this with QueryResults. - typedef std::vector<URLRow> URLRowVector; - - // A map from history_id to the history's URL and title. - typedef std::map<HistoryID, URLRow> HistoryInfoMap; - // A helper class which performs the final filter on each candidate // history URL match, inserting accepted matches into |scored_matches_| // and trimming the maximum number of matches to 10. @@ -280,34 +193,19 @@ class InMemoryURLIndex { // Initializes the whitelist of URL schemes. static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); - // Breaks a string down into individual words. - static String16Set WordSetFromString16(const string16& uni_string); + // URL History indexing support functions. - // Given a set of Char16s, finds words containing those characters. - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); + // Indexes one URL history item. + void IndexRow(const URLRow& row); - // Creates a TermMatches which has an entry for each occurrence of the string - // |term| found in the string |string|. Mark each match with |term_num| so - // that the resulting TermMatches can be merged with other TermMatches for - // other terms. - static TermMatches MatchTermInString(const string16& term, - const string16& string, - int term_num); + // Parses and indexes the words in the URL and page title of |row|. + void AddRowWordsToIndex(const URLRow& row); - // URL History indexing support functions. + // Removes |row| and all associated words and characters from the index. + void RemoveRowFromIndex(const URLRow& row); - // Indexes one URL history item. - bool IndexRow(const URLRow& row); - - // Breaks the |uni_word| string down into its individual characters. - // Note that this is temporarily intended to work on a single word, but - // _will_ work on a string of words, perhaps with unexpected results. - // TODO(mrossetti): Lots of optimizations possible here for not restarting - // a search if the user is just typing along. Also, change this to uniString - // and properly handle substring matches, scoring and sorting the results - // by score. Also, provide the metrics for where the matches occur so that - // the UI can highlight the matched sections. - static Char16Set Char16SetFromString16(const string16& uni_word); + // Removes all words and characters associated with |row| from the index. + void RemoveRowWordsFromIndex(const URLRow& row); // Given a single word in |uni_word|, adds a reference for the containing // history item identified by |history_id| to the index. @@ -352,10 +250,6 @@ class InMemoryURLIndex { static int ScoreComponentForMatches(const TermMatches& matches, size_t max_length); - // Sorts and removes overlapping substring matches from |matches| and - // returns the cleaned up matches. - static TermMatches SortAndDeoverlap(const TermMatches& matches); - // Determines if |gurl| has a whitelisted scheme and returns true if so. bool URLSchemeIsWhitelisted(const GURL& gurl) const; @@ -395,21 +289,8 @@ class InMemoryURLIndex { // the InMemoryURLIndex was last populated. base::Time last_saved_; - // A list of all of indexed words. The index of a word in this list is the - // ID of the word in the word_map_. It reduces the memory overhead by - // replacing a potentially long and repeated string with a simple index. - // NOTE: A word will _never_ be removed from this vector thus insuring - // the immutability of the word_id throughout the session, reducing - // maintenance complexity. - // TODO(mrossetti): Profile the vector allocation and determine if judicious - // 'reserve' calls are called for. - String16Vector word_list_; - - int history_item_count_; - WordMap word_map_; - CharWordIDMap char_word_map_; - WordIDHistoryMap word_id_history_map_; - HistoryInfoMap history_info_map_; + // The index's durable private data. + scoped_ptr<URLIndexPrivateData> private_data_; // Cache of search terms. SearchTermCacheMap search_term_cache_; diff --git a/chrome/browser/history/in_memory_url_index_types.cc b/chrome/browser/history/in_memory_url_index_types.cc new file mode 100644 index 0000000..5315ccc --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.cc @@ -0,0 +1,198 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/history/in_memory_url_index_types.h" + +#include <algorithm> +#include <iterator> + +#include "base/i18n/break_iterator.h" +#include "base/i18n/case_conversion.h" +#include "base/string_util.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num) { + const size_t kMaxCompareLength = 2048; + const string16& short_string = (string.length() > kMaxCompareLength) ? + string.substr(0, kMaxCompareLength) : string; + TermMatches matches; + for (size_t location = short_string.find(term); location != string16::npos; + location = short_string.find(term, location + 1)) + matches.push_back(TermMatch(term_num, location, term.length())); + return matches; +} + +// Comparison function for sorting TermMatches by their offsets. +bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { + return m1.offset < m2.offset; +} + +TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { + if (matches.empty()) + return matches; + TermMatches sorted_matches = matches; + std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); + TermMatches clean_matches; + TermMatch last_match; + for (TermMatches::const_iterator iter = sorted_matches.begin(); + iter != sorted_matches.end(); ++iter) { + if (iter->offset >= last_match.offset + last_match.length) { + last_match = *iter; + clean_matches.push_back(last_match); + } + } + return clean_matches; +} + +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) { + std::vector<size_t> offsets; + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) + offsets.push_back(i->offset); + return offsets; +} + +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets) { + DCHECK_EQ(matches.size(), offsets.size()); + TermMatches new_matches; + std::vector<size_t>::const_iterator offset_iter = offsets.begin(); + for (TermMatches::const_iterator term_iter = matches.begin(); + term_iter != matches.end(); ++term_iter, ++offset_iter) { + if (*offset_iter != string16::npos) { + TermMatch new_match(*term_iter); + new_match.offset = *offset_iter; + new_matches.push_back(new_match); + } + } + return new_matches; +} + +// ScoredHistoryMatch ---------------------------------------------------------- + +ScoredHistoryMatch::ScoredHistoryMatch() + : raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) + : HistoryMatch(url_info, 0, false, false), + raw_score(0), + can_inline(false) {} + +ScoredHistoryMatch::~ScoredHistoryMatch() {} + +// Comparison function for sorting ScoredMatches by their scores. +bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2) { + return m1.raw_score >= m2.raw_score; +} + +// InMemoryURLIndex's Private Data --------------------------------------------- + +URLIndexPrivateData::URLIndexPrivateData() {} + +URLIndexPrivateData::~URLIndexPrivateData() {} + +void URLIndexPrivateData::Clear() { + word_list_.clear(); + available_words_.clear(); + word_map_.clear(); + char_word_map_.clear(); + word_id_history_map_.clear(); + history_id_word_map_.clear(); + history_info_map_.clear(); +} + +void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, + WordID word_id) { + HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); + if (iter != history_id_word_map_.end()) { + WordIDSet& word_id_set(iter->second); + word_id_set.insert(word_id); + } else { + WordIDSet word_id_set; + word_id_set.insert(word_id); + history_id_word_map_[history_id] = word_id_set; + } +} + +WordIDSet URLIndexPrivateData::WordIDSetForTermChars( + const Char16Set& term_chars) { + WordIDSet word_id_set; + for (Char16Set::const_iterator c_iter = term_chars.begin(); + c_iter != term_chars.end(); ++c_iter) { + CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); + if (char_iter == char_word_map_.end()) { + // A character was not found so there are no matching results: bail. + word_id_set.clear(); + break; + } + WordIDSet& char_word_id_set(char_iter->second); + // It is possible for there to no longer be any words associated with + // a particular character. Give up in that case. + if (char_word_id_set.empty()) { + word_id_set.clear(); + break; + } + + if (c_iter == term_chars.begin()) { + // First character results becomes base set of results. + word_id_set = char_word_id_set; + } else { + // Subsequent character results get intersected in. + WordIDSet new_word_id_set; + std::set_intersection(word_id_set.begin(), word_id_set.end(), + char_word_id_set.begin(), char_word_id_set.end(), + std::inserter(new_word_id_set, + new_word_id_set.begin())); + word_id_set.swap(new_word_id_set); + } + } + return word_id_set; +} + +// Utility Functions ----------------------------------------------------------- + +String16Set String16SetFromString16(const string16& uni_string) { + const size_t kMaxWordLength = 64; + String16Vector words = String16VectorFromString16(uni_string, false); + String16Set word_set; + for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); + ++iter) + word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); + return word_set; +} + +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space) { + base::i18n::BreakIterator iter(uni_string, + break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : + base::i18n::BreakIterator::BREAK_WORD); + String16Vector words; + if (!iter.Init()) + return words; + while (iter.Advance()) { + if (break_on_space || iter.IsWord()) { + string16 word = iter.GetString(); + if (break_on_space) + TrimWhitespace(word, TRIM_ALL, &word); + if (!word.empty()) + words.push_back(word); + } + } + return words; +} + +Char16Set Char16SetFromString16(const string16& term) { + Char16Set characters; + for (string16::const_iterator iter = term.begin(); iter != term.end(); ++iter) + characters.insert(*iter); + return characters; +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_types.h b/chrome/browser/history/in_memory_url_index_types.h new file mode 100644 index 0000000..03f772c --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types.h @@ -0,0 +1,208 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ +#pragma once + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/gtest_prod_util.h" +#include "base/string16.h" +#include "chrome/browser/autocomplete/history_provider_util.h" +#include "chrome/browser/history/history_types.h" + +namespace history { + +// Matches within URL and Title Strings ---------------------------------------- + +// Specifies where an omnibox term occurs within a string. Used for specifying +// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in +// scoring a result. +struct TermMatch { + TermMatch() : term_num(0), offset(0), length(0) {} + TermMatch(int term_num, size_t offset, size_t length) + : term_num(term_num), + offset(offset), + length(length) {} + + int term_num; // The index of the term in the original search string. + size_t offset; // The starting offset of the substring match. + size_t length; // The length of the substring match. +}; +typedef std::vector<TermMatch> TermMatches; + +// Returns a TermMatches which has an entry for each occurrence of the string +// |term| found in the string |string|. Mark each match with |term_num| so +// that the resulting TermMatches can be merged with other TermMatches for +// other terms. Note that only the first 2,048 characters of |string| are +// considered during the match operation. +TermMatches MatchTermInString(const string16& term, + const string16& string, + int term_num); + +// Sorts and removes overlapping substring matches from |matches| and +// returns the cleaned up matches. +TermMatches SortAndDeoverlapMatches(const TermMatches& matches); + +// Extracts and returns the offsets from |matches|. +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches); + +// Replaces the offsets in |matches| with those given in |offsets|, deleting +// any which are npos, and returns the updated list of matches. +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, + const std::vector<size_t>& offsets); + +// Used for intermediate history result operations ----------------------------- + +struct ScoredHistoryMatch : public history::HistoryMatch { + ScoredHistoryMatch(); // Required by STL. + explicit ScoredHistoryMatch(const history::URLRow& url_info); + ~ScoredHistoryMatch(); + + static bool MatchScoreGreater(const ScoredHistoryMatch& m1, + const ScoredHistoryMatch& m2); + + // An interim score taking into consideration location and completeness + // of the match. + int raw_score; + TermMatches url_matches; // Term matches within the URL. + TermMatches title_matches; // Term matches within the page title. + bool can_inline; // True if this is a candidate for in-line autocompletion. +}; +typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; + +// Convenience Types ----------------------------------------------------------- + +typedef std::vector<string16> String16Vector; +typedef std::set<string16> String16Set; +typedef std::set<char16> Char16Set; +typedef std::vector<char16> Char16Vector; + +// Utility Functions Relates to Convenience Types ------------------------------ + +// Breaks a string down into individual words. +String16Set String16SetFromString16(const string16& uni_string); + +// Breaks the |uni_string| string down into individual words and return +// a vector with the individual words in their original order. If +// |break_on_space| is false then the resulting list will contain only words +// containing alpha-numeric characters. If |break_on_space| is true then the +// resulting list will contain strings broken at whitespace. (|break_on_space| +// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE) +// approach is to be used. For a complete description of this algorithm +// refer to the comments in base/i18n/break_iterator.h.) +// +// Example: +// Given: |uni_string|: "http://www.google.com/ harry the rabbit." +// With |break_on_space| false the returned list will contain: +// "http", "www", "google", "com", "harry", "the", "rabbit" +// With |break_on_space| true the returned list will contain: +// "http://", "www.google.com/", "harry", "the", "rabbit." +String16Vector String16VectorFromString16(const string16& uni_string, + bool break_on_space); + +// Breaks the |uni_word| string down into its individual characters. +// Note that this is temporarily intended to work on a single word, but +// _will_ work on a string of words, perhaps with unexpected results. +// TODO(mrossetti): Lots of optimizations possible here for not restarting +// a search if the user is just typing along. Also, change this to uniString +// and properly handle substring matches, scoring and sorting the results +// by score. Also, provide the metrics for where the matches occur so that +// the UI can highlight the matched sections. +Char16Set Char16SetFromString16(const string16& uni_word); + +// Support for InMemoryURLIndex Private Data ----------------------------------- + +// An index into a list of all of the words we have indexed. +typedef size_t WordID; + +// A map allowing a WordID to be determined given a word. +typedef std::map<string16, WordID> WordMap; + +// A map from character to the word_ids of words containing that character. +typedef std::set<WordID> WordIDSet; // An index into the WordList. +typedef std::map<char16, WordIDSet> CharWordIDMap; + +// A map from word (by word_id) to history items containing that word. +typedef history::URLID HistoryID; +typedef std::set<HistoryID> HistoryIDSet; +typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; +typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap; + +// A map from history_id to the history's URL and title. +typedef std::map<HistoryID, URLRow> HistoryInfoMap; + +// TODO(rohitrao): Probably replace this with QueryResults. +typedef std::vector<history::URLRow> URLRowVector; + +// A structure describing the index's internal data. This structure is used +// by the InMemoryURLIndex, InMemoryURLIndexBackend, and +// InMemoryURLCacheDatabase classes. +class URLIndexPrivateData { + public: + URLIndexPrivateData(); + ~URLIndexPrivateData(); + + // Clear all of our data. + void Clear(); + + // Adds |word_id| to |history_id|'s entry in the history/word map, + // creating a new entry if one does not already exist. + void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); + + // Given a set of Char16s, finds words containing those characters. + WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); + + private: + friend class InMemoryURLIndex; + friend class InMemoryURLIndexTest; + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); + FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); + + // A list of all of indexed words. The index of a word in this list is the + // ID of the word in the word_map_. It reduces the memory overhead by + // replacing a potentially long and repeated string with a simple index. + String16Vector word_list_; + + // A list of available words slots in |word_list_|. An available word slot + // is the index of a unused word in word_list_ vector, also referred to as + // a WordID. As URL visits are added or modified new words may be added to + // the index, in which case any available words are used, if any, and then + // words are added to the end of the word_list_. When URL visits are + // modified or deleted old words may be removed from the index, in which + // case the slots for those words are added to available_words_ for resuse + // by future URL updates. + WordIDSet available_words_; + + // A one-to-one mapping from the a word string to its slot number (i.e. + // WordID) in the |word_list_|. + WordMap word_map_; + + // A one-to-many mapping from a single character to all WordIDs of words + // containing that character. + CharWordIDMap char_word_map_; + + // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as + // used in the history database) of history items in which the word occurs. + WordIDHistoryMap word_id_history_map_; + + // A one-to-many mapping from a HistoryID to all WordIDs of words that occur + // in the URL and/or page title of the history item referenced by that + // HistoryID. + HistoryIDWordMap history_id_word_map_; + + // A one-to-one mapping from HistoryID to the history item data governing + // index inclusion and relevance scoring. + HistoryInfoMap history_info_map_; +}; + +} // namespace history + +#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_ diff --git a/chrome/browser/history/in_memory_url_index_types_unittest.cc b/chrome/browser/history/in_memory_url_index_types_unittest.cc new file mode 100644 index 0000000..6a6bc7c --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_types_unittest.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/string16.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/in_memory_url_index_types.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace history { + +class InMemoryURLIndexTypesTest : public testing::Test { +}; + +TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) { + // Test WordVectorFromString16 + string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); + String16Vector string_vec = String16VectorFromString16(string_a, false); + ASSERT_EQ(7U, string_vec.size()); + // See if we got the words we expected. + EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); + + string_vec = String16VectorFromString16(string_a, true); + ASSERT_EQ(5U, string_vec.size()); + EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); + EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); + EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); + EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); + EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); + + // Test WordSetFromString16 + string16 string_b(ASCIIToUTF16( + "http://web.google.com/search Google Web Search")); + String16Set string_set = String16SetFromString16(string_b); + EXPECT_EQ(5U, string_set.size()); + // See if we got the words we expected. + EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); + EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); + + // Test SortAndDeoverlap + TermMatches matches_a; + matches_a.push_back(TermMatch(1, 13, 10)); + matches_a.push_back(TermMatch(2, 23, 10)); + matches_a.push_back(TermMatch(3, 3, 10)); + matches_a.push_back(TermMatch(4, 40, 5)); + TermMatches matches_b = SortAndDeoverlapMatches(matches_a); + // Nothing should have been eliminated. + EXPECT_EQ(matches_a.size(), matches_b.size()); + // The order should now be 3, 1, 2, 4. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(4, matches_b[3].term_num); + matches_a.push_back(TermMatch(5, 18, 10)); + matches_a.push_back(TermMatch(6, 38, 5)); + matches_b = SortAndDeoverlapMatches(matches_a); + // Two matches should have been eliminated. + EXPECT_EQ(matches_a.size() - 2, matches_b.size()); + // The order should now be 3, 1, 2, 6. + EXPECT_EQ(3, matches_b[0].term_num); + EXPECT_EQ(1, matches_b[1].term_num); + EXPECT_EQ(2, matches_b[2].term_num); + EXPECT_EQ(6, matches_b[3].term_num); + + // Test MatchTermInString + TermMatches matches_c = MatchTermInString( + UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); + const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; + ASSERT_EQ(arraysize(expected_offsets), matches_c.size()); + for (size_t i = 0; i < arraysize(expected_offsets); ++i) + EXPECT_EQ(expected_offsets[i], matches_c[i].offset); +} + +TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) { + // Test OffsetsFromTermMatches + history::TermMatches matches_a; + matches_a.push_back(history::TermMatch(1, 1, 2)); + matches_a.push_back(history::TermMatch(2, 4, 3)); + matches_a.push_back(history::TermMatch(3, 9, 1)); + matches_a.push_back(history::TermMatch(3, 10, 1)); + matches_a.push_back(history::TermMatch(4, 14, 5)); + std::vector<size_t> offsets = OffsetsFromTermMatches(matches_a); + const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; + ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); + for (size_t i = 0; i < offsets.size(); ++i) + EXPECT_EQ(expected_offsets_a[i], offsets[i]); + + // Test ReplaceOffsetsInTermMatches + offsets[2] = string16::npos; + history::TermMatches matches_b = + ReplaceOffsetsInTermMatches(matches_a, offsets); + const size_t expected_offsets_b[] = {1, 4, 10, 14}; + ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); + for (size_t i = 0; i < matches_b.size(); ++i) + EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); +} + +} // namespace history diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index 62b708d..c4420bc 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -2,24 +2,18 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <stdio.h> - #include <fstream> -#include <string> -#include <vector> #include "base/file_path.h" #include "base/file_util.h" -#include "base/memory/scoped_ptr.h" #include "base/path_service.h" +#include "base/string16.h" #include "base/string_util.h" -#include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/history/in_memory_database.h" #include "chrome/browser/history/in_memory_url_index.h" +#include "chrome/browser/history/in_memory_url_index_types.h" #include "chrome/common/chrome_paths.h" -#include "sql/connection.h" -#include "sql/statement.h" #include "sql/transaction.h" #include "testing/gtest/include/gtest/gtest.h" @@ -59,9 +53,8 @@ class InMemoryURLIndexTest : public testing::Test, int typed_count); // Convenience functions for easily creating vectors of search terms. - InMemoryURLIndex::String16Vector Make1Term(const char* term) const; - InMemoryURLIndex::String16Vector Make2Terms(const char* term_1, - const char* term_2) const; + String16Vector Make1Term(const char* term) const; + String16Vector Make2Terms(const char* term_1, const char* term_2) const; // Validates that the given |term| is contained in |cache| and that it is // marked as in-use. @@ -145,20 +138,18 @@ URLRow InMemoryURLIndexTest::MakeURLRow(const char* url, return row; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make1Term( - const char* term) const { - InMemoryURLIndex::String16Vector terms; - terms.push_back(UTF8ToUTF16(term)); - return terms; +String16Vector InMemoryURLIndexTest::Make1Term(const char* term) const { + String16Vector original_terms; + original_terms.push_back(UTF8ToUTF16(term)); + return original_terms; } -InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make2Terms( - const char* term_1, - const char* term_2) const { - InMemoryURLIndex::String16Vector terms; - terms.push_back(UTF8ToUTF16(term_1)); - terms.push_back(UTF8ToUTF16(term_2)); - return terms; +String16Vector InMemoryURLIndexTest::Make2Terms(const char* term_1, + const char* term_2) const { + String16Vector original_terms; + original_terms.push_back(UTF8ToUTF16(term_1)); + original_terms.push_back(UTF8ToUTF16(term_2)); + return original_terms; } void InMemoryURLIndexTest::CheckTerm( @@ -173,6 +164,27 @@ void InMemoryURLIndexTest::CheckTerm( << "Cache item '" << term << "' should be marked as being in use."; } +// Helper function which compares two maps for equivalence. The maps' values +// are associative containers and their contents are compared as well. +template<typename T> +void ExpectMapOfContainersIdentical(const T& expected, const T& actual) { + ASSERT_EQ(expected.size(), actual.size()); + for (typename T::const_iterator expected_iter = expected.begin(); + expected_iter != expected.end(); ++expected_iter) { + typename T::const_iterator actual_iter = actual.find(expected_iter->first); + ASSERT_NE(actual.end(), actual_iter); + typename T::mapped_type const& expected_values(expected_iter->second); + typename T::mapped_type const& actual_values(actual_iter->second); + ASSERT_EQ(expected_values.size(), actual_values.size()); + for (typename T::mapped_type::const_iterator set_iter = + expected_values.begin(); set_iter != expected_values.end(); ++set_iter) + EXPECT_EQ(actual_values.count(*set_iter), + expected_values.count(*set_iter)); + } +} + +//------------------------------------------------------------------------------ + class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest { protected: FilePath::StringType TestDBName() const; @@ -217,12 +229,12 @@ TEST_F(LimitedInMemoryURLIndexTest, Initialization) { EXPECT_EQ(1U, row_count); url_index_.reset(new InMemoryURLIndex(FilePath())); url_index_->Init(this, "en,ja,hi,zh"); - EXPECT_EQ(1, url_index_->history_item_count_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); // history_info_map_ should have the same number of items as were filtered. - EXPECT_EQ(1U, url_index_->history_info_map_.size()); - EXPECT_EQ(35U, url_index_->char_word_map_.size()); - EXPECT_EQ(17U, url_index_->word_map_.size()); + EXPECT_EQ(1U, private_data.history_info_map_.size()); + EXPECT_EQ(35U, private_data.char_word_map_.size()); + EXPECT_EQ(17U, private_data.word_map_.size()); } TEST_F(InMemoryURLIndexTest, Retrieval) { @@ -259,11 +271,11 @@ TEST_F(InMemoryURLIndexTest, Retrieval) { matches[0].url_info.title()); // Search which should result in very poor result. - InMemoryURLIndex::String16Vector terms; - terms.push_back(ASCIIToUTF16("z")); - terms.push_back(ASCIIToUTF16("y")); - terms.push_back(ASCIIToUTF16("x")); - matches = url_index_->HistoryItemsForTerms(terms); + String16Vector original_terms; + original_terms.push_back(ASCIIToUTF16("z")); + original_terms.push_back(ASCIIToUTF16("y")); + original_terms.push_back(ASCIIToUTF16("x")); + matches = url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(1U, matches.size()); // The results should have a poor score. EXPECT_LT(matches[0].raw_score, 500); @@ -296,14 +308,15 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) { url_index_.reset(new InMemoryURLIndex(FilePath())); url_index_->Init(this, "en,ja,hi,zh"); // Signal if someone has changed the test DB. - EXPECT_EQ(27U, url_index_->history_info_map_.size()); - InMemoryURLIndex::String16Vector terms; + EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size()); + String16Vector original_terms; // Ensure title is being searched. - terms.push_back(ASCIIToUTF16("MORTGAGE")); - terms.push_back(ASCIIToUTF16("RATE")); - terms.push_back(ASCIIToUTF16("DROPS")); - ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); + original_terms.push_back(ASCIIToUTF16("MORTGAGE")); + original_terms.push_back(ASCIIToUTF16("RATE")); + original_terms.push_back(ASCIIToUTF16("DROPS")); + ScoredHistoryMatches matches = + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(1U, matches.size()); // Verify that we got back the result we expected. @@ -315,6 +328,54 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) { matches[0].url_info.title()); } +TEST_F(InMemoryURLIndexTest, TitleChange) { + url_index_.reset(new InMemoryURLIndex(FilePath())); + url_index_->Init(this, "en,ja,hi,zh"); + + // Verify current title terms retrieves desired item. + String16Vector original_terms; + original_terms.push_back(ASCIIToUTF16("lebronomics")); + original_terms.push_back(ASCIIToUTF16("could")); + original_terms.push_back(ASCIIToUTF16("high")); + original_terms.push_back(ASCIIToUTF16("taxes")); + original_terms.push_back(ASCIIToUTF16("influence")); + ScoredHistoryMatches matches = + url_index_->HistoryItemsForTerms(original_terms); + ASSERT_EQ(1U, matches.size()); + + // Verify that we got back the result we expected. + const URLID expected_id = 3; + EXPECT_EQ(expected_id, matches[0].url_info.id()); + EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx", + matches[0].url_info.url().spec()); + EXPECT_EQ(ASCIIToUTF16( + "LeBronomics: Could High Taxes Influence James' Team Decision?"), + matches[0].url_info.title()); + URLRow old_row(matches[0].url_info); + + // Verify new title terms retrieves nothing. + String16Vector new_terms; + new_terms.push_back(ASCIIToUTF16("does")); + new_terms.push_back(ASCIIToUTF16("eat")); + new_terms.push_back(ASCIIToUTF16("oats")); + new_terms.push_back(ASCIIToUTF16("little")); + new_terms.push_back(ASCIIToUTF16("lambs")); + new_terms.push_back(ASCIIToUTF16("ivy")); + matches = url_index_->HistoryItemsForTerms(new_terms); + ASSERT_EQ(0U, matches.size()); + + // Update the row. + old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy")); + url_index_->UpdateURL(expected_id, old_row); + + // Verify we get the row using the new terms but not the original terms. + matches = url_index_->HistoryItemsForTerms(new_terms); + ASSERT_EQ(1U, matches.size()); + EXPECT_EQ(expected_id, matches[0].url_info.id()); + matches = url_index_->HistoryItemsForTerms(original_terms); + ASSERT_EQ(0U, matches.size()); +} + TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) { url_index_.reset(new InMemoryURLIndex(FilePath())); url_index_->Init(this, "en,ja,hi,zh"); @@ -345,101 +406,6 @@ TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) { EXPECT_EQ(28, matches[0].url_info.id()); } -TEST_F(InMemoryURLIndexTest, StaticFunctions) { - // Test WordVectorFromString16 - string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); - InMemoryURLIndex::String16Vector string_vec = - InMemoryURLIndex::WordVectorFromString16(string_a, false); - ASSERT_EQ(7U, string_vec.size()); - // See if we got the words we expected. - EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); - - string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true); - ASSERT_EQ(5U, string_vec.size()); - EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); - EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); - EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); - EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]); - EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]); - - // Test WordSetFromString16 - string16 string_b(ASCIIToUTF16( - "http://web.google.com/search Google Web Search")); - InMemoryURLIndex::String16Set string_set = - InMemoryURLIndex::WordSetFromString16(string_b); - EXPECT_EQ(5U, string_set.size()); - // See if we got the words we expected. - EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end()); - EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end()); - - // Test SortAndDeoverlap - TermMatches matches_a; - matches_a.push_back(TermMatch(1, 13, 10)); - matches_a.push_back(TermMatch(2, 23, 10)); - matches_a.push_back(TermMatch(3, 3, 10)); - matches_a.push_back(TermMatch(4, 40, 5)); - TermMatches matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Nothing should have been eliminated. - EXPECT_EQ(matches_a.size(), matches_b.size()); - // The order should now be 3, 1, 2, 4. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(4, matches_b[3].term_num); - matches_a.push_back(TermMatch(5, 18, 10)); - matches_a.push_back(TermMatch(6, 38, 5)); - matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a); - // Two matches should have been eliminated. - EXPECT_EQ(matches_a.size() - 2, matches_b.size()); - // The order should now be 3, 1, 2, 6. - EXPECT_EQ(3, matches_b[0].term_num); - EXPECT_EQ(1, matches_b[1].term_num); - EXPECT_EQ(2, matches_b[2].term_num); - EXPECT_EQ(6, matches_b[3].term_num); - - // Test MatchTermInString - TermMatches matches_c = InMemoryURLIndex::MatchTermInString( - UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123); - ASSERT_EQ(11U, matches_c.size()); - const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 }; - for (int i = 0; i < 11; ++i) - EXPECT_EQ(expected_offsets[i], matches_c[i].offset); -} - -TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) { - // Test OffsetsFromTermMatches - history::TermMatches matches_a; - matches_a.push_back(history::TermMatch(1, 1, 2)); - matches_a.push_back(history::TermMatch(2, 4, 3)); - matches_a.push_back(history::TermMatch(3, 9, 1)); - matches_a.push_back(history::TermMatch(3, 10, 1)); - matches_a.push_back(history::TermMatch(4, 14, 5)); - std::vector<size_t> offsets = - InMemoryURLIndex::OffsetsFromTermMatches(matches_a); - const size_t expected_offsets_a[] = {1, 4, 9, 10, 14}; - ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a)); - for (size_t i = 0; i < offsets.size(); ++i) - EXPECT_EQ(expected_offsets_a[i], offsets[i]); - - // Test ReplaceOffsetsInTermMatches - offsets[2] = string16::npos; - history::TermMatches matches_b = - InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets); - const size_t expected_offsets_b[] = {1, 4, 10, 14}; - ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size()); - for (size_t i = 0; i < matches_b.size(); ++i) - EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset); -} - TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Verify that match results for previously typed characters are retained // (in the term_char_word_set_cache_) and reused, if possible, in future @@ -460,55 +426,55 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { // Simulate typing "r" giving "r" in the simulated omnibox. The results for // 'r' will be not cached because it is only 1 character long. - InMemoryURLIndex::String16Vector terms; + String16Vector original_terms; string16 term_r = ASCIIToUTF16("r"); - terms.push_back(term_r); - url_index_->HistoryItemsForTerms(terms); + original_terms.push_back(term_r); + url_index_->HistoryItemsForTerms(original_terms); EXPECT_EQ(0U, cache.size()); // Simulate typing "re" giving "r re" in the simulated omnibox. string16 term_re = ASCIIToUTF16("re"); - terms.push_back(term_re); + original_terms.push_back(term_re); // 're' should be cached at this point but not 'r' as it is a single // character. - ASSERT_EQ(2U, terms.size()); - url_index_->HistoryItemsForTerms(terms); + ASSERT_EQ(2U, original_terms.size()); + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(1U, cache.size()); CheckTerm(cache, term_re); // Simulate typing "reco" giving "r re reco" in the simulated omnibox. string16 term_reco = ASCIIToUTF16("reco"); - terms.push_back(term_reco); + original_terms.push_back(term_reco); // 're' and 'reco' should be cached at this point but not 'r' as it is a // single character. - url_index_->HistoryItemsForTerms(terms); + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(2U, cache.size()); CheckTerm(cache, term_re); CheckTerm(cache, term_reco); - terms.clear(); // Simulate pressing <ESC>. + original_terms.clear(); // Simulate pressing <ESC>. // Simulate typing "mort". string16 term_mort = ASCIIToUTF16("mort"); - terms.push_back(term_mort); + original_terms.push_back(term_mort); // Since we now have only one search term, the cached results for 're' and // 'reco' should be purged, giving us only 1 item in the cache (for 'mort'). - url_index_->HistoryItemsForTerms(terms); + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(1U, cache.size()); CheckTerm(cache, term_mort); // Simulate typing "reco" giving "mort reco" in the simulated omnibox. - terms.push_back(term_reco); - url_index_->HistoryItemsForTerms(terms); + original_terms.push_back(term_reco); + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(2U, cache.size()); CheckTerm(cache, term_mort); CheckTerm(cache, term_reco); // Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'. - terms.resize(terms.size() - 1); + original_terms.resize(original_terms.size() - 1); string16 term_rec = ASCIIToUTF16("rec"); - terms.push_back(term_rec); - url_index_->HistoryItemsForTerms(terms); + original_terms.push_back(term_rec); + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(2U, cache.size()); CheckTerm(cache, term_mort); CheckTerm(cache, term_rec); @@ -552,14 +518,14 @@ TEST_F(InMemoryURLIndexTest, Scoring) { TEST_F(InMemoryURLIndexTest, AddNewRows) { url_index_.reset(new InMemoryURLIndex(FilePath())); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector original_terms; // Verify that the row we're going to add does not already exist. URLID new_row_id = 87654321; // Newly created URLRows get a last_visit time of 'right now' so it should // qualify as a quick result candidate. - terms.push_back(ASCIIToUTF16("brokeandalone")); - EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); + original_terms.push_back(ASCIIToUTF16("brokeandalone")); + EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty()); // Add a new row. URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id); @@ -567,26 +533,27 @@ TEST_F(InMemoryURLIndexTest, AddNewRows) { url_index_->UpdateURL(new_row_id, new_row); // Verify that we can retrieve it. - EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); + EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size()); // Add it again just to be sure that is harmless. url_index_->UpdateURL(new_row_id, new_row); - EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size()); + EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size()); } TEST_F(InMemoryURLIndexTest, DeleteRows) { url_index_.reset(new InMemoryURLIndex(FilePath())); url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; + String16Vector original_terms; // Make sure we actually get an existing result. - terms.push_back(ASCIIToUTF16("DrudgeReport")); - ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); + original_terms.push_back(ASCIIToUTF16("DrudgeReport")); + ScoredHistoryMatches matches = + url_index_->HistoryItemsForTerms(original_terms); ASSERT_EQ(1U, matches.size()); // Determine the row id for that result, delete that id, then search again. url_index_->DeleteURL(matches[0].url_info.id()); - EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); + EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty()); } TEST_F(InMemoryURLIndexTest, WhitelistedURLs) { @@ -697,77 +664,64 @@ TEST_F(InMemoryURLIndexTest, CacheSaveRestore) { url_index.SavePrivateData(&index_cache); // Capture our private data so we can later compare for equality. - int history_item_count(url_index.history_item_count_); - InMemoryURLIndex::String16Vector word_list(url_index.word_list_); - InMemoryURLIndex::WordMap word_map(url_index.word_map_); - InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_); - InMemoryURLIndex::WordIDHistoryMap word_id_history_map( - url_index.word_id_history_map_); - InMemoryURLIndex::HistoryInfoMap history_info_map( - url_index.history_info_map_); + URLIndexPrivateData& private_data(*(url_index_->private_data_)); + String16Vector word_list(private_data.word_list_); + WordMap word_map(private_data.word_map_); + CharWordIDMap char_word_map(private_data.char_word_map_); + WordIDHistoryMap word_id_history_map(private_data.word_id_history_map_); + HistoryIDWordMap history_id_word_map(private_data.history_id_word_map_); + HistoryInfoMap history_info_map(private_data.history_info_map_); // Prove that there is really something there. - EXPECT_GT(url_index.history_item_count_, 0); - EXPECT_FALSE(url_index.word_list_.empty()); - EXPECT_FALSE(url_index.word_map_.empty()); - EXPECT_FALSE(url_index.char_word_map_.empty()); - EXPECT_FALSE(url_index.word_id_history_map_.empty()); - EXPECT_FALSE(url_index.history_info_map_.empty()); + EXPECT_FALSE(private_data.word_list_.empty()); + // available_words_ will already be empty since we have freshly built the + // data set for this test. + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_FALSE(private_data.word_map_.empty()); + EXPECT_FALSE(private_data.char_word_map_.empty()); + EXPECT_FALSE(private_data.word_id_history_map_.empty()); + EXPECT_FALSE(private_data.history_id_word_map_.empty()); + EXPECT_FALSE(private_data.history_info_map_.empty()); // Clear and then prove it's clear. url_index.ClearPrivateData(); - EXPECT_EQ(0, url_index.history_item_count_); - EXPECT_TRUE(url_index.word_list_.empty()); - EXPECT_TRUE(url_index.word_map_.empty()); - EXPECT_TRUE(url_index.char_word_map_.empty()); - EXPECT_TRUE(url_index.word_id_history_map_.empty()); - EXPECT_TRUE(url_index.history_info_map_.empty()); + EXPECT_TRUE(private_data.word_list_.empty()); + EXPECT_TRUE(private_data.available_words_.empty()); + EXPECT_TRUE(private_data.word_map_.empty()); + EXPECT_TRUE(private_data.char_word_map_.empty()); + EXPECT_TRUE(private_data.word_id_history_map_.empty()); + EXPECT_TRUE(private_data.history_id_word_map_.empty()); + EXPECT_TRUE(private_data.history_info_map_.empty()); // Restore the cache. EXPECT_TRUE(url_index.RestorePrivateData(index_cache)); // Compare the restored and captured for equality. - EXPECT_EQ(history_item_count, url_index.history_item_count_); - EXPECT_EQ(word_list.size(), url_index.word_list_.size()); - EXPECT_EQ(word_map.size(), url_index.word_map_.size()); - EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size()); - EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size()); - EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size()); + EXPECT_EQ(word_list.size(), private_data.word_list_.size()); + EXPECT_EQ(word_map.size(), private_data.word_map_.size()); + EXPECT_EQ(char_word_map.size(), private_data.char_word_map_.size()); + EXPECT_EQ(word_id_history_map.size(), + private_data.word_id_history_map_.size()); + EXPECT_EQ(history_id_word_map.size(), + private_data.history_id_word_map_.size()); + EXPECT_EQ(history_info_map.size(), private_data.history_info_map_.size()); // WordList must be index-by-index equal. size_t count = word_list.size(); for (size_t i = 0; i < count; ++i) - EXPECT_EQ(word_list[i], url_index.word_list_[i]); - for (InMemoryURLIndex::CharWordIDMap::const_iterator expected = - char_word_map.begin(); expected != char_word_map.end(); ++expected) { - InMemoryURLIndex::CharWordIDMap::const_iterator actual = - url_index.char_word_map_.find(expected->first); - ASSERT_TRUE(url_index.char_word_map_.end() != actual); - const InMemoryURLIndex::WordIDSet& expected_set(expected->second); - const InMemoryURLIndex::WordIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::WordIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected = - word_id_history_map.begin(); expected != word_id_history_map.end(); - ++expected) { - InMemoryURLIndex::WordIDHistoryMap::const_iterator actual = - url_index.word_id_history_map_.find(expected->first); - ASSERT_TRUE(url_index.word_id_history_map_.end() != actual); - const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second); - const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected = - history_info_map.begin(); expected != history_info_map.end(); - ++expected) { - InMemoryURLIndex::HistoryInfoMap::const_iterator actual = - url_index.history_info_map_.find(expected->first); - ASSERT_FALSE(url_index.history_info_map_.end() == actual); + EXPECT_EQ(word_list[i], private_data.word_list_[i]); + + ExpectMapOfContainersIdentical(char_word_map, + private_data.char_word_map_); + ExpectMapOfContainersIdentical(word_id_history_map, + private_data.word_id_history_map_); + ExpectMapOfContainersIdentical(history_id_word_map, + private_data.history_id_word_map_); + + for (HistoryInfoMap::const_iterator expected = history_info_map.begin(); + expected != history_info_map.end(); ++expected) { + HistoryInfoMap::const_iterator actual = + private_data.history_info_map_.find(expected->first); + ASSERT_FALSE(private_data.history_info_map_.end() == actual); const URLRow& expected_row(expected->second); const URLRow& actual_row(actual->second); EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count()); diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 403699d..0b93e23 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -1362,6 +1362,8 @@ 'browser/history/in_memory_history_backend.h', 'browser/history/in_memory_url_index.cc', 'browser/history/in_memory_url_index.h', + 'browser/history/in_memory_url_index_types.cc', + 'browser/history/in_memory_url_index_types.h', 'browser/history/page_usage_data.cc', 'browser/history/page_usage_data.h', 'browser/history/query_parser.cc', diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index 32a603f..fcbf94c 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1366,6 +1366,7 @@ 'browser/history/history_unittest.cc', 'browser/history/history_unittest_base.cc', 'browser/history/history_unittest_base.h', + 'browser/history/in_memory_url_index_types_unittest.cc', 'browser/history/in_memory_url_index_unittest.cc', 'browser/history/query_parser_unittest.cc', 'browser/history/shortcuts_backend_unittest.cc', |