diff options
Diffstat (limited to 'chrome/browser/history/in_memory_url_index.cc')
-rw-r--r-- | chrome/browser/history/in_memory_url_index.cc | 458 |
1 files changed, 190 insertions, 268 deletions
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 6388686..b92a062 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -11,7 +11,6 @@ #include <numeric> #include "base/file_util.h" -#include "base/i18n/break_iterator.h" #include "base/i18n/case_conversion.h" #include "base/metrics/histogram.h" #include "base/string_util.h" @@ -59,23 +58,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; // each of these scores for each factor. const int kScoreRank[] = { 1425, 1200, 900, 400 }; -ScoredHistoryMatch::ScoredHistoryMatch() - : raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) - : HistoryMatch(url_info, 0, false, false), - raw_score(0), - can_inline(false) {} - -ScoredHistoryMatch::~ScoredHistoryMatch() {} - -// Comparison function for sorting ScoredMatches by their scores. -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, - const ScoredHistoryMatch& m2) { - return m1.raw_score >= m2.raw_score; -} - InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( const WordIDSet& word_id_set, const HistoryIDSet& history_id_set) @@ -88,11 +70,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} -// Comparison function for sorting TermMatches by their offsets. -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { - return m1.offset < m2.offset; -} - // Comparison function for sorting search terms by descending length. bool LengthGreater(const string16& string_a, const string16& string_b) { return string_a.length() > string_b.length(); @@ -137,14 +114,14 @@ int ScoreForValue(int value, const int* value_ranks) { InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) : history_dir_(history_dir), - history_item_count_(0), + private_data_(new URLIndexPrivateData), cached_at_shutdown_(false) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } // Called only by unit tests. InMemoryURLIndex::InMemoryURLIndex() - : history_item_count_(0), + : private_data_(new URLIndexPrivateData), cached_at_shutdown_(false) { InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); } @@ -170,7 +147,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist( // Indexing -bool InMemoryURLIndex::Init(history::URLDatabase* history_db, +bool InMemoryURLIndex::Init(URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; @@ -183,33 +160,47 @@ void InMemoryURLIndex::ShutDown() { cached_at_shutdown_ = true; } -bool InMemoryURLIndex::IndexRow(const URLRow& row) { +void InMemoryURLIndex::IndexRow(const URLRow& row) { const GURL& gurl(row.url()); // Index only URLs with a whitelisted scheme. if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) - return true; + return; + URLID row_id = row.id(); + // Strip out username and password before saving and indexing. string16 url(net::FormatUrl(gurl, languages_, net::kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, NULL, NULL, NULL)); - HistoryID history_id = static_cast<HistoryID>(row.id()); - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); + HistoryID history_id = static_cast<HistoryID>(row_id); + DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); // Add the row for quick lookup in the history info store. - URLRow new_row(GURL(url), row.id()); + URLRow new_row(GURL(url), row_id); new_row.set_visit_count(row.visit_count()); new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_[history_id] = new_row; + private_data_->history_info_map_[history_id] = new_row; + + // Index the words contained in the URL and title of the row. + AddRowWordsToIndex(new_row); + return; +} +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { + HistoryID history_id = static_cast<HistoryID>(row.id()); // Split URL into individual, unique words then add in the title words. + const GURL& gurl(row.url()); + string16 url(net::FormatUrl(gurl, languages_, + net::kFormatUrlOmitUsernamePassword, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, + NULL, NULL, NULL)); url = base::i18n::ToLower(url); - String16Set url_words = WordSetFromString16(url); - String16Set title_words = WordSetFromString16(row.title()); + String16Set url_words = String16SetFromString16(url); + String16Set title_words = String16SetFromString16(row.title()); String16Set words; std::set_union(url_words.begin(), url_words.end(), title_words.begin(), title_words.end(), @@ -218,8 +209,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { word_iter != words.end(); ++word_iter) AddWordToIndex(*word_iter, history_id); - ++history_item_count_; - return true; + search_term_cache_.clear(); // Invalidate the term cache. +} + +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { + RemoveRowWordsFromIndex(row); + HistoryID history_id = static_cast<HistoryID>(row.id()); + private_data_->history_info_map_.erase(history_id); +} + +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { + // Remove the entries in history_id_word_map_ and word_id_history_map_ for + // this row. + URLIndexPrivateData& private_data(*(private_data_.get())); + HistoryID history_id = static_cast<HistoryID>(row.id()); + WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; + private_data.history_id_word_map_.erase(history_id); + + // Reconcile any changes to word usage. + for (WordIDSet::iterator word_id_iter = word_id_set.begin(); + word_id_iter != word_id_set.end(); ++word_id_iter) { + WordID word_id = *word_id_iter; + private_data.word_id_history_map_[word_id].erase(history_id); + if (!private_data.word_id_history_map_[word_id].empty()) + continue; // The word is still in use. + + // The word is no longer in use. Reconcile any changes to character usage. + string16 word = private_data.word_list_[word_id]; + Char16Set characters = Char16SetFromString16(word); + for (Char16Set::iterator uni_char_iter = characters.begin(); + uni_char_iter != characters.end(); ++uni_char_iter) { + char16 uni_char = *uni_char_iter; + private_data.char_word_map_[uni_char].erase(word_id); + if (private_data.char_word_map_[uni_char].empty()) + private_data.char_word_map_.erase(uni_char); // No longer in use. + } + + // Complete the removal of references to the word. + private_data.word_id_history_map_.erase(word_id); + private_data.word_map_.erase(word); + private_data.word_list_[word_id] = string16(); + private_data.available_words_.insert(word_id); + } } bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, @@ -236,10 +267,8 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) return false; URLRow row; - while (history_enum.GetNextURL(&row)) { - if (!IndexRow(row)) - return false; - } + while (history_enum.GetNextURL(&row)) + IndexRow(row); UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", base::TimeTicks::Now() - beginning_time); SaveToCacheFile(); @@ -248,12 +277,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, } void InMemoryURLIndex::ClearPrivateData() { - history_item_count_ = 0; - word_list_.clear(); - word_map_.clear(); - char_word_map_.clear(); - word_id_history_map_.clear(); - history_info_map_.clear(); + private_data_->Clear(); search_term_cache_.clear(); } @@ -289,10 +313,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() { UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", base::TimeTicks::Now() - beginning_time); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", + private_data_->history_id_word_map_.size()); UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", + private_data_->word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", + private_data_->char_word_map_.size()); return true; } @@ -327,29 +354,39 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { // indexed and it qualifies then it gets indexed. If it is already // indexed and still qualifies then it gets updated, otherwise it // is deleted from the index. - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); - if (row_pos == history_info_map_.end()) { + HistoryInfoMap::iterator row_pos = + private_data_->history_info_map_.find(row_id); + if (row_pos == private_data_->history_info_map_.end()) { // This new row should be indexed if it qualifies. - if (RowQualifiesAsSignificant(row, base::Time())) - IndexRow(row); + URLRow new_row(row); + new_row.set_id(row_id); + if (RowQualifiesAsSignificant(new_row, base::Time())) + IndexRow(new_row); } else if (RowQualifiesAsSignificant(row, base::Time())) { // This indexed row still qualifies and will be re-indexed. // The url won't have changed but the title, visit count, etc. // might have changed. - URLRow& old_row = row_pos->second; - old_row.set_visit_count(row.visit_count()); - old_row.set_typed_count(row.typed_count()); - old_row.set_last_visit(row.last_visit()); - // TODO(mrossetti): When we start indexing the title the next line - // will need attention. - old_row.set_title(row.title()); + URLRow& updated_row = row_pos->second; + updated_row.set_visit_count(row.visit_count()); + updated_row.set_typed_count(row.typed_count()); + updated_row.set_last_visit(row.last_visit()); + // While the URL is guaranteed to remain stable, the title may have changed. + // If so, then we need to update the index with the changed words. + if (updated_row.title() != row.title()) { + // Clear all words associated with this row and re-index both the + // URL and title. + RemoveRowWordsFromIndex(updated_row); + updated_row.set_title(row.title()); + AddRowWordsToIndex(updated_row); + } } else { - // This indexed row no longer qualifies and will be de-indexed. - history_info_map_.erase(row_id); + // This indexed row no longer qualifies and will be de-indexed by + // clearing all words associated with this row. + URLRow& removed_row = row_pos->second; + RemoveRowFromIndex(removed_row); } // This invalidates the cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } void InMemoryURLIndex::DeleteURL(URLID row_id) { @@ -358,10 +395,9 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { // hits against this row until that map is rebuilt, but since the // history_info_map_ no longer references the row no erroneous results // will propagate to the user. - history_info_map_.erase(row_id); + private_data_->history_info_map_.erase(row_id); // This invalidates the word cache. search_term_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. } // Searching @@ -369,6 +405,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) { ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( const String16Vector& terms) { ScoredHistoryMatches scored_items; + + // Do nothing if we have indexed no words (probably because we've not been + // initialized yet). + if (private_data_->word_list_.empty()) + return scored_items; + if (!terms.empty()) { // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep // approach. @@ -426,7 +468,7 @@ void InMemoryURLIndex::ResetSearchTermCache() { iter->second.used_ = false; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( const string16& uni_string) { // Break the terms down into individual terms (words), get the candidate // set for each term, and intersect each to get a final candidate list. @@ -434,7 +476,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( // a string like "http://www.somewebsite.com" which, from our perspective, // is four words: 'http', 'www', 'somewebsite', and 'com'. HistoryIDSet history_id_set; - String16Vector terms = WordVectorFromString16(uni_string, true); + String16Vector terms = String16VectorFromString16(uni_string, true); // Sort the terms into the longest first as such are likely to narrow down // the results quicker. Also, single character terms are the most expensive // to process so save them for last. @@ -461,7 +503,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( return history_id_set; } -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& term) { if (term.empty()) return HistoryIDSet(); @@ -471,7 +513,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // occuring words in the user's searches. size_t term_length = term.length(); - InMemoryURLIndex::WordIDSet word_id_set; + WordIDSet word_id_set; if (term_length > 1) { // See if this term or a prefix thereof is present in the cache. SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); @@ -517,7 +559,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Reduce the word set with any leftover, unprocessed characters. if (!unique_chars.empty()) { - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); + WordIDSet leftover_set( + private_data_->WordIDSetForTermChars(unique_chars)); // We might come up empty on the leftovers. if (leftover_set.empty()) { search_term_cache_[term] = SearchTermCacheItem(); @@ -540,13 +583,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // contains words which do not have the search term as a proper subset. for (WordIDSet::iterator word_set_iter = word_id_set.begin(); word_set_iter != word_id_set.end(); ) { - if (word_list_[*word_set_iter].find(term) == string16::npos) + if (private_data_->word_list_[*word_set_iter].find(term) == + string16::npos) word_id_set.erase(word_set_iter++); else ++word_set_iter; } } else { - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); + word_id_set = + private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); } // If any words resulted then we can compose a set of history IDs by unioning @@ -556,8 +601,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( for (WordIDSet::iterator word_id_iter = word_id_set.begin(); word_id_iter != word_id_set.end(); ++word_id_iter) { WordID word_id = *word_id_iter; - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); - if (word_iter != word_id_history_map_.end()) { + WordIDHistoryMap::iterator word_iter = + private_data_->word_id_history_map_.find(word_id); + if (word_iter != private_data_->word_id_history_map_.end()) { HistoryIDSet& word_history_id_set(word_iter->second); history_id_set.insert(word_history_id_set.begin(), word_history_id_set.end()); @@ -575,85 +621,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( // Utility Functions -// static -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( - const string16& uni_string) { - const size_t kMaxWordLength = 64; - String16Vector words = WordVectorFromString16(uni_string, false); - String16Set word_set; - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); - ++iter) - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); - return word_set; -} - -// static -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( - const string16& uni_string, - bool break_on_space) { - base::i18n::BreakIterator iter( - uni_string, - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE - : base::i18n::BreakIterator::BREAK_WORD); - String16Vector words; - if (!iter.Init()) - return words; - while (iter.Advance()) { - if (break_on_space || iter.IsWord()) { - string16 word = iter.GetString(); - if (break_on_space) - TrimWhitespace(word, TRIM_ALL, &word); - if (!word.empty()) - words.push_back(word); - } - } - return words; -} - -// static -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( - const string16& term) { - Char16Set characters; - for (string16::const_iterator iter = term.begin(); iter != term.end(); - ++iter) - characters.insert(*iter); - return characters; -} - void InMemoryURLIndex::AddWordToIndex(const string16& term, HistoryID history_id) { - WordMap::iterator word_pos = word_map_.find(term); - if (word_pos != word_map_.end()) + WordMap::iterator word_pos = private_data_->word_map_.find(term); + if (word_pos != private_data_->word_map_.end()) UpdateWordHistory(word_pos->second, history_id); else AddWordHistory(term, history_id); } void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); - DCHECK(history_pos != word_id_history_map_.end()); - HistoryIDSet& history_id_set(history_pos->second); - history_id_set.insert(history_id); + WordIDHistoryMap::iterator history_pos = + private_data_->word_id_history_map_.find(word_id); + DCHECK(history_pos != private_data_->word_id_history_map_.end()); + HistoryIDSet& history_id_set(history_pos->second); + history_id_set.insert(history_id); + private_data_->AddToHistoryIDWordMap(history_id, word_id); } // Add a new word to the word list and the word map, and then create a // new entry in the word/history map. void InMemoryURLIndex::AddWordHistory(const string16& term, HistoryID history_id) { - word_list_.push_back(term); - WordID word_id = word_list_.size() - 1; - word_map_[term] = word_id; + URLIndexPrivateData& private_data(*(private_data_.get())); + WordID word_id = private_data.word_list_.size(); + if (private_data.available_words_.empty()) { + private_data.word_list_.push_back(term); + } else { + word_id = *(private_data.available_words_.begin()); + private_data.word_list_[word_id] = term; + private_data.available_words_.erase(word_id); + } + private_data.word_map_[term] = word_id; + HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_[word_id] = history_id_set; + private_data.word_id_history_map_[word_id] = history_id_set; + private_data.AddToHistoryIDWordMap(history_id, word_id); + // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. Char16Set characters = Char16SetFromString16(term); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { char16 uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter != char_word_map_.end()) { + CharWordIDMap::iterator char_iter = + private_data.char_word_map_.find(uni_char); + if (char_iter != private_data.char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); word_id_set.insert(word_id); @@ -661,108 +675,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_[uni_char] = word_id_set; - } - } -} - -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - const Char16Set& term_chars) { - WordIDSet word_id_set; - for (Char16Set::const_iterator c_iter = term_chars.begin(); - c_iter != term_chars.end(); ++c_iter) { - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); - if (char_iter == char_word_map_.end()) { - // A character was not found so there are no matching results: bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - // It is possible for there to no longer be any words associated with - // a particular character. Give up in that case. - if (char_word_id_set.empty()) { - word_id_set.clear(); - break; - } - - if (c_iter == term_chars.begin()) { - // First character results becomes base set of results. - word_id_set = char_word_id_set; - } else { - // Subsequent character results get intersected in. - WordIDSet new_word_id_set; - std::set_intersection(word_id_set.begin(), word_id_set.end(), - char_word_id_set.begin(), char_word_id_set.end(), - std::inserter(new_word_id_set, - new_word_id_set.begin())); - word_id_set.swap(new_word_id_set); + private_data.char_word_map_[uni_char] = word_id_set; } } - return word_id_set; -} - -// static -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, - const string16& string, - int term_num) { - const size_t kMaxCompareLength = 2048; - const string16& short_string = (string.length() > kMaxCompareLength) ? - string.substr(0, kMaxCompareLength) : string; - TermMatches matches; - for (size_t location = short_string.find(term); location != string16::npos; - location = short_string.find(term, location + 1)) { - matches.push_back(TermMatch(term_num, location, term.length())); - } - return matches; -} - -// static -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { - if (matches.empty()) - return matches; - TermMatches sorted_matches = matches; - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); - TermMatches clean_matches; - TermMatch last_match = sorted_matches[0]; - clean_matches.push_back(last_match); - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; - iter != sorted_matches.end(); ++iter) { - if (iter->offset >= last_match.offset + last_match.length) { - last_match = *iter; - clean_matches.push_back(last_match); - } - } - return clean_matches; -} - -// static -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( - const TermMatches& matches) { - std::vector<size_t> offsets; - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) - offsets.push_back(i->offset); - return offsets; -} - -// static -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( - const TermMatches& matches, - const std::vector<size_t>& offsets) { - DCHECK_EQ(matches.size(), offsets.size()); - TermMatches new_matches; - std::vector<size_t>::const_iterator offset_iter = offsets.begin(); - for (TermMatches::const_iterator term_iter = matches.begin(); - term_iter != matches.end(); ++term_iter, ++offset_iter) { - if (*offset_iter != string16::npos) { - TermMatch new_match(*term_iter); - new_match.offset = *offset_iter; - new_matches.push_back(new_match); - } - } - return new_matches; } // static +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( const URLRow& row, const String16Vector& terms) { @@ -791,8 +710,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( } // Sort matches by offset and eliminate any which overlap. - match.url_matches = SortAndDeoverlap(match.url_matches); - match.title_matches = SortAndDeoverlap(match.title_matches); + match.url_matches = SortAndDeoverlapMatches(match.url_matches); + match.title_matches = SortAndDeoverlapMatches(match.title_matches); // We should not (currently) inline autocomplete a result unless both of the // following are true: @@ -906,19 +825,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( const InMemoryURLIndex& index, const String16Vector& lower_terms) : index_(index), - lower_terms_(lower_terms) { -} + lower_terms_(lower_terms) {} InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} -void InMemoryURLIndex::AddHistoryMatch::operator()( - const InMemoryURLIndex::HistoryID history_id) { +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = - index_.history_info_map_.find(history_id); + index_.private_data_->history_info_map_.find(history_id); // Note that a history_id may be present in the word_id_history_map_ yet not // be found in the history_info_map_. This occurs when an item has been // deleted by the user or the item no longer qualifies as a quick result. - if (hist_pos != index_.history_info_map_.end()) { + if (hist_pos != index_.private_data_->history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); if (match.raw_score > 0) @@ -940,7 +857,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { DCHECK(cache); cache->set_timestamp(base::Time::Now().ToInternalValue()); - cache->set_history_item_count(history_item_count_); + // history_item_count_ is no longer used but rather than change the protobuf + // definition use a placeholder. This will go away with the switch to SQLite. + cache->set_history_item_count(0); SaveWordList(cache); SaveWordMap(cache); SaveCharWordMap(cache); @@ -951,20 +870,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { bool InMemoryURLIndex::RestorePrivateData( const InMemoryURLIndexCacheItem& cache) { last_saved_ = base::Time::FromInternalValue(cache.timestamp()); - history_item_count_ = cache.history_item_count(); - return (history_item_count_ == 0) || (RestoreWordList(cache) && - RestoreWordMap(cache) && RestoreCharWordMap(cache) && - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); + return RestoreWordList(cache) && RestoreWordMap(cache) && + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && + RestoreHistoryInfoMap(cache); } - void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { - if (word_list_.empty()) + if (private_data_->word_list_.empty()) return; WordListItem* list_item = cache->mutable_word_list(); - list_item->set_word_count(word_list_.size()); - for (String16Vector::const_iterator iter = word_list_.begin(); - iter != word_list_.end(); ++iter) + list_item->set_word_count(private_data_->word_list_.size()); + for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); + iter != private_data_->word_list_.end(); ++iter) list_item->add_word(UTF16ToUTF8(*iter)); } @@ -979,17 +896,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<std::string>& words(list_item.word()); for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); iter != words.end(); ++iter) - word_list_.push_back(UTF8ToUTF16(*iter)); + private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); return true; } void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { - if (word_map_.empty()) + if (private_data_->word_map_.empty()) return; WordMapItem* map_item = cache->mutable_word_map(); - map_item->set_item_count(word_map_.size()); - for (WordMap::const_iterator iter = word_map_.begin(); - iter != word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_map_.size()); + for (WordMap::const_iterator iter = private_data_->word_map_.begin(); + iter != private_data_->word_map_.end(); ++iter) { WordMapEntry* map_entry = map_item->add_word_map_entry(); map_entry->set_word(UTF16ToUTF8(iter->first)); map_entry->set_word_id(iter->second); @@ -1007,17 +924,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); iter != entries.end(); ++iter) - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); return true; } void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { - if (char_word_map_.empty()) + if (private_data_->char_word_map_.empty()) return; CharWordMapItem* map_item = cache->mutable_char_word_map(); - map_item->set_item_count(char_word_map_.size()); - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); - iter != char_word_map_.end(); ++iter) { + map_item->set_item_count(private_data_->char_word_map_.size()); + for (CharWordIDMap::const_iterator iter = + private_data_->char_word_map_.begin(); + iter != private_data_->char_word_map_.end(); ++iter) { CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); map_entry->set_char_16(iter->first); const WordIDSet& word_id_set(iter->second); @@ -1051,19 +969,20 @@ bool InMemoryURLIndex::RestoreCharWordMap( for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); jiter != word_ids.end(); ++jiter) word_id_set.insert(*jiter); - char_word_map_[uni_char] = word_id_set; + private_data_->char_word_map_[uni_char] = word_id_set; } return true; } void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) const { - if (word_id_history_map_.empty()) + if (private_data_->word_id_history_map_.empty()) return; WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); - map_item->set_item_count(word_id_history_map_.size()); - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); - iter != word_id_history_map_.end(); ++iter) { + map_item->set_item_count(private_data_->word_id_history_map_.size()); + for (WordIDHistoryMap::const_iterator iter = + private_data_->word_id_history_map_.begin(); + iter != private_data_->word_id_history_map_.end(); ++iter) { WordIDHistoryMapEntry* map_entry = map_item->add_word_id_history_map_entry(); map_entry->set_word_id(iter->first); @@ -1096,21 +1015,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap( HistoryIDSet history_id_set; const RepeatedField<int64>& history_ids(iter->history_id()); for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); - jiter != history_ids.end(); ++jiter) + jiter != history_ids.end(); ++jiter) { history_id_set.insert(*jiter); - word_id_history_map_[word_id] = history_id_set; + private_data_->AddToHistoryIDWordMap(*jiter, word_id); + } + private_data_->word_id_history_map_[word_id] = history_id_set; } return true; } void InMemoryURLIndex::SaveHistoryInfoMap( InMemoryURLIndexCacheItem* cache) const { - if (history_info_map_.empty()) + if (private_data_->history_info_map_.empty()) return; HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); - map_item->set_item_count(history_info_map_.size()); - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); - iter != history_info_map_.end(); ++iter) { + map_item->set_item_count(private_data_->history_info_map_.size()); + for (HistoryInfoMap::const_iterator iter = + private_data_->history_info_map_.begin(); + iter != private_data_->history_info_map_.end(); ++iter) { HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); map_entry->set_history_id(iter->first); const URLRow& url_row(iter->second); @@ -1148,7 +1070,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap( string16 title(UTF8ToUTF16(iter->title())); url_row.set_title(title); } - history_info_map_[history_id] = url_row; + private_data_->history_info_map_[history_id] = url_row; } return true; } |