path: root/chrome/browser/history/in_memory_url_index.cc
Diffstat (limited to 'chrome/browser/history/in_memory_url_index.cc')
-rw-r--r--  chrome/browser/history/in_memory_url_index.cc  458
1 file changed, 190 insertions, 268 deletions
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 6388686..b92a062 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -11,7 +11,6 @@
#include <numeric>
#include "base/file_util.h"
-#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/metrics/histogram.h"
#include "base/string_util.h"
@@ -59,23 +58,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
// each of these scores for each factor.
const int kScoreRank[] = { 1425, 1200, 900, 400 };
-ScoredHistoryMatch::ScoredHistoryMatch()
- : raw_score(0),
- can_inline(false) {}
-
-ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
- : HistoryMatch(url_info, 0, false, false),
- raw_score(0),
- can_inline(false) {}
-
-ScoredHistoryMatch::~ScoredHistoryMatch() {}
-
-// Comparison function for sorting ScoredMatches by their scores.
-bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
- const ScoredHistoryMatch& m2) {
- return m1.raw_score >= m2.raw_score;
-}
-
InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
const WordIDSet& word_id_set,
const HistoryIDSet& history_id_set)
@@ -88,11 +70,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()
InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}
-// Comparison function for sorting TermMatches by their offsets.
-bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
- return m1.offset < m2.offset;
-}
-
// Comparison function for sorting search terms by descending length.
bool LengthGreater(const string16& string_a, const string16& string_b) {
return string_a.length() > string_b.length();
@@ -137,14 +114,14 @@ int ScoreForValue(int value, const int* value_ranks) {
InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
: history_dir_(history_dir),
- history_item_count_(0),
+ private_data_(new URLIndexPrivateData),
cached_at_shutdown_(false) {
InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
}
// Called only by unit tests.
InMemoryURLIndex::InMemoryURLIndex()
- : history_item_count_(0),
+ : private_data_(new URLIndexPrivateData),
cached_at_shutdown_(false) {
InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
}
@@ -170,7 +147,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist(
// Indexing
-bool InMemoryURLIndex::Init(history::URLDatabase* history_db,
+bool InMemoryURLIndex::Init(URLDatabase* history_db,
const std::string& languages) {
// TODO(mrossetti): Register for profile/language change notifications.
languages_ = languages;
@@ -183,33 +160,47 @@ void InMemoryURLIndex::ShutDown() {
cached_at_shutdown_ = true;
}
-bool InMemoryURLIndex::IndexRow(const URLRow& row) {
+void InMemoryURLIndex::IndexRow(const URLRow& row) {
const GURL& gurl(row.url());
// Index only URLs with a whitelisted scheme.
if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
- return true;
+ return;
+ URLID row_id = row.id();
+ // Strip out username and password before saving and indexing.
string16 url(net::FormatUrl(gurl, languages_,
net::kFormatUrlOmitUsernamePassword,
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
NULL, NULL, NULL));
- HistoryID history_id = static_cast<HistoryID>(row.id());
- DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
+ HistoryID history_id = static_cast<HistoryID>(row_id);
+ DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
// Add the row for quick lookup in the history info store.
- URLRow new_row(GURL(url), row.id());
+ URLRow new_row(GURL(url), row_id);
new_row.set_visit_count(row.visit_count());
new_row.set_typed_count(row.typed_count());
new_row.set_last_visit(row.last_visit());
new_row.set_title(row.title());
- history_info_map_[history_id] = new_row;
+ private_data_->history_info_map_[history_id] = new_row;
+
+ // Index the words contained in the URL and title of the row.
+ AddRowWordsToIndex(new_row);
+ return;
+}
+void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {
+ HistoryID history_id = static_cast<HistoryID>(row.id());
// Split URL into individual, unique words then add in the title words.
+ const GURL& gurl(row.url());
+ string16 url(net::FormatUrl(gurl, languages_,
+ net::kFormatUrlOmitUsernamePassword,
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
+ NULL, NULL, NULL));
url = base::i18n::ToLower(url);
- String16Set url_words = WordSetFromString16(url);
- String16Set title_words = WordSetFromString16(row.title());
+ String16Set url_words = String16SetFromString16(url);
+ String16Set title_words = String16SetFromString16(row.title());
String16Set words;
std::set_union(url_words.begin(), url_words.end(),
title_words.begin(), title_words.end(),
@@ -218,8 +209,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) {
word_iter != words.end(); ++word_iter)
AddWordToIndex(*word_iter, history_id);
- ++history_item_count_;
- return true;
+ search_term_cache_.clear(); // Invalidate the term cache.
+}
+
+void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {
+ RemoveRowWordsFromIndex(row);
+ HistoryID history_id = static_cast<HistoryID>(row.id());
+ private_data_->history_info_map_.erase(history_id);
+}
+
+void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {
+ // Remove the entries in history_id_word_map_ and word_id_history_map_ for
+ // this row.
+ URLIndexPrivateData& private_data(*(private_data_.get()));
+ HistoryID history_id = static_cast<HistoryID>(row.id());
+ WordIDSet word_id_set = private_data.history_id_word_map_[history_id];
+ private_data.history_id_word_map_.erase(history_id);
+
+ // Reconcile any changes to word usage.
+ for (WordIDSet::iterator word_id_iter = word_id_set.begin();
+ word_id_iter != word_id_set.end(); ++word_id_iter) {
+ WordID word_id = *word_id_iter;
+ private_data.word_id_history_map_[word_id].erase(history_id);
+ if (!private_data.word_id_history_map_[word_id].empty())
+ continue; // The word is still in use.
+
+ // The word is no longer in use. Reconcile any changes to character usage.
+ string16 word = private_data.word_list_[word_id];
+ Char16Set characters = Char16SetFromString16(word);
+ for (Char16Set::iterator uni_char_iter = characters.begin();
+ uni_char_iter != characters.end(); ++uni_char_iter) {
+ char16 uni_char = *uni_char_iter;
+ private_data.char_word_map_[uni_char].erase(word_id);
+ if (private_data.char_word_map_[uni_char].empty())
+ private_data.char_word_map_.erase(uni_char); // No longer in use.
+ }
+
+ // Complete the removal of references to the word.
+ private_data.word_id_history_map_.erase(word_id);
+ private_data.word_map_.erase(word);
+ private_data.word_list_[word_id] = string16();
+ private_data.available_words_.insert(word_id);
+ }
}
bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
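
For reference, a minimal sketch of the reverse-index cleanup the new RemoveRowWordsFromIndex performs: drop the row's word set, then release any word (and any character) that no longer indexes anything. Plain standard-library containers stand in for the URLIndexPrivateData members; the names mirror the diff but the types are simplified.

// Sketch only: simplified stand-ins for the index containers in this diff.
#include <cstddef>
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <vector>

using WordID = std::size_t;
using HistoryID = std::int64_t;

struct MiniIndex {
  std::vector<std::u16string> word_list;                 // word_id -> word
  std::map<std::u16string, WordID> word_map;             // word -> word_id
  std::map<char16_t, std::set<WordID>> char_word_map;    // char -> word_ids
  std::map<WordID, std::set<HistoryID>> word_id_history_map;
  std::map<HistoryID, std::set<WordID>> history_id_word_map;
  std::set<WordID> available_words;                      // recycled word slots

  // Remove every trace of |history_id|, releasing words and characters that
  // no longer index any row (mirrors RemoveRowWordsFromIndex above).
  void RemoveHistoryID(HistoryID history_id) {
    std::set<WordID> word_ids = history_id_word_map[history_id];
    history_id_word_map.erase(history_id);
    for (WordID word_id : word_ids) {
      word_id_history_map[word_id].erase(history_id);
      if (!word_id_history_map[word_id].empty())
        continue;  // The word still indexes other rows.
      const std::u16string word = word_list[word_id];
      for (char16_t uni_char : word) {
        char_word_map[uni_char].erase(word_id);
        if (char_word_map[uni_char].empty())
          char_word_map.erase(uni_char);  // Character no longer in use.
      }
      word_id_history_map.erase(word_id);
      word_map.erase(word);
      word_list[word_id].clear();          // Keep other word IDs stable.
      available_words.insert(word_id);     // Slot can be reused later.
    }
  }
};
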
@@ -236,10 +267,8 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
return false;
URLRow row;
- while (history_enum.GetNextURL(&row)) {
- if (!IndexRow(row))
- return false;
- }
+ while (history_enum.GetNextURL(&row))
+ IndexRow(row);
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
base::TimeTicks::Now() - beginning_time);
SaveToCacheFile();
@@ -248,12 +277,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
}
void InMemoryURLIndex::ClearPrivateData() {
- history_item_count_ = 0;
- word_list_.clear();
- word_map_.clear();
- char_word_map_.clear();
- word_id_history_map_.clear();
- history_info_map_.clear();
+ private_data_->Clear();
search_term_cache_.clear();
}
@@ -289,10 +313,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() {
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
base::TimeTicks::Now() - beginning_time);
- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
+ UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
+ private_data_->history_id_word_map_.size());
UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
+ private_data_->word_map_.size());
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
+ private_data_->char_word_map_.size());
return true;
}
@@ -327,29 +354,39 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
// indexed and it qualifies then it gets indexed. If it is already
// indexed and still qualifies then it gets updated, otherwise it
// is deleted from the index.
- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
- if (row_pos == history_info_map_.end()) {
+ HistoryInfoMap::iterator row_pos =
+ private_data_->history_info_map_.find(row_id);
+ if (row_pos == private_data_->history_info_map_.end()) {
// This new row should be indexed if it qualifies.
- if (RowQualifiesAsSignificant(row, base::Time()))
- IndexRow(row);
+ URLRow new_row(row);
+ new_row.set_id(row_id);
+ if (RowQualifiesAsSignificant(new_row, base::Time()))
+ IndexRow(new_row);
} else if (RowQualifiesAsSignificant(row, base::Time())) {
// This indexed row still qualifies and will be re-indexed.
// The url won't have changed but the title, visit count, etc.
// might have changed.
- URLRow& old_row = row_pos->second;
- old_row.set_visit_count(row.visit_count());
- old_row.set_typed_count(row.typed_count());
- old_row.set_last_visit(row.last_visit());
- // TODO(mrossetti): When we start indexing the title the next line
- // will need attention.
- old_row.set_title(row.title());
+ URLRow& updated_row = row_pos->second;
+ updated_row.set_visit_count(row.visit_count());
+ updated_row.set_typed_count(row.typed_count());
+ updated_row.set_last_visit(row.last_visit());
+ // While the URL is guaranteed to remain stable, the title may have changed.
+ // If so, then we need to update the index with the changed words.
+ if (updated_row.title() != row.title()) {
+ // Clear all words associated with this row and re-index both the
+ // URL and title.
+ RemoveRowWordsFromIndex(updated_row);
+ updated_row.set_title(row.title());
+ AddRowWordsToIndex(updated_row);
+ }
} else {
- // This indexed row no longer qualifies and will be de-indexed.
- history_info_map_.erase(row_id);
+ // This indexed row no longer qualifies and will be de-indexed by
+ // clearing all words associated with this row.
+ URLRow& removed_row = row_pos->second;
+ RemoveRowFromIndex(removed_row);
}
// This invalidates the cache.
search_term_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
}
void InMemoryURLIndex::DeleteURL(URLID row_id) {
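
The reworked UpdateURL has three outcomes: index a new qualifying row, re-index an existing row only when its title changed, or de-index a row that no longer qualifies. A self-contained sketch of that decision tree; Row, RowQualifies, IndexRow, RemoveRow and ReindexTitle are hypothetical stand-ins for the types and helpers used above, not the Chromium API.

// Sketch of the UpdateURL flow in this hunk, with simplified types.
#include <cstdint>
#include <map>
#include <string>

struct Row {
  std::int64_t id = 0;
  std::u16string title;
  int visit_count = 0;
};

bool RowQualifies(const Row& row) { return row.visit_count > 0; }
void IndexRow(const Row&) {}    // Stand-in for IndexRow/AddRowWordsToIndex.
void RemoveRow(const Row&) {}   // Stand-in for RemoveRowWordsFromIndex.

void ReindexTitle(Row& indexed_row, const std::u16string& new_title) {
  RemoveRow(indexed_row);       // Clear all words for this row...
  indexed_row.title = new_title;
  IndexRow(indexed_row);        // ...then re-add URL and title words.
}

void UpdateURL(std::map<std::int64_t, Row>& history_info,
               std::int64_t row_id, const Row& row) {
  auto pos = history_info.find(row_id);
  if (pos == history_info.end()) {
    if (RowQualifies(row))
      IndexRow(row);            // New row: index it if it qualifies.
  } else if (RowQualifies(row)) {
    Row& indexed = pos->second;
    indexed.visit_count = row.visit_count;
    if (indexed.title != row.title)      // URL is stable; only the title
      ReindexTitle(indexed, row.title);  // can change the indexed words.
  } else {
    RemoveRow(pos->second);     // No longer qualifies: de-index it.
    history_info.erase(pos);
  }
}
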
@@ -358,10 +395,9 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) {
// hits against this row until that map is rebuilt, but since the
// history_info_map_ no longer references the row no erroneous results
// will propagate to the user.
- history_info_map_.erase(row_id);
+ private_data_->history_info_map_.erase(row_id);
// This invalidates the word cache.
search_term_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
}
// Searching
@@ -369,6 +405,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) {
ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
const String16Vector& terms) {
ScoredHistoryMatches scored_items;
+
+ // Do nothing if we have indexed no words (probably because we've not been
+ // initialized yet).
+ if (private_data_->word_list_.empty())
+ return scored_items;
+
if (!terms.empty()) {
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
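
The "basic mark-and-sweep" mentioned in the comment above works by clearing the used_ flag on every cache entry before a search, marking entries as they are reused, and then discarding anything left unmarked. A small standalone sketch of the idea with a simplified cache item, not the SearchTermCacheItem defined in this file:

// Sketch: mark-and-sweep expiry for a search-term result cache.
#include <cstdint>
#include <map>
#include <set>
#include <string>

struct CacheItem {
  std::set<std::int64_t> history_ids;
  bool used = false;  // Marked when the current search touches this entry.
};

using TermCache = std::map<std::u16string, CacheItem>;

// Phase 1: before a search, clear all marks.
void ResetMarks(TermCache& cache) {
  for (auto& entry : cache)
    entry.second.used = false;
}

// Phase 2: after the search, sweep every entry that was never marked.
void Sweep(TermCache& cache) {
  for (auto it = cache.begin(); it != cache.end();) {
    if (!it->second.used)
      it = cache.erase(it);
    else
      ++it;
  }
}
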
@@ -426,7 +468,7 @@ void InMemoryURLIndex::ResetSearchTermCache() {
iter->second.used_ = false;
}
-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
+HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
const string16& uni_string) {
// Break the terms down into individual terms (words), get the candidate
// set for each term, and intersect each to get a final candidate list.
@@ -434,7 +476,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
// a string like "http://www.somewebsite.com" which, from our perspective,
// is four words: 'http', 'www', 'somewebsite', and 'com'.
HistoryIDSet history_id_set;
- String16Vector terms = WordVectorFromString16(uni_string, true);
+ String16Vector terms = String16VectorFromString16(uni_string, true);
// Sort the terms into the longest first as such are likely to narrow down
// the results quicker. Also, single character terms are the most expensive
// to process so save them for last.
@@ -461,7 +503,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
return history_id_set;
}
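
HistoryIDSetFromWords builds the final candidate set by intersecting each term's history IDs, processing the longest terms first so the working set shrinks quickly and the expensive single-character terms come last. A standalone sketch of that loop; HistoryIDsForTerm here is a trivial placeholder, not the lookup implemented below.

// Sketch: longest-terms-first intersection of per-term candidate sets.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <set>
#include <string>
#include <vector>

using HistoryIDSet = std::set<std::int64_t>;

bool LengthGreater(const std::u16string& a, const std::u16string& b) {
  return a.length() > b.length();
}

// Placeholder lookup; the real version consults the char/word index.
HistoryIDSet HistoryIDsForTerm(const std::u16string& term) {
  return term.empty() ? HistoryIDSet() : HistoryIDSet{1, 2, 3};
}

HistoryIDSet HistoryIDSetFromWords(std::vector<std::u16string> terms) {
  std::sort(terms.begin(), terms.end(), LengthGreater);
  HistoryIDSet result;
  for (std::size_t i = 0; i < terms.size(); ++i) {
    HistoryIDSet term_ids = HistoryIDsForTerm(terms[i]);
    if (term_ids.empty())
      return HistoryIDSet();   // A term with no hits empties the result.
    if (i == 0) {
      result.swap(term_ids);   // First term seeds the candidate set.
      continue;
    }
    HistoryIDSet narrowed;
    std::set_intersection(result.begin(), result.end(),
                          term_ids.begin(), term_ids.end(),
                          std::inserter(narrowed, narrowed.begin()));
    if (narrowed.empty())
      return HistoryIDSet();
    result.swap(narrowed);
  }
  return result;
}
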
-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
+HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
const string16& term) {
if (term.empty())
return HistoryIDSet();
@@ -471,7 +513,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// occurring words in the user's searches.
size_t term_length = term.length();
- InMemoryURLIndex::WordIDSet word_id_set;
+ WordIDSet word_id_set;
if (term_length > 1) {
// See if this term or a prefix thereof is present in the cache.
SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
@@ -517,7 +559,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// Reduce the word set with any leftover, unprocessed characters.
if (!unique_chars.empty()) {
- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
+ WordIDSet leftover_set(
+ private_data_->WordIDSetForTermChars(unique_chars));
// We might come up empty on the leftovers.
if (leftover_set.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
@@ -540,13 +583,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// contains words which do not have the search term as a proper subset.
for (WordIDSet::iterator word_set_iter = word_id_set.begin();
word_set_iter != word_id_set.end(); ) {
- if (word_list_[*word_set_iter].find(term) == string16::npos)
+ if (private_data_->word_list_[*word_set_iter].find(term) ==
+ string16::npos)
word_id_set.erase(word_set_iter++);
else
++word_set_iter;
}
} else {
- word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
+ word_id_set =
+ private_data_->WordIDSetForTermChars(Char16SetFromString16(term));
}
// If any words resulted then we can compose a set of history IDs by unioning
@@ -556,8 +601,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
for (WordIDSet::iterator word_id_iter = word_id_set.begin();
word_id_iter != word_id_set.end(); ++word_id_iter) {
WordID word_id = *word_id_iter;
- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
- if (word_iter != word_id_history_map_.end()) {
+ WordIDHistoryMap::iterator word_iter =
+ private_data_->word_id_history_map_.find(word_id);
+ if (word_iter != private_data_->word_id_history_map_.end()) {
HistoryIDSet& word_history_id_set(word_iter->second);
history_id_set.insert(word_history_id_set.begin(),
word_history_id_set.end());
@@ -575,85 +621,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// Utility Functions
-// static
-InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
- const string16& uni_string) {
- const size_t kMaxWordLength = 64;
- String16Vector words = WordVectorFromString16(uni_string, false);
- String16Set word_set;
- for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
- ++iter)
- word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
- return word_set;
-}
-
-// static
-InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
- const string16& uni_string,
- bool break_on_space) {
- base::i18n::BreakIterator iter(
- uni_string,
- break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
- : base::i18n::BreakIterator::BREAK_WORD);
- String16Vector words;
- if (!iter.Init())
- return words;
- while (iter.Advance()) {
- if (break_on_space || iter.IsWord()) {
- string16 word = iter.GetString();
- if (break_on_space)
- TrimWhitespace(word, TRIM_ALL, &word);
- if (!word.empty())
- words.push_back(word);
- }
- }
- return words;
-}
-
-// static
-InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
- const string16& term) {
- Char16Set characters;
- for (string16::const_iterator iter = term.begin(); iter != term.end();
- ++iter)
- characters.insert(*iter);
- return characters;
-}
-
void InMemoryURLIndex::AddWordToIndex(const string16& term,
HistoryID history_id) {
- WordMap::iterator word_pos = word_map_.find(term);
- if (word_pos != word_map_.end())
+ WordMap::iterator word_pos = private_data_->word_map_.find(term);
+ if (word_pos != private_data_->word_map_.end())
UpdateWordHistory(word_pos->second, history_id);
else
AddWordHistory(term, history_id);
}
void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
- WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
- DCHECK(history_pos != word_id_history_map_.end());
- HistoryIDSet& history_id_set(history_pos->second);
- history_id_set.insert(history_id);
+ WordIDHistoryMap::iterator history_pos =
+ private_data_->word_id_history_map_.find(word_id);
+ DCHECK(history_pos != private_data_->word_id_history_map_.end());
+ HistoryIDSet& history_id_set(history_pos->second);
+ history_id_set.insert(history_id);
+ private_data_->AddToHistoryIDWordMap(history_id, word_id);
}
// Add a new word to the word list and the word map, and then create a
// new entry in the word/history map.
void InMemoryURLIndex::AddWordHistory(const string16& term,
HistoryID history_id) {
- word_list_.push_back(term);
- WordID word_id = word_list_.size() - 1;
- word_map_[term] = word_id;
+ URLIndexPrivateData& private_data(*(private_data_.get()));
+ WordID word_id = private_data.word_list_.size();
+ if (private_data.available_words_.empty()) {
+ private_data.word_list_.push_back(term);
+ } else {
+ word_id = *(private_data.available_words_.begin());
+ private_data.word_list_[word_id] = term;
+ private_data.available_words_.erase(word_id);
+ }
+ private_data.word_map_[term] = word_id;
+
HistoryIDSet history_id_set;
history_id_set.insert(history_id);
- word_id_history_map_[word_id] = history_id_set;
+ private_data.word_id_history_map_[word_id] = history_id_set;
+ private_data.AddToHistoryIDWordMap(history_id, word_id);
+
// For each character in the newly added word (i.e. a word that is not
// already in the word index), add the word to the character index.
Char16Set characters = Char16SetFromString16(term);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
char16 uni_char = *uni_char_iter;
- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
- if (char_iter != char_word_map_.end()) {
+ CharWordIDMap::iterator char_iter =
+ private_data.char_word_map_.find(uni_char);
+ if (char_iter != private_data.char_word_map_.end()) {
// Update existing entry in the char/word index.
WordIDSet& word_id_set(char_iter->second);
word_id_set.insert(word_id);
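
The new AddWordHistory no longer blindly appends to word_list_; it first recycles a slot from available_words_ if one was freed by RemoveRowWordsFromIndex, which keeps existing word IDs stable. A small sketch of that free-list allocation with plain containers (names mirror the diff, types are simplified):

// Sketch: allocate a word ID, preferring recycled slots over appending.
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <vector>

using WordID = std::size_t;

struct WordStore {
  std::vector<std::u16string> word_list;       // word_id -> word
  std::map<std::u16string, WordID> word_map;   // word -> word_id
  std::set<WordID> available_words;            // slots freed by removals

  WordID AddWord(const std::u16string& term) {
    WordID word_id = word_list.size();
    if (available_words.empty()) {
      word_list.push_back(term);               // No free slot: append.
    } else {
      word_id = *available_words.begin();      // Reuse the lowest free slot.
      word_list[word_id] = term;
      available_words.erase(word_id);
    }
    word_map[term] = word_id;
    return word_id;
  }

  void FreeWord(WordID word_id) {
    word_map.erase(word_list[word_id]);
    word_list[word_id].clear();                // Keep other IDs stable.
    available_words.insert(word_id);
  }
};
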
@@ -661,108 +675,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term,
// Create a new entry in the char/word index.
WordIDSet word_id_set;
word_id_set.insert(word_id);
- char_word_map_[uni_char] = word_id_set;
- }
- }
-}
-
-InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
- const Char16Set& term_chars) {
- WordIDSet word_id_set;
- for (Char16Set::const_iterator c_iter = term_chars.begin();
- c_iter != term_chars.end(); ++c_iter) {
- CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
- if (char_iter == char_word_map_.end()) {
- // A character was not found so there are no matching results: bail.
- word_id_set.clear();
- break;
- }
- WordIDSet& char_word_id_set(char_iter->second);
- // It is possible for there to no longer be any words associated with
- // a particular character. Give up in that case.
- if (char_word_id_set.empty()) {
- word_id_set.clear();
- break;
- }
-
- if (c_iter == term_chars.begin()) {
- // First character results becomes base set of results.
- word_id_set = char_word_id_set;
- } else {
- // Subsequent character results get intersected in.
- WordIDSet new_word_id_set;
- std::set_intersection(word_id_set.begin(), word_id_set.end(),
- char_word_id_set.begin(), char_word_id_set.end(),
- std::inserter(new_word_id_set,
- new_word_id_set.begin()));
- word_id_set.swap(new_word_id_set);
+ private_data.char_word_map_[uni_char] = word_id_set;
}
}
- return word_id_set;
-}
-
-// static
-TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
- const string16& string,
- int term_num) {
- const size_t kMaxCompareLength = 2048;
- const string16& short_string = (string.length() > kMaxCompareLength) ?
- string.substr(0, kMaxCompareLength) : string;
- TermMatches matches;
- for (size_t location = short_string.find(term); location != string16::npos;
- location = short_string.find(term, location + 1)) {
- matches.push_back(TermMatch(term_num, location, term.length()));
- }
- return matches;
-}
-
-// static
-TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
- if (matches.empty())
- return matches;
- TermMatches sorted_matches = matches;
- std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
- TermMatches clean_matches;
- TermMatch last_match = sorted_matches[0];
- clean_matches.push_back(last_match);
- for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
- iter != sorted_matches.end(); ++iter) {
- if (iter->offset >= last_match.offset + last_match.length) {
- last_match = *iter;
- clean_matches.push_back(last_match);
- }
- }
- return clean_matches;
-}
-
-// static
-std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
- const TermMatches& matches) {
- std::vector<size_t> offsets;
- for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
- offsets.push_back(i->offset);
- return offsets;
-}
-
-// static
-TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
- const TermMatches& matches,
- const std::vector<size_t>& offsets) {
- DCHECK_EQ(matches.size(), offsets.size());
- TermMatches new_matches;
- std::vector<size_t>::const_iterator offset_iter = offsets.begin();
- for (TermMatches::const_iterator term_iter = matches.begin();
- term_iter != matches.end(); ++term_iter, ++offset_iter) {
- if (*offset_iter != string16::npos) {
- TermMatch new_match(*term_iter);
- new_match.offset = *offset_iter;
- new_matches.push_back(new_match);
- }
- }
- return new_matches;
}
// static
+// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.
ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
const URLRow& row,
const String16Vector& terms) {
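
The SortAndDeoverlap helper removed above is now called as SortAndDeoverlapMatches, presumably defined alongside the other moved utilities. The removed implementation sorted matches by offset and kept a match only if it started at or after the end of the previously kept one; a standalone sketch of that behaviour with a minimal match type:

// Sketch: sort term matches by offset and drop any that overlap a kept match.
#include <algorithm>
#include <cstddef>
#include <vector>

struct Match {
  std::size_t offset = 0;
  std::size_t length = 0;
};

std::vector<Match> SortAndDeoverlap(std::vector<Match> matches) {
  if (matches.empty())
    return matches;
  std::sort(matches.begin(), matches.end(),
            [](const Match& a, const Match& b) { return a.offset < b.offset; });
  std::vector<Match> clean;
  clean.push_back(matches[0]);
  for (std::size_t i = 1; i < matches.size(); ++i) {
    const Match& last = clean.back();
    // Keep a match only if it begins at or after the end of the last kept one.
    if (matches[i].offset >= last.offset + last.length)
      clean.push_back(matches[i]);
  }
  return clean;
}
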
@@ -791,8 +710,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
}
// Sort matches by offset and eliminate any which overlap.
- match.url_matches = SortAndDeoverlap(match.url_matches);
- match.title_matches = SortAndDeoverlap(match.title_matches);
+ match.url_matches = SortAndDeoverlapMatches(match.url_matches);
+ match.title_matches = SortAndDeoverlapMatches(match.title_matches);
// We should not (currently) inline autocomplete a result unless both of the
// following are true:
@@ -906,19 +825,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
const InMemoryURLIndex& index,
const String16Vector& lower_terms)
: index_(index),
- lower_terms_(lower_terms) {
-}
+ lower_terms_(lower_terms) {}
InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
-void InMemoryURLIndex::AddHistoryMatch::operator()(
- const InMemoryURLIndex::HistoryID history_id) {
+void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {
HistoryInfoMap::const_iterator hist_pos =
- index_.history_info_map_.find(history_id);
+ index_.private_data_->history_info_map_.find(history_id);
// Note that a history_id may be present in the word_id_history_map_ yet not
// be found in the history_info_map_. This occurs when an item has been
// deleted by the user or the item no longer qualifies as a quick result.
- if (hist_pos != index_.history_info_map_.end()) {
+ if (hist_pos != index_.private_data_->history_info_map_.end()) {
const URLRow& hist_item = hist_pos->second;
ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
if (match.raw_score > 0)
@@ -940,7 +857,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {
void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
DCHECK(cache);
cache->set_timestamp(base::Time::Now().ToInternalValue());
- cache->set_history_item_count(history_item_count_);
+ // history_item_count_ is no longer used but rather than change the protobuf
+ // definition use a placeholder. This will go away with the switch to SQLite.
+ cache->set_history_item_count(0);
SaveWordList(cache);
SaveWordMap(cache);
SaveCharWordMap(cache);
@@ -951,20 +870,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
bool InMemoryURLIndex::RestorePrivateData(
const InMemoryURLIndexCacheItem& cache) {
last_saved_ = base::Time::FromInternalValue(cache.timestamp());
- history_item_count_ = cache.history_item_count();
- return (history_item_count_ == 0) || (RestoreWordList(cache) &&
- RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
- RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
+ return RestoreWordList(cache) && RestoreWordMap(cache) &&
+ RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
+ RestoreHistoryInfoMap(cache);
}
-
void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
- if (word_list_.empty())
+ if (private_data_->word_list_.empty())
return;
WordListItem* list_item = cache->mutable_word_list();
- list_item->set_word_count(word_list_.size());
- for (String16Vector::const_iterator iter = word_list_.begin();
- iter != word_list_.end(); ++iter)
+ list_item->set_word_count(private_data_->word_list_.size());
+ for (String16Vector::const_iterator iter = private_data_->word_list_.begin();
+ iter != private_data_->word_list_.end(); ++iter)
list_item->add_word(UTF16ToUTF8(*iter));
}
@@ -979,17 +896,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
const RepeatedPtrField<std::string>& words(list_item.word());
for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
iter != words.end(); ++iter)
- word_list_.push_back(UTF8ToUTF16(*iter));
+ private_data_->word_list_.push_back(UTF8ToUTF16(*iter));
return true;
}
void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (word_map_.empty())
+ if (private_data_->word_map_.empty())
return;
WordMapItem* map_item = cache->mutable_word_map();
- map_item->set_item_count(word_map_.size());
- for (WordMap::const_iterator iter = word_map_.begin();
- iter != word_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->word_map_.size());
+ for (WordMap::const_iterator iter = private_data_->word_map_.begin();
+ iter != private_data_->word_map_.end(); ++iter) {
WordMapEntry* map_entry = map_item->add_word_map_entry();
map_entry->set_word(UTF16ToUTF8(iter->first));
map_entry->set_word_id(iter->second);
@@ -1007,17 +924,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
iter != entries.end(); ++iter)
- word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
+ private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
return true;
}
void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (char_word_map_.empty())
+ if (private_data_->char_word_map_.empty())
return;
CharWordMapItem* map_item = cache->mutable_char_word_map();
- map_item->set_item_count(char_word_map_.size());
- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
- iter != char_word_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->char_word_map_.size());
+ for (CharWordIDMap::const_iterator iter =
+ private_data_->char_word_map_.begin();
+ iter != private_data_->char_word_map_.end(); ++iter) {
CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
map_entry->set_char_16(iter->first);
const WordIDSet& word_id_set(iter->second);
@@ -1051,19 +969,20 @@ bool InMemoryURLIndex::RestoreCharWordMap(
for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
jiter != word_ids.end(); ++jiter)
word_id_set.insert(*jiter);
- char_word_map_[uni_char] = word_id_set;
+ private_data_->char_word_map_[uni_char] = word_id_set;
}
return true;
}
void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
const {
- if (word_id_history_map_.empty())
+ if (private_data_->word_id_history_map_.empty())
return;
WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
- map_item->set_item_count(word_id_history_map_.size());
- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
- iter != word_id_history_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->word_id_history_map_.size());
+ for (WordIDHistoryMap::const_iterator iter =
+ private_data_->word_id_history_map_.begin();
+ iter != private_data_->word_id_history_map_.end(); ++iter) {
WordIDHistoryMapEntry* map_entry =
map_item->add_word_id_history_map_entry();
map_entry->set_word_id(iter->first);
@@ -1096,21 +1015,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap(
HistoryIDSet history_id_set;
const RepeatedField<int64>& history_ids(iter->history_id());
for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
- jiter != history_ids.end(); ++jiter)
+ jiter != history_ids.end(); ++jiter) {
history_id_set.insert(*jiter);
- word_id_history_map_[word_id] = history_id_set;
+ private_data_->AddToHistoryIDWordMap(*jiter, word_id);
+ }
+ private_data_->word_id_history_map_[word_id] = history_id_set;
}
return true;
}
void InMemoryURLIndex::SaveHistoryInfoMap(
InMemoryURLIndexCacheItem* cache) const {
- if (history_info_map_.empty())
+ if (private_data_->history_info_map_.empty())
return;
HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
- map_item->set_item_count(history_info_map_.size());
- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
- iter != history_info_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->history_info_map_.size());
+ for (HistoryInfoMap::const_iterator iter =
+ private_data_->history_info_map_.begin();
+ iter != private_data_->history_info_map_.end(); ++iter) {
HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
map_entry->set_history_id(iter->first);
const URLRow& url_row(iter->second);
@@ -1148,7 +1070,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap(
string16 title(UTF8ToUTF16(iter->title()));
url_row.set_title(title);
}
- history_info_map_[history_id] = url_row;
+ private_data_->history_info_map_[history_id] = url_row;
}
return true;
}