summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authormrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-30 03:13:06 +0000
committermrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-10-30 03:13:06 +0000
commit0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce (patch)
tree507a523bad332280a11599d0e782bde7da396abb /chrome
parentde614d301995efdb349e998cea13c316ae3add7d (diff)
downloadchromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.zip
chromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.tar.gz
chromium_src-0c33bf8d8c674826c39d9d0e8e7e6a09687e2fce.tar.bz2
Create Private Data for InMemoryURLIndex (in Preparation for SQLite Cache)
1. Encapsulate the private, persistent data for the InMemoryURLIndex in a new class, URLIndexPrivateData (found in in_memory_url_index_types.h). 2. Move most of the support types, including the new URLIndexPrivateData class, into in_memory_url_index_types.h. 3. Correctly handle the adding and removing of page title words when a URL change is detected. 4. Replace static class member functions with non-friend, non-class functions for better flexibility. 5. Move convenience types out from InMemoryURLIndex class up into history namespace. 6. Rename convenience types to generalize their intent. BUG=92718 TEST=Enhanced unit tests. Review URL: http://codereview.chromium.org/8359019 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@107893 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.cc19
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.h5
-rw-r--r--chrome/browser/autocomplete/history_quick_provider_unittest.cc19
-rw-r--r--chrome/browser/history/history_types.h12
-rw-r--r--chrome/browser/history/in_memory_url_index.cc458
-rw-r--r--chrome/browser/history/in_memory_url_index.h147
-rw-r--r--chrome/browser/history/in_memory_url_index_types.cc198
-rw-r--r--chrome/browser/history/in_memory_url_index_types.h208
-rw-r--r--chrome/browser/history/in_memory_url_index_types_unittest.cc107
-rw-r--r--chrome/browser/history/in_memory_url_index_unittest.cc386
-rw-r--r--chrome/chrome_browser.gypi2
-rw-r--r--chrome/chrome_tests.gypi1
12 files changed, 918 insertions, 644 deletions
diff --git a/chrome/browser/autocomplete/history_quick_provider.cc b/chrome/browser/autocomplete/history_quick_provider.cc
index d3f26d4..7dba1cc 100644
--- a/chrome/browser/autocomplete/history_quick_provider.cc
+++ b/chrome/browser/autocomplete/history_quick_provider.cc
@@ -16,6 +16,7 @@
#include "base/utf_string_conversions.h"
#include "chrome/browser/history/history.h"
#include "chrome/browser/history/in_memory_url_index.h"
+#include "chrome/browser/history/in_memory_url_index_types.h"
#include "chrome/browser/net/url_fixer_upper.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/profiles/profile.h"
@@ -81,8 +82,7 @@ void HistoryQuickProvider::Start(const AutocompleteInput& input,
}
}
-// HistoryQuickProvider matches are currently not deletable.
-// TODO(mrossetti): Determine when a match should be deletable.
+// TODO(mrossetti): Implement this function. (Will happen in next CL.)
void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {}
void HistoryQuickProvider::DoAutocomplete() {
@@ -90,8 +90,8 @@ void HistoryQuickProvider::DoAutocomplete() {
string16 term_string = autocomplete_input_.text();
term_string = net::UnescapeURLComponent(term_string,
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
- history::InMemoryURLIndex::String16Vector terms(
- InMemoryURLIndex::WordVectorFromString16(term_string, false));
+ history::String16Vector terms(
+ history::String16VectorFromString16(term_string, false));
ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(terms);
if (matches.empty())
return;
@@ -131,13 +131,12 @@ AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
// Format the URL autocomplete presentation.
std::vector<size_t> offsets =
- InMemoryURLIndex::OffsetsFromTermMatches(history_match.url_matches);
+ OffsetsFromTermMatches(history_match.url_matches);
match.contents =
net::FormatUrlWithOffsets(info.url(), languages_, net::kFormatUrlOmitAll,
UnescapeRule::SPACES, NULL, NULL, &offsets);
history::TermMatches new_matches =
- InMemoryURLIndex::ReplaceOffsetsInTermMatches(history_match.url_matches,
- offsets);
+ ReplaceOffsetsInTermMatches(history_match.url_matches, offsets);
match.contents_class =
SpansFromTermMatch(new_matches, match.contents.length(), true);
match.fill_into_edit = match.contents;
@@ -170,12 +169,6 @@ history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
return history_service->InMemoryIndex();
}
-void HistoryQuickProvider::SetIndexForTesting(
- history::InMemoryURLIndex* index) {
- DCHECK(index);
- index_for_testing_.reset(index);
-}
-
// static
int HistoryQuickProvider::CalculateRelevance(
const ScoredHistoryMatch& history_match,
diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h
index b14fda4..c8dadf9 100644
--- a/chrome/browser/autocomplete/history_quick_provider.h
+++ b/chrome/browser/autocomplete/history_quick_provider.h
@@ -81,7 +81,10 @@ class HistoryQuickProvider : public HistoryProvider {
bool is_url);
// Only for use in unittests. Takes ownership of |index|.
- void SetIndexForTesting(history::InMemoryURLIndex* index);
+ void set_index(history::InMemoryURLIndex* index) {
+ index_for_testing_.reset(index);
+ }
+
AutocompleteInput autocomplete_input_;
std::string languages_;
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
index c1fd137..76a17dc 100644
--- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
@@ -142,6 +142,12 @@ void HistoryQuickProviderTest::OnProviderUpdate(bool updated_matches) {
void HistoryQuickProviderTest::FillData() {
history::URLDatabase* db = history_service_->InMemoryDatabase();
ASSERT_TRUE(db != NULL);
+
+ history::InMemoryURLIndex* index =
+ new history::InMemoryURLIndex(FilePath());
+ PrefService* prefs = profile_->GetPrefs();
+ std::string languages(prefs->GetString(prefs::kAcceptLanguages));
+ index->Init(db, languages);
for (size_t i = 0; i < arraysize(quick_test_db); ++i) {
const TestURLInfo& cur = quick_test_db[i];
const GURL current_url(cur.url);
@@ -153,19 +159,10 @@ void HistoryQuickProviderTest::FillData() {
url_info.set_typed_count(cur.typed_count);
url_info.set_last_visit(visit_time);
url_info.set_hidden(false);
- EXPECT_TRUE(db->AddURL(url_info));
-
- history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title),
- cur.visit_count, cur.typed_count,
- visit_time, false,
- history::SOURCE_BROWSED);
+ index->UpdateURL(i, url_info);
}
- history::InMemoryURLIndex* index = new history::InMemoryURLIndex(FilePath());
- PrefService* prefs = profile_->GetPrefs();
- std::string languages(prefs->GetString(prefs::kAcceptLanguages));
- index->Init(db, languages);
- provider_->SetIndexForTesting(index);
+ provider_->set_index(index);
}
HistoryQuickProviderTest::SetShouldContain::SetShouldContain(
diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h
index 001e4ca..5567052 100644
--- a/chrome/browser/history/history_types.h
+++ b/chrome/browser/history/history_types.h
@@ -74,6 +74,13 @@ class URLRow {
URLRow& operator=(const URLRow& other);
URLID id() const { return id_; }
+
+ // Sets the id of the row. The id should only be manually set when a row has
+ // been retrieved from the history database or other dataset based on criteria
+ // other than its id (i.e. by URL) and when the id has not yet been set in the
+ // row.
+ void set_id(URLID id) { id_ = id; }
+
const GURL& url() const { return url_; }
const string16& title() const {
@@ -140,8 +147,9 @@ class URLRow {
// This excludes objects which autoinitialize such as strings.
void Initialize();
- // The row ID of this URL. Immutable except for the database which sets it
- // when it pulls them out.
+ // The row ID of this URL from the history database. This is immutable except
+ // when retrieving the row from the database or when determining if the URL
+ // referenced by the URLRow already exists in the database.
URLID id_;
// The URL of this row. Immutable except for the database which sets it
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 6388686..b92a062 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -11,7 +11,6 @@
#include <numeric>
#include "base/file_util.h"
-#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/metrics/histogram.h"
#include "base/string_util.h"
@@ -59,23 +58,6 @@ const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
// each of these scores for each factor.
const int kScoreRank[] = { 1425, 1200, 900, 400 };
-ScoredHistoryMatch::ScoredHistoryMatch()
- : raw_score(0),
- can_inline(false) {}
-
-ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
- : HistoryMatch(url_info, 0, false, false),
- raw_score(0),
- can_inline(false) {}
-
-ScoredHistoryMatch::~ScoredHistoryMatch() {}
-
-// Comparison function for sorting ScoredMatches by their scores.
-bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
- const ScoredHistoryMatch& m2) {
- return m1.raw_score >= m2.raw_score;
-}
-
InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
const WordIDSet& word_id_set,
const HistoryIDSet& history_id_set)
@@ -88,11 +70,6 @@ InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()
InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}
-// Comparison function for sorting TermMatches by their offsets.
-bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
- return m1.offset < m2.offset;
-}
-
// Comparison function for sorting search terms by descending length.
bool LengthGreater(const string16& string_a, const string16& string_b) {
return string_a.length() > string_b.length();
@@ -137,14 +114,14 @@ int ScoreForValue(int value, const int* value_ranks) {
InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
: history_dir_(history_dir),
- history_item_count_(0),
+ private_data_(new URLIndexPrivateData),
cached_at_shutdown_(false) {
InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
}
// Called only by unit tests.
InMemoryURLIndex::InMemoryURLIndex()
- : history_item_count_(0),
+ : private_data_(new URLIndexPrivateData),
cached_at_shutdown_(false) {
InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
}
@@ -170,7 +147,7 @@ void InMemoryURLIndex::InitializeSchemeWhitelist(
// Indexing
-bool InMemoryURLIndex::Init(history::URLDatabase* history_db,
+bool InMemoryURLIndex::Init(URLDatabase* history_db,
const std::string& languages) {
// TODO(mrossetti): Register for profile/language change notifications.
languages_ = languages;
@@ -183,33 +160,47 @@ void InMemoryURLIndex::ShutDown() {
cached_at_shutdown_ = true;
}
-bool InMemoryURLIndex::IndexRow(const URLRow& row) {
+void InMemoryURLIndex::IndexRow(const URLRow& row) {
const GURL& gurl(row.url());
// Index only URLs with a whitelisted scheme.
if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
- return true;
+ return;
+ URLID row_id = row.id();
+ // Strip out username and password before saving and indexing.
string16 url(net::FormatUrl(gurl, languages_,
net::kFormatUrlOmitUsernamePassword,
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
NULL, NULL, NULL));
- HistoryID history_id = static_cast<HistoryID>(row.id());
- DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
+ HistoryID history_id = static_cast<HistoryID>(row_id);
+ DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
// Add the row for quick lookup in the history info store.
- URLRow new_row(GURL(url), row.id());
+ URLRow new_row(GURL(url), row_id);
new_row.set_visit_count(row.visit_count());
new_row.set_typed_count(row.typed_count());
new_row.set_last_visit(row.last_visit());
new_row.set_title(row.title());
- history_info_map_[history_id] = new_row;
+ private_data_->history_info_map_[history_id] = new_row;
+
+ // Index the words contained in the URL and title of the row.
+ AddRowWordsToIndex(new_row);
+ return;
+}
+void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {
+ HistoryID history_id = static_cast<HistoryID>(row.id());
// Split URL into individual, unique words then add in the title words.
+ const GURL& gurl(row.url());
+ string16 url(net::FormatUrl(gurl, languages_,
+ net::kFormatUrlOmitUsernamePassword,
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
+ NULL, NULL, NULL));
url = base::i18n::ToLower(url);
- String16Set url_words = WordSetFromString16(url);
- String16Set title_words = WordSetFromString16(row.title());
+ String16Set url_words = String16SetFromString16(url);
+ String16Set title_words = String16SetFromString16(row.title());
String16Set words;
std::set_union(url_words.begin(), url_words.end(),
title_words.begin(), title_words.end(),
@@ -218,8 +209,48 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) {
word_iter != words.end(); ++word_iter)
AddWordToIndex(*word_iter, history_id);
- ++history_item_count_;
- return true;
+ search_term_cache_.clear(); // Invalidate the term cache.
+}
+
+void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {
+ RemoveRowWordsFromIndex(row);
+ HistoryID history_id = static_cast<HistoryID>(row.id());
+ private_data_->history_info_map_.erase(history_id);
+}
+
+void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {
+ // Remove the entries in history_id_word_map_ and word_id_history_map_ for
+ // this row.
+ URLIndexPrivateData& private_data(*(private_data_.get()));
+ HistoryID history_id = static_cast<HistoryID>(row.id());
+ WordIDSet word_id_set = private_data.history_id_word_map_[history_id];
+ private_data.history_id_word_map_.erase(history_id);
+
+ // Reconcile any changes to word usage.
+ for (WordIDSet::iterator word_id_iter = word_id_set.begin();
+ word_id_iter != word_id_set.end(); ++word_id_iter) {
+ WordID word_id = *word_id_iter;
+ private_data.word_id_history_map_[word_id].erase(history_id);
+ if (!private_data.word_id_history_map_[word_id].empty())
+ continue; // The word is still in use.
+
+ // The word is no longer in use. Reconcile any changes to character usage.
+ string16 word = private_data.word_list_[word_id];
+ Char16Set characters = Char16SetFromString16(word);
+ for (Char16Set::iterator uni_char_iter = characters.begin();
+ uni_char_iter != characters.end(); ++uni_char_iter) {
+ char16 uni_char = *uni_char_iter;
+ private_data.char_word_map_[uni_char].erase(word_id);
+ if (private_data.char_word_map_[uni_char].empty())
+ private_data.char_word_map_.erase(uni_char); // No longer in use.
+ }
+
+ // Complete the removal of references to the word.
+ private_data.word_id_history_map_.erase(word_id);
+ private_data.word_map_.erase(word);
+ private_data.word_list_[word_id] = string16();
+ private_data.available_words_.insert(word_id);
+ }
}
bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
@@ -236,10 +267,8 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
return false;
URLRow row;
- while (history_enum.GetNextURL(&row)) {
- if (!IndexRow(row))
- return false;
- }
+ while (history_enum.GetNextURL(&row))
+ IndexRow(row);
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
base::TimeTicks::Now() - beginning_time);
SaveToCacheFile();
@@ -248,12 +277,7 @@ bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
}
void InMemoryURLIndex::ClearPrivateData() {
- history_item_count_ = 0;
- word_list_.clear();
- word_map_.clear();
- char_word_map_.clear();
- word_id_history_map_.clear();
- history_info_map_.clear();
+ private_data_->Clear();
search_term_cache_.clear();
}
@@ -289,10 +313,13 @@ bool InMemoryURLIndex::RestoreFromCacheFile() {
UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
base::TimeTicks::Now() - beginning_time);
- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
+ UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
+ private_data_->history_id_word_map_.size());
UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
+ private_data_->word_map_.size());
+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
+ private_data_->char_word_map_.size());
return true;
}
@@ -327,29 +354,39 @@ void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
// indexed and it qualifies then it gets indexed. If it is already
// indexed and still qualifies then it gets updated, otherwise it
// is deleted from the index.
- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
- if (row_pos == history_info_map_.end()) {
+ HistoryInfoMap::iterator row_pos =
+ private_data_->history_info_map_.find(row_id);
+ if (row_pos == private_data_->history_info_map_.end()) {
// This new row should be indexed if it qualifies.
- if (RowQualifiesAsSignificant(row, base::Time()))
- IndexRow(row);
+ URLRow new_row(row);
+ new_row.set_id(row_id);
+ if (RowQualifiesAsSignificant(new_row, base::Time()))
+ IndexRow(new_row);
} else if (RowQualifiesAsSignificant(row, base::Time())) {
// This indexed row still qualifies and will be re-indexed.
// The url won't have changed but the title, visit count, etc.
// might have changed.
- URLRow& old_row = row_pos->second;
- old_row.set_visit_count(row.visit_count());
- old_row.set_typed_count(row.typed_count());
- old_row.set_last_visit(row.last_visit());
- // TODO(mrossetti): When we start indexing the title the next line
- // will need attention.
- old_row.set_title(row.title());
+ URLRow& updated_row = row_pos->second;
+ updated_row.set_visit_count(row.visit_count());
+ updated_row.set_typed_count(row.typed_count());
+ updated_row.set_last_visit(row.last_visit());
+ // While the URL is guaranteed to remain stable, the title may have changed.
+ // If so, then we need to update the index with the changed words.
+ if (updated_row.title() != row.title()) {
+ // Clear all words associated with this row and re-index both the
+ // URL and title.
+ RemoveRowWordsFromIndex(updated_row);
+ updated_row.set_title(row.title());
+ AddRowWordsToIndex(updated_row);
+ }
} else {
- // This indexed row no longer qualifies and will be de-indexed.
- history_info_map_.erase(row_id);
+ // This indexed row no longer qualifies and will be de-indexed by
+ // clearing all words associated with this row.
+ URLRow& removed_row = row_pos->second;
+ RemoveRowFromIndex(removed_row);
}
// This invalidates the cache.
search_term_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
}
void InMemoryURLIndex::DeleteURL(URLID row_id) {
@@ -358,10 +395,9 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) {
// hits against this row until that map is rebuilt, but since the
// history_info_map_ no longer references the row no erroneous results
// will propagate to the user.
- history_info_map_.erase(row_id);
+ private_data_->history_info_map_.erase(row_id);
// This invalidates the word cache.
search_term_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
}
// Searching
@@ -369,6 +405,12 @@ void InMemoryURLIndex::DeleteURL(URLID row_id) {
ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
const String16Vector& terms) {
ScoredHistoryMatches scored_items;
+
+ // Do nothing if we have indexed no words (probably because we've not been
+ // initialized yet).
+ if (private_data_->word_list_.empty())
+ return scored_items;
+
if (!terms.empty()) {
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
@@ -426,7 +468,7 @@ void InMemoryURLIndex::ResetSearchTermCache() {
iter->second.used_ = false;
}
-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
+HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
const string16& uni_string) {
// Break the terms down into individual terms (words), get the candidate
// set for each term, and intersect each to get a final candidate list.
@@ -434,7 +476,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
// a string like "http://www.somewebsite.com" which, from our perspective,
// is four words: 'http', 'www', 'somewebsite', and 'com'.
HistoryIDSet history_id_set;
- String16Vector terms = WordVectorFromString16(uni_string, true);
+ String16Vector terms = String16VectorFromString16(uni_string, true);
// Sort the terms into the longest first as such are likely to narrow down
// the results quicker. Also, single character terms are the most expensive
// to process so save them for last.
@@ -461,7 +503,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
return history_id_set;
}
-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
+HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
const string16& term) {
if (term.empty())
return HistoryIDSet();
@@ -471,7 +513,7 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// occuring words in the user's searches.
size_t term_length = term.length();
- InMemoryURLIndex::WordIDSet word_id_set;
+ WordIDSet word_id_set;
if (term_length > 1) {
// See if this term or a prefix thereof is present in the cache.
SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
@@ -517,7 +559,8 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// Reduce the word set with any leftover, unprocessed characters.
if (!unique_chars.empty()) {
- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
+ WordIDSet leftover_set(
+ private_data_->WordIDSetForTermChars(unique_chars));
// We might come up empty on the leftovers.
if (leftover_set.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
@@ -540,13 +583,15 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// contains words which do not have the search term as a proper subset.
for (WordIDSet::iterator word_set_iter = word_id_set.begin();
word_set_iter != word_id_set.end(); ) {
- if (word_list_[*word_set_iter].find(term) == string16::npos)
+ if (private_data_->word_list_[*word_set_iter].find(term) ==
+ string16::npos)
word_id_set.erase(word_set_iter++);
else
++word_set_iter;
}
} else {
- word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
+ word_id_set =
+ private_data_->WordIDSetForTermChars(Char16SetFromString16(term));
}
// If any words resulted then we can compose a set of history IDs by unioning
@@ -556,8 +601,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
for (WordIDSet::iterator word_id_iter = word_id_set.begin();
word_id_iter != word_id_set.end(); ++word_id_iter) {
WordID word_id = *word_id_iter;
- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
- if (word_iter != word_id_history_map_.end()) {
+ WordIDHistoryMap::iterator word_iter =
+ private_data_->word_id_history_map_.find(word_id);
+ if (word_iter != private_data_->word_id_history_map_.end()) {
HistoryIDSet& word_history_id_set(word_iter->second);
history_id_set.insert(word_history_id_set.begin(),
word_history_id_set.end());
@@ -575,85 +621,53 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
// Utility Functions
-// static
-InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
- const string16& uni_string) {
- const size_t kMaxWordLength = 64;
- String16Vector words = WordVectorFromString16(uni_string, false);
- String16Set word_set;
- for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
- ++iter)
- word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
- return word_set;
-}
-
-// static
-InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
- const string16& uni_string,
- bool break_on_space) {
- base::i18n::BreakIterator iter(
- uni_string,
- break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
- : base::i18n::BreakIterator::BREAK_WORD);
- String16Vector words;
- if (!iter.Init())
- return words;
- while (iter.Advance()) {
- if (break_on_space || iter.IsWord()) {
- string16 word = iter.GetString();
- if (break_on_space)
- TrimWhitespace(word, TRIM_ALL, &word);
- if (!word.empty())
- words.push_back(word);
- }
- }
- return words;
-}
-
-// static
-InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
- const string16& term) {
- Char16Set characters;
- for (string16::const_iterator iter = term.begin(); iter != term.end();
- ++iter)
- characters.insert(*iter);
- return characters;
-}
-
void InMemoryURLIndex::AddWordToIndex(const string16& term,
HistoryID history_id) {
- WordMap::iterator word_pos = word_map_.find(term);
- if (word_pos != word_map_.end())
+ WordMap::iterator word_pos = private_data_->word_map_.find(term);
+ if (word_pos != private_data_->word_map_.end())
UpdateWordHistory(word_pos->second, history_id);
else
AddWordHistory(term, history_id);
}
void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
- WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
- DCHECK(history_pos != word_id_history_map_.end());
- HistoryIDSet& history_id_set(history_pos->second);
- history_id_set.insert(history_id);
+ WordIDHistoryMap::iterator history_pos =
+ private_data_->word_id_history_map_.find(word_id);
+ DCHECK(history_pos != private_data_->word_id_history_map_.end());
+ HistoryIDSet& history_id_set(history_pos->second);
+ history_id_set.insert(history_id);
+ private_data_->AddToHistoryIDWordMap(history_id, word_id);
}
// Add a new word to the word list and the word map, and then create a
// new entry in the word/history map.
void InMemoryURLIndex::AddWordHistory(const string16& term,
HistoryID history_id) {
- word_list_.push_back(term);
- WordID word_id = word_list_.size() - 1;
- word_map_[term] = word_id;
+ URLIndexPrivateData& private_data(*(private_data_.get()));
+ WordID word_id = private_data.word_list_.size();
+ if (private_data.available_words_.empty()) {
+ private_data.word_list_.push_back(term);
+ } else {
+ word_id = *(private_data.available_words_.begin());
+ private_data.word_list_[word_id] = term;
+ private_data.available_words_.erase(word_id);
+ }
+ private_data.word_map_[term] = word_id;
+
HistoryIDSet history_id_set;
history_id_set.insert(history_id);
- word_id_history_map_[word_id] = history_id_set;
+ private_data.word_id_history_map_[word_id] = history_id_set;
+ private_data.AddToHistoryIDWordMap(history_id, word_id);
+
// For each character in the newly added word (i.e. a word that is not
// already in the word index), add the word to the character index.
Char16Set characters = Char16SetFromString16(term);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
char16 uni_char = *uni_char_iter;
- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
- if (char_iter != char_word_map_.end()) {
+ CharWordIDMap::iterator char_iter =
+ private_data.char_word_map_.find(uni_char);
+ if (char_iter != private_data.char_word_map_.end()) {
// Update existing entry in the char/word index.
WordIDSet& word_id_set(char_iter->second);
word_id_set.insert(word_id);
@@ -661,108 +675,13 @@ void InMemoryURLIndex::AddWordHistory(const string16& term,
// Create a new entry in the char/word index.
WordIDSet word_id_set;
word_id_set.insert(word_id);
- char_word_map_[uni_char] = word_id_set;
- }
- }
-}
-
-InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
- const Char16Set& term_chars) {
- WordIDSet word_id_set;
- for (Char16Set::const_iterator c_iter = term_chars.begin();
- c_iter != term_chars.end(); ++c_iter) {
- CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
- if (char_iter == char_word_map_.end()) {
- // A character was not found so there are no matching results: bail.
- word_id_set.clear();
- break;
- }
- WordIDSet& char_word_id_set(char_iter->second);
- // It is possible for there to no longer be any words associated with
- // a particular character. Give up in that case.
- if (char_word_id_set.empty()) {
- word_id_set.clear();
- break;
- }
-
- if (c_iter == term_chars.begin()) {
- // First character results becomes base set of results.
- word_id_set = char_word_id_set;
- } else {
- // Subsequent character results get intersected in.
- WordIDSet new_word_id_set;
- std::set_intersection(word_id_set.begin(), word_id_set.end(),
- char_word_id_set.begin(), char_word_id_set.end(),
- std::inserter(new_word_id_set,
- new_word_id_set.begin()));
- word_id_set.swap(new_word_id_set);
+ private_data.char_word_map_[uni_char] = word_id_set;
}
}
- return word_id_set;
-}
-
-// static
-TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
- const string16& string,
- int term_num) {
- const size_t kMaxCompareLength = 2048;
- const string16& short_string = (string.length() > kMaxCompareLength) ?
- string.substr(0, kMaxCompareLength) : string;
- TermMatches matches;
- for (size_t location = short_string.find(term); location != string16::npos;
- location = short_string.find(term, location + 1)) {
- matches.push_back(TermMatch(term_num, location, term.length()));
- }
- return matches;
-}
-
-// static
-TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
- if (matches.empty())
- return matches;
- TermMatches sorted_matches = matches;
- std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
- TermMatches clean_matches;
- TermMatch last_match = sorted_matches[0];
- clean_matches.push_back(last_match);
- for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
- iter != sorted_matches.end(); ++iter) {
- if (iter->offset >= last_match.offset + last_match.length) {
- last_match = *iter;
- clean_matches.push_back(last_match);
- }
- }
- return clean_matches;
-}
-
-// static
-std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
- const TermMatches& matches) {
- std::vector<size_t> offsets;
- for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
- offsets.push_back(i->offset);
- return offsets;
-}
-
-// static
-TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
- const TermMatches& matches,
- const std::vector<size_t>& offsets) {
- DCHECK_EQ(matches.size(), offsets.size());
- TermMatches new_matches;
- std::vector<size_t>::const_iterator offset_iter = offsets.begin();
- for (TermMatches::const_iterator term_iter = matches.begin();
- term_iter != matches.end(); ++term_iter, ++offset_iter) {
- if (*offset_iter != string16::npos) {
- TermMatch new_match(*term_iter);
- new_match.offset = *offset_iter;
- new_matches.push_back(new_match);
- }
- }
- return new_matches;
}
// static
+// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.
ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
const URLRow& row,
const String16Vector& terms) {
@@ -791,8 +710,8 @@ ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
}
// Sort matches by offset and eliminate any which overlap.
- match.url_matches = SortAndDeoverlap(match.url_matches);
- match.title_matches = SortAndDeoverlap(match.title_matches);
+ match.url_matches = SortAndDeoverlapMatches(match.url_matches);
+ match.title_matches = SortAndDeoverlapMatches(match.title_matches);
// We should not (currently) inline autocomplete a result unless both of the
// following are true:
@@ -906,19 +825,17 @@ InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
const InMemoryURLIndex& index,
const String16Vector& lower_terms)
: index_(index),
- lower_terms_(lower_terms) {
-}
+ lower_terms_(lower_terms) {}
InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
-void InMemoryURLIndex::AddHistoryMatch::operator()(
- const InMemoryURLIndex::HistoryID history_id) {
+void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {
HistoryInfoMap::const_iterator hist_pos =
- index_.history_info_map_.find(history_id);
+ index_.private_data_->history_info_map_.find(history_id);
// Note that a history_id may be present in the word_id_history_map_ yet not
// be found in the history_info_map_. This occurs when an item has been
// deleted by the user or the item no longer qualifies as a quick result.
- if (hist_pos != index_.history_info_map_.end()) {
+ if (hist_pos != index_.private_data_->history_info_map_.end()) {
const URLRow& hist_item = hist_pos->second;
ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
if (match.raw_score > 0)
@@ -940,7 +857,9 @@ bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {
void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
DCHECK(cache);
cache->set_timestamp(base::Time::Now().ToInternalValue());
- cache->set_history_item_count(history_item_count_);
+ // history_item_count_ is no longer used but rather than change the protobuf
+ // definition use a placeholder. This will go away with the switch to SQLite.
+ cache->set_history_item_count(0);
SaveWordList(cache);
SaveWordMap(cache);
SaveCharWordMap(cache);
@@ -951,20 +870,18 @@ void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
bool InMemoryURLIndex::RestorePrivateData(
const InMemoryURLIndexCacheItem& cache) {
last_saved_ = base::Time::FromInternalValue(cache.timestamp());
- history_item_count_ = cache.history_item_count();
- return (history_item_count_ == 0) || (RestoreWordList(cache) &&
- RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
- RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
+ return RestoreWordList(cache) && RestoreWordMap(cache) &&
+ RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
+ RestoreHistoryInfoMap(cache);
}
-
void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
- if (word_list_.empty())
+ if (private_data_->word_list_.empty())
return;
WordListItem* list_item = cache->mutable_word_list();
- list_item->set_word_count(word_list_.size());
- for (String16Vector::const_iterator iter = word_list_.begin();
- iter != word_list_.end(); ++iter)
+ list_item->set_word_count(private_data_->word_list_.size());
+ for (String16Vector::const_iterator iter = private_data_->word_list_.begin();
+ iter != private_data_->word_list_.end(); ++iter)
list_item->add_word(UTF16ToUTF8(*iter));
}
@@ -979,17 +896,17 @@ bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
const RepeatedPtrField<std::string>& words(list_item.word());
for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
iter != words.end(); ++iter)
- word_list_.push_back(UTF8ToUTF16(*iter));
+ private_data_->word_list_.push_back(UTF8ToUTF16(*iter));
return true;
}
void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (word_map_.empty())
+ if (private_data_->word_map_.empty())
return;
WordMapItem* map_item = cache->mutable_word_map();
- map_item->set_item_count(word_map_.size());
- for (WordMap::const_iterator iter = word_map_.begin();
- iter != word_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->word_map_.size());
+ for (WordMap::const_iterator iter = private_data_->word_map_.begin();
+ iter != private_data_->word_map_.end(); ++iter) {
WordMapEntry* map_entry = map_item->add_word_map_entry();
map_entry->set_word(UTF16ToUTF8(iter->first));
map_entry->set_word_id(iter->second);
@@ -1007,17 +924,18 @@ bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
iter != entries.end(); ++iter)
- word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
+ private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
return true;
}
void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (char_word_map_.empty())
+ if (private_data_->char_word_map_.empty())
return;
CharWordMapItem* map_item = cache->mutable_char_word_map();
- map_item->set_item_count(char_word_map_.size());
- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
- iter != char_word_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->char_word_map_.size());
+ for (CharWordIDMap::const_iterator iter =
+ private_data_->char_word_map_.begin();
+ iter != private_data_->char_word_map_.end(); ++iter) {
CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
map_entry->set_char_16(iter->first);
const WordIDSet& word_id_set(iter->second);
@@ -1051,19 +969,20 @@ bool InMemoryURLIndex::RestoreCharWordMap(
for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
jiter != word_ids.end(); ++jiter)
word_id_set.insert(*jiter);
- char_word_map_[uni_char] = word_id_set;
+ private_data_->char_word_map_[uni_char] = word_id_set;
}
return true;
}
void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
const {
- if (word_id_history_map_.empty())
+ if (private_data_->word_id_history_map_.empty())
return;
WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
- map_item->set_item_count(word_id_history_map_.size());
- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
- iter != word_id_history_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->word_id_history_map_.size());
+ for (WordIDHistoryMap::const_iterator iter =
+ private_data_->word_id_history_map_.begin();
+ iter != private_data_->word_id_history_map_.end(); ++iter) {
WordIDHistoryMapEntry* map_entry =
map_item->add_word_id_history_map_entry();
map_entry->set_word_id(iter->first);
@@ -1096,21 +1015,24 @@ bool InMemoryURLIndex::RestoreWordIDHistoryMap(
HistoryIDSet history_id_set;
const RepeatedField<int64>& history_ids(iter->history_id());
for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
- jiter != history_ids.end(); ++jiter)
+ jiter != history_ids.end(); ++jiter) {
history_id_set.insert(*jiter);
- word_id_history_map_[word_id] = history_id_set;
+ private_data_->AddToHistoryIDWordMap(*jiter, word_id);
+ }
+ private_data_->word_id_history_map_[word_id] = history_id_set;
}
return true;
}
void InMemoryURLIndex::SaveHistoryInfoMap(
InMemoryURLIndexCacheItem* cache) const {
- if (history_info_map_.empty())
+ if (private_data_->history_info_map_.empty())
return;
HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
- map_item->set_item_count(history_info_map_.size());
- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
- iter != history_info_map_.end(); ++iter) {
+ map_item->set_item_count(private_data_->history_info_map_.size());
+ for (HistoryInfoMap::const_iterator iter =
+ private_data_->history_info_map_.begin();
+ iter != private_data_->history_info_map_.end(); ++iter) {
HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
map_entry->set_history_id(iter->first);
const URLRow& url_row(iter->second);
@@ -1148,7 +1070,7 @@ bool InMemoryURLIndex::RestoreHistoryInfoMap(
string16 title(UTF8ToUTF16(iter->title()));
url_row.set_title(title);
}
- history_info_map_[history_id] = url_row;
+ private_data_->history_info_map_[history_id] = url_row;
}
return true;
}
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index 5de5604..dfae2d4 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -21,6 +21,7 @@
#include "chrome/browser/autocomplete/autocomplete_match.h"
#include "chrome/browser/autocomplete/history_provider_util.h"
#include "chrome/browser/history/history_types.h"
+#include "chrome/browser/history/in_memory_url_index_types.h"
#include "chrome/browser/history/in_memory_url_index_cache.pb.h"
#include "sql/connection.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
@@ -41,39 +42,6 @@ namespace imui = in_memory_url_index;
class URLDatabase;
-// Specifies where an omnibox term occurs within a string. Used for specifying
-// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in
-// scoring a result.
-struct TermMatch {
- TermMatch(int term_num, size_t offset, size_t length)
- : term_num(term_num),
- offset(offset),
- length(length) {}
-
- int term_num; // The index of the term in the original search string.
- size_t offset; // The starting offset of the substring match.
- size_t length; // The length of the substring match.
-};
-typedef std::vector<TermMatch> TermMatches;
-
-// Used for intermediate history result operations.
-struct ScoredHistoryMatch : public HistoryMatch {
- ScoredHistoryMatch(); // Required by STL.
- explicit ScoredHistoryMatch(const URLRow& url_info);
- ~ScoredHistoryMatch();
-
- static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
- const ScoredHistoryMatch& m2);
-
- // An interim score taking into consideration location and completeness
- // of the match.
- int raw_score;
- TermMatches url_matches; // Term matches within the URL.
- TermMatches title_matches; // Term matches within the page title.
- bool can_inline; // True if this is a candidate for in-line autocompletion.
-};
-typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
-
// The URL history source.
// Holds portions of the URL database in memory in an indexed form. Used to
// quickly look up matching URLs for a given query string. Used by
@@ -99,10 +67,7 @@ class InMemoryURLIndex {
// within the profile wherein the cache and transaction journals will be
// stored.
explicit InMemoryURLIndex(const FilePath& history_dir);
- ~InMemoryURLIndex();
-
- // Convenience types
- typedef std::vector<string16> String16Vector;
+ virtual ~InMemoryURLIndex();
// Opens and indexes the URL history database.
// |languages| gives a list of language encodings with which the history
@@ -150,30 +115,6 @@ class InMemoryURLIndex {
// 'quick' criteria).
void DeleteURL(URLID row_id);
- // Breaks the |uni_string| string down into individual words and return
- // a vector with the individual words in their original order. If
- // |break_on_space| is false then the resulting list will contain only words
- // containing alpha-numeric characters. If |break_on_space| is true then the
- // resulting list will contain strings broken at whitespace.
- //
- // Example:
- // Given: |uni_string|: "http://www.google.com/ harry the rabbit."
- // With |break_on_space| false the returned list will contain:
- // "http", "www", "google", "com", "harry", "the", "rabbit"
- // With |break_on_space| true the returned list will contain:
- // "http://", "www.google.com/", "harry", "the", "rabbit."
- static String16Vector WordVectorFromString16(const string16& uni_string,
- bool break_on_space);
-
- // Extract and return the offsets from |matches|.
- static std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
-
- // Replace the offsets in |matches| with those given in |offsets|, deleting
- // any which are npos, and return the updated list of matches.
- static TermMatches ReplaceOffsetsInTermMatches(
- const TermMatches& matches,
- const std::vector<size_t>& offsets);
-
private:
friend class AddHistoryMatch;
friend class InMemoryURLIndexTest;
@@ -185,6 +126,7 @@ class InMemoryURLIndex {
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs);
@@ -194,29 +136,6 @@ class InMemoryURLIndex {
// Creating one of me without a history path is not allowed (tests excepted).
InMemoryURLIndex();
- // Convenience types.
- typedef std::set<string16> String16Set;
- typedef std::set<char16> Char16Set;
- typedef std::vector<char16> Char16Vector;
-
- // An index into list of all of the words we have indexed.
- typedef int WordID;
-
- // A map allowing a WordID to be determined given a word.
- typedef std::map<string16, WordID> WordMap;
-
- // A map from character to word_ids.
- typedef std::set<WordID> WordIDSet; // An index into the WordList.
- typedef std::map<char16, WordIDSet> CharWordIDMap;
-
- // A map from word_id to history item.
- // TODO(mrossetti): URLID is 64 bit: a memory bloat and performance hit.
- // Consider using a smaller type.
- typedef URLID HistoryID;
- typedef std::set<HistoryID> HistoryIDSet;
- typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
-
-
// Support caching of term results so that we can optimize searches which
// build upon a previous search. Each entry in this map represents one
// search term from the most recent search. For example, if the user had
@@ -248,12 +167,6 @@ class InMemoryURLIndex {
};
typedef std::map<string16, SearchTermCacheItem> SearchTermCacheMap;
- // TODO(rohitrao): Probably replace this with QueryResults.
- typedef std::vector<URLRow> URLRowVector;
-
- // A map from history_id to the history's URL and title.
- typedef std::map<HistoryID, URLRow> HistoryInfoMap;
-
// A helper class which performs the final filter on each candidate
// history URL match, inserting accepted matches into |scored_matches_|
// and trimming the maximum number of matches to 10.
@@ -280,34 +193,19 @@ class InMemoryURLIndex {
// Initializes the whitelist of URL schemes.
static void InitializeSchemeWhitelist(std::set<std::string>* whitelist);
- // Breaks a string down into individual words.
- static String16Set WordSetFromString16(const string16& uni_string);
+ // URL History indexing support functions.
- // Given a set of Char16s, finds words containing those characters.
- WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
+ // Indexes one URL history item.
+ void IndexRow(const URLRow& row);
- // Creates a TermMatches which has an entry for each occurrence of the string
- // |term| found in the string |string|. Mark each match with |term_num| so
- // that the resulting TermMatches can be merged with other TermMatches for
- // other terms.
- static TermMatches MatchTermInString(const string16& term,
- const string16& string,
- int term_num);
+ // Parses and indexes the words in the URL and page title of |row|.
+ void AddRowWordsToIndex(const URLRow& row);
- // URL History indexing support functions.
+ // Removes |row| and all associated words and characters from the index.
+ void RemoveRowFromIndex(const URLRow& row);
- // Indexes one URL history item.
- bool IndexRow(const URLRow& row);
-
- // Breaks the |uni_word| string down into its individual characters.
- // Note that this is temporarily intended to work on a single word, but
- // _will_ work on a string of words, perhaps with unexpected results.
- // TODO(mrossetti): Lots of optimizations possible here for not restarting
- // a search if the user is just typing along. Also, change this to uniString
- // and properly handle substring matches, scoring and sorting the results
- // by score. Also, provide the metrics for where the matches occur so that
- // the UI can highlight the matched sections.
- static Char16Set Char16SetFromString16(const string16& uni_word);
+ // Removes all words and characters associated with |row| from the index.
+ void RemoveRowWordsFromIndex(const URLRow& row);
// Given a single word in |uni_word|, adds a reference for the containing
// history item identified by |history_id| to the index.
@@ -352,10 +250,6 @@ class InMemoryURLIndex {
static int ScoreComponentForMatches(const TermMatches& matches,
size_t max_length);
- // Sorts and removes overlapping substring matches from |matches| and
- // returns the cleaned up matches.
- static TermMatches SortAndDeoverlap(const TermMatches& matches);
-
// Determines if |gurl| has a whitelisted scheme and returns true if so.
bool URLSchemeIsWhitelisted(const GURL& gurl) const;
@@ -395,21 +289,8 @@ class InMemoryURLIndex {
// the InMemoryURLIndex was last populated.
base::Time last_saved_;
- // A list of all of indexed words. The index of a word in this list is the
- // ID of the word in the word_map_. It reduces the memory overhead by
- // replacing a potentially long and repeated string with a simple index.
- // NOTE: A word will _never_ be removed from this vector thus insuring
- // the immutability of the word_id throughout the session, reducing
- // maintenance complexity.
- // TODO(mrossetti): Profile the vector allocation and determine if judicious
- // 'reserve' calls are called for.
- String16Vector word_list_;
-
- int history_item_count_;
- WordMap word_map_;
- CharWordIDMap char_word_map_;
- WordIDHistoryMap word_id_history_map_;
- HistoryInfoMap history_info_map_;
+ // The index's durable private data.
+ scoped_ptr<URLIndexPrivateData> private_data_;
// Cache of search terms.
SearchTermCacheMap search_term_cache_;
diff --git a/chrome/browser/history/in_memory_url_index_types.cc b/chrome/browser/history/in_memory_url_index_types.cc
new file mode 100644
index 0000000..5315ccc
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index_types.cc
@@ -0,0 +1,198 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/history/in_memory_url_index_types.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "base/i18n/break_iterator.h"
+#include "base/i18n/case_conversion.h"
+#include "base/string_util.h"
+
+namespace history {
+
+// Matches within URL and Title Strings ----------------------------------------
+
+TermMatches MatchTermInString(const string16& term,
+ const string16& string,
+ int term_num) {
+ const size_t kMaxCompareLength = 2048;
+ const string16& short_string = (string.length() > kMaxCompareLength) ?
+ string.substr(0, kMaxCompareLength) : string;
+ TermMatches matches;
+ for (size_t location = short_string.find(term); location != string16::npos;
+ location = short_string.find(term, location + 1))
+ matches.push_back(TermMatch(term_num, location, term.length()));
+ return matches;
+}
+
+// Comparison function for sorting TermMatches by their offsets.
+bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
+ return m1.offset < m2.offset;
+}
+
+TermMatches SortAndDeoverlapMatches(const TermMatches& matches) {
+ if (matches.empty())
+ return matches;
+ TermMatches sorted_matches = matches;
+ std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
+ TermMatches clean_matches;
+ TermMatch last_match;
+ for (TermMatches::const_iterator iter = sorted_matches.begin();
+ iter != sorted_matches.end(); ++iter) {
+ if (iter->offset >= last_match.offset + last_match.length) {
+ last_match = *iter;
+ clean_matches.push_back(last_match);
+ }
+ }
+ return clean_matches;
+}
+
+std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) {
+ std::vector<size_t> offsets;
+ for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
+ offsets.push_back(i->offset);
+ return offsets;
+}
+
+TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
+ const std::vector<size_t>& offsets) {
+ DCHECK_EQ(matches.size(), offsets.size());
+ TermMatches new_matches;
+ std::vector<size_t>::const_iterator offset_iter = offsets.begin();
+ for (TermMatches::const_iterator term_iter = matches.begin();
+ term_iter != matches.end(); ++term_iter, ++offset_iter) {
+ if (*offset_iter != string16::npos) {
+ TermMatch new_match(*term_iter);
+ new_match.offset = *offset_iter;
+ new_matches.push_back(new_match);
+ }
+ }
+ return new_matches;
+}
+
+// ScoredHistoryMatch ----------------------------------------------------------
+
+ScoredHistoryMatch::ScoredHistoryMatch()
+ : raw_score(0),
+ can_inline(false) {}
+
+ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
+ : HistoryMatch(url_info, 0, false, false),
+ raw_score(0),
+ can_inline(false) {}
+
+ScoredHistoryMatch::~ScoredHistoryMatch() {}
+
+// Comparison function for sorting ScoredMatches by their scores.
+bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
+ const ScoredHistoryMatch& m2) {
+ return m1.raw_score >= m2.raw_score;
+}
+
+// InMemoryURLIndex's Private Data ---------------------------------------------
+
+URLIndexPrivateData::URLIndexPrivateData() {}
+
+URLIndexPrivateData::~URLIndexPrivateData() {}
+
+void URLIndexPrivateData::Clear() {
+ word_list_.clear();
+ available_words_.clear();
+ word_map_.clear();
+ char_word_map_.clear();
+ word_id_history_map_.clear();
+ history_id_word_map_.clear();
+ history_info_map_.clear();
+}
+
+void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
+ WordID word_id) {
+ HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
+ if (iter != history_id_word_map_.end()) {
+ WordIDSet& word_id_set(iter->second);
+ word_id_set.insert(word_id);
+ } else {
+ WordIDSet word_id_set;
+ word_id_set.insert(word_id);
+ history_id_word_map_[history_id] = word_id_set;
+ }
+}
+
+WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
+ const Char16Set& term_chars) {
+ WordIDSet word_id_set;
+ for (Char16Set::const_iterator c_iter = term_chars.begin();
+ c_iter != term_chars.end(); ++c_iter) {
+ CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
+ if (char_iter == char_word_map_.end()) {
+ // A character was not found so there are no matching results: bail.
+ word_id_set.clear();
+ break;
+ }
+ WordIDSet& char_word_id_set(char_iter->second);
+ // It is possible for there to no longer be any words associated with
+ // a particular character. Give up in that case.
+ if (char_word_id_set.empty()) {
+ word_id_set.clear();
+ break;
+ }
+
+ if (c_iter == term_chars.begin()) {
+ // First character results becomes base set of results.
+ word_id_set = char_word_id_set;
+ } else {
+ // Subsequent character results get intersected in.
+ WordIDSet new_word_id_set;
+ std::set_intersection(word_id_set.begin(), word_id_set.end(),
+ char_word_id_set.begin(), char_word_id_set.end(),
+ std::inserter(new_word_id_set,
+ new_word_id_set.begin()));
+ word_id_set.swap(new_word_id_set);
+ }
+ }
+ return word_id_set;
+}
+
+// Utility Functions -----------------------------------------------------------
+
+String16Set String16SetFromString16(const string16& uni_string) {
+ const size_t kMaxWordLength = 64;
+ String16Vector words = String16VectorFromString16(uni_string, false);
+ String16Set word_set;
+ for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
+ ++iter)
+ word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
+ return word_set;
+}
+
+String16Vector String16VectorFromString16(const string16& uni_string,
+ bool break_on_space) {
+ base::i18n::BreakIterator iter(uni_string,
+ break_on_space ? base::i18n::BreakIterator::BREAK_SPACE :
+ base::i18n::BreakIterator::BREAK_WORD);
+ String16Vector words;
+ if (!iter.Init())
+ return words;
+ while (iter.Advance()) {
+ if (break_on_space || iter.IsWord()) {
+ string16 word = iter.GetString();
+ if (break_on_space)
+ TrimWhitespace(word, TRIM_ALL, &word);
+ if (!word.empty())
+ words.push_back(word);
+ }
+ }
+ return words;
+}
+
+Char16Set Char16SetFromString16(const string16& term) {
+ Char16Set characters;
+ for (string16::const_iterator iter = term.begin(); iter != term.end(); ++iter)
+ characters.insert(*iter);
+ return characters;
+}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_url_index_types.h b/chrome/browser/history/in_memory_url_index_types.h
new file mode 100644
index 0000000..03f772c
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index_types.h
@@ -0,0 +1,208 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
+#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
+#pragma once
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/gtest_prod_util.h"
+#include "base/string16.h"
+#include "chrome/browser/autocomplete/history_provider_util.h"
+#include "chrome/browser/history/history_types.h"
+
+namespace history {
+
+// Matches within URL and Title Strings ----------------------------------------
+
+// Specifies where an omnibox term occurs within a string. Used for specifying
+// highlights in AutocompleteMatches (ACMatchClassifications) and to assist in
+// scoring a result.
+struct TermMatch {
+ TermMatch() : term_num(0), offset(0), length(0) {}
+ TermMatch(int term_num, size_t offset, size_t length)
+ : term_num(term_num),
+ offset(offset),
+ length(length) {}
+
+ int term_num; // The index of the term in the original search string.
+ size_t offset; // The starting offset of the substring match.
+ size_t length; // The length of the substring match.
+};
+typedef std::vector<TermMatch> TermMatches;
+
+// Returns a TermMatches which has an entry for each occurrence of the string
+// |term| found in the string |string|. Mark each match with |term_num| so
+// that the resulting TermMatches can be merged with other TermMatches for
+// other terms. Note that only the first 2,048 characters of |string| are
+// considered during the match operation.
+TermMatches MatchTermInString(const string16& term,
+ const string16& string,
+ int term_num);
+
+// Sorts and removes overlapping substring matches from |matches| and
+// returns the cleaned up matches.
+TermMatches SortAndDeoverlapMatches(const TermMatches& matches);
+
+// Extracts and returns the offsets from |matches|.
+std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches);
+
+// Replaces the offsets in |matches| with those given in |offsets|, deleting
+// any which are npos, and returns the updated list of matches.
+TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
+ const std::vector<size_t>& offsets);
+
+// Used for intermediate history result operations -----------------------------
+
+struct ScoredHistoryMatch : public history::HistoryMatch {
+ ScoredHistoryMatch(); // Required by STL.
+ explicit ScoredHistoryMatch(const history::URLRow& url_info);
+ ~ScoredHistoryMatch();
+
+ static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
+ const ScoredHistoryMatch& m2);
+
+ // An interim score taking into consideration location and completeness
+ // of the match.
+ int raw_score;
+ TermMatches url_matches; // Term matches within the URL.
+ TermMatches title_matches; // Term matches within the page title.
+ bool can_inline; // True if this is a candidate for in-line autocompletion.
+};
+typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
+
+// Convenience Types -----------------------------------------------------------
+
+typedef std::vector<string16> String16Vector;
+typedef std::set<string16> String16Set;
+typedef std::set<char16> Char16Set;
+typedef std::vector<char16> Char16Vector;
+
+// Utility Functions Relates to Convenience Types ------------------------------
+
+// Breaks a string down into individual words.
+String16Set String16SetFromString16(const string16& uni_string);
+
+// Breaks the |uni_string| string down into individual words and return
+// a vector with the individual words in their original order. If
+// |break_on_space| is false then the resulting list will contain only words
+// containing alpha-numeric characters. If |break_on_space| is true then the
+// resulting list will contain strings broken at whitespace. (|break_on_space|
+// indicates that the BreakIterator::BREAK_SPACE (equivalent to BREAK_LINE)
+// approach is to be used. For a complete description of this algorithm
+// refer to the comments in base/i18n/break_iterator.h.)
+//
+// Example:
+// Given: |uni_string|: "http://www.google.com/ harry the rabbit."
+// With |break_on_space| false the returned list will contain:
+// "http", "www", "google", "com", "harry", "the", "rabbit"
+// With |break_on_space| true the returned list will contain:
+// "http://", "www.google.com/", "harry", "the", "rabbit."
+String16Vector String16VectorFromString16(const string16& uni_string,
+ bool break_on_space);
+
+// Breaks the |uni_word| string down into its individual characters.
+// Note that this is temporarily intended to work on a single word, but
+// _will_ work on a string of words, perhaps with unexpected results.
+// TODO(mrossetti): Lots of optimizations possible here for not restarting
+// a search if the user is just typing along. Also, change this to uniString
+// and properly handle substring matches, scoring and sorting the results
+// by score. Also, provide the metrics for where the matches occur so that
+// the UI can highlight the matched sections.
+Char16Set Char16SetFromString16(const string16& uni_word);
+
+// Support for InMemoryURLIndex Private Data -----------------------------------
+
+// An index into a list of all of the words we have indexed.
+typedef size_t WordID;
+
+// A map allowing a WordID to be determined given a word.
+typedef std::map<string16, WordID> WordMap;
+
+// A map from character to the word_ids of words containing that character.
+typedef std::set<WordID> WordIDSet; // An index into the WordList.
+typedef std::map<char16, WordIDSet> CharWordIDMap;
+
+// A map from word (by word_id) to history items containing that word.
+typedef history::URLID HistoryID;
+typedef std::set<HistoryID> HistoryIDSet;
+typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
+typedef std::map<HistoryID, WordIDSet> HistoryIDWordMap;
+
+// A map from history_id to the history's URL and title.
+typedef std::map<HistoryID, URLRow> HistoryInfoMap;
+
+// TODO(rohitrao): Probably replace this with QueryResults.
+typedef std::vector<history::URLRow> URLRowVector;
+
+// A structure describing the index's internal data. This structure is used
+// by the InMemoryURLIndex, InMemoryURLIndexBackend, and
+// InMemoryURLCacheDatabase classes.
+class URLIndexPrivateData {
+ public:
+ URLIndexPrivateData();
+ ~URLIndexPrivateData();
+
+ // Clear all of our data.
+ void Clear();
+
+ // Adds |word_id| to |history_id|'s entry in the history/word map,
+ // creating a new entry if one does not already exist.
+ void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
+
+ // Given a set of Char16s, finds words containing those characters.
+ WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
+
+ private:
+ friend class InMemoryURLIndex;
+ friend class InMemoryURLIndexTest;
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
+ FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
+ FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
+
+ // A list of all of indexed words. The index of a word in this list is the
+ // ID of the word in the word_map_. It reduces the memory overhead by
+ // replacing a potentially long and repeated string with a simple index.
+ String16Vector word_list_;
+
+ // A list of available words slots in |word_list_|. An available word slot
+ // is the index of a unused word in word_list_ vector, also referred to as
+ // a WordID. As URL visits are added or modified new words may be added to
+ // the index, in which case any available words are used, if any, and then
+ // words are added to the end of the word_list_. When URL visits are
+ // modified or deleted old words may be removed from the index, in which
+ // case the slots for those words are added to available_words_ for resuse
+ // by future URL updates.
+ WordIDSet available_words_;
+
+ // A one-to-one mapping from the a word string to its slot number (i.e.
+ // WordID) in the |word_list_|.
+ WordMap word_map_;
+
+ // A one-to-many mapping from a single character to all WordIDs of words
+ // containing that character.
+ CharWordIDMap char_word_map_;
+
+ // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as
+ // used in the history database) of history items in which the word occurs.
+ WordIDHistoryMap word_id_history_map_;
+
+ // A one-to-many mapping from a HistoryID to all WordIDs of words that occur
+ // in the URL and/or page title of the history item referenced by that
+ // HistoryID.
+ HistoryIDWordMap history_id_word_map_;
+
+ // A one-to-one mapping from HistoryID to the history item data governing
+ // index inclusion and relevance scoring.
+ HistoryInfoMap history_info_map_;
+};
+
+} // namespace history
+
+#endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_TYPES_H_
diff --git a/chrome/browser/history/in_memory_url_index_types_unittest.cc b/chrome/browser/history/in_memory_url_index_types_unittest.cc
new file mode 100644
index 0000000..6a6bc7c
--- /dev/null
+++ b/chrome/browser/history/in_memory_url_index_types_unittest.cc
@@ -0,0 +1,107 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/string16.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/in_memory_url_index_types.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace history {
+
+class InMemoryURLIndexTypesTest : public testing::Test {
+};
+
+TEST_F(InMemoryURLIndexTypesTest, StaticFunctions) {
+ // Test WordVectorFromString16
+ string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy"));
+ String16Vector string_vec = String16VectorFromString16(string_a, false);
+ ASSERT_EQ(7U, string_vec.size());
+ // See if we got the words we expected.
+ EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]);
+ EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]);
+ EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]);
+ EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]);
+ EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]);
+ EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]);
+ EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]);
+
+ string_vec = String16VectorFromString16(string_a, true);
+ ASSERT_EQ(5U, string_vec.size());
+ EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]);
+ EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]);
+ EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]);
+ EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]);
+ EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]);
+
+ // Test WordSetFromString16
+ string16 string_b(ASCIIToUTF16(
+ "http://web.google.com/search Google Web Search"));
+ String16Set string_set = String16SetFromString16(string_b);
+ EXPECT_EQ(5U, string_set.size());
+ // See if we got the words we expected.
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end());
+ EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end());
+
+ // Test SortAndDeoverlap
+ TermMatches matches_a;
+ matches_a.push_back(TermMatch(1, 13, 10));
+ matches_a.push_back(TermMatch(2, 23, 10));
+ matches_a.push_back(TermMatch(3, 3, 10));
+ matches_a.push_back(TermMatch(4, 40, 5));
+ TermMatches matches_b = SortAndDeoverlapMatches(matches_a);
+ // Nothing should have been eliminated.
+ EXPECT_EQ(matches_a.size(), matches_b.size());
+ // The order should now be 3, 1, 2, 4.
+ EXPECT_EQ(3, matches_b[0].term_num);
+ EXPECT_EQ(1, matches_b[1].term_num);
+ EXPECT_EQ(2, matches_b[2].term_num);
+ EXPECT_EQ(4, matches_b[3].term_num);
+ matches_a.push_back(TermMatch(5, 18, 10));
+ matches_a.push_back(TermMatch(6, 38, 5));
+ matches_b = SortAndDeoverlapMatches(matches_a);
+ // Two matches should have been eliminated.
+ EXPECT_EQ(matches_a.size() - 2, matches_b.size());
+ // The order should now be 3, 1, 2, 6.
+ EXPECT_EQ(3, matches_b[0].term_num);
+ EXPECT_EQ(1, matches_b[1].term_num);
+ EXPECT_EQ(2, matches_b[2].term_num);
+ EXPECT_EQ(6, matches_b[3].term_num);
+
+ // Test MatchTermInString
+ TermMatches matches_c = MatchTermInString(
+ UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123);
+ const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 };
+ ASSERT_EQ(arraysize(expected_offsets), matches_c.size());
+ for (size_t i = 0; i < arraysize(expected_offsets); ++i)
+ EXPECT_EQ(expected_offsets[i], matches_c[i].offset);
+}
+
+TEST_F(InMemoryURLIndexTypesTest, OffsetsAndTermMatches) {
+ // Test OffsetsFromTermMatches
+ history::TermMatches matches_a;
+ matches_a.push_back(history::TermMatch(1, 1, 2));
+ matches_a.push_back(history::TermMatch(2, 4, 3));
+ matches_a.push_back(history::TermMatch(3, 9, 1));
+ matches_a.push_back(history::TermMatch(3, 10, 1));
+ matches_a.push_back(history::TermMatch(4, 14, 5));
+ std::vector<size_t> offsets = OffsetsFromTermMatches(matches_a);
+ const size_t expected_offsets_a[] = {1, 4, 9, 10, 14};
+ ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a));
+ for (size_t i = 0; i < offsets.size(); ++i)
+ EXPECT_EQ(expected_offsets_a[i], offsets[i]);
+
+ // Test ReplaceOffsetsInTermMatches
+ offsets[2] = string16::npos;
+ history::TermMatches matches_b =
+ ReplaceOffsetsInTermMatches(matches_a, offsets);
+ const size_t expected_offsets_b[] = {1, 4, 10, 14};
+ ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size());
+ for (size_t i = 0; i < matches_b.size(); ++i)
+ EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset);
+}
+
+} // namespace history
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index 62b708d..c4420bc 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -2,24 +2,18 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <stdio.h>
-
#include <fstream>
-#include <string>
-#include <vector>
#include "base/file_path.h"
#include "base/file_util.h"
-#include "base/memory/scoped_ptr.h"
#include "base/path_service.h"
+#include "base/string16.h"
#include "base/string_util.h"
-#include "base/time.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/history/in_memory_database.h"
#include "chrome/browser/history/in_memory_url_index.h"
+#include "chrome/browser/history/in_memory_url_index_types.h"
#include "chrome/common/chrome_paths.h"
-#include "sql/connection.h"
-#include "sql/statement.h"
#include "sql/transaction.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -59,9 +53,8 @@ class InMemoryURLIndexTest : public testing::Test,
int typed_count);
// Convenience functions for easily creating vectors of search terms.
- InMemoryURLIndex::String16Vector Make1Term(const char* term) const;
- InMemoryURLIndex::String16Vector Make2Terms(const char* term_1,
- const char* term_2) const;
+ String16Vector Make1Term(const char* term) const;
+ String16Vector Make2Terms(const char* term_1, const char* term_2) const;
// Validates that the given |term| is contained in |cache| and that it is
// marked as in-use.
@@ -145,20 +138,18 @@ URLRow InMemoryURLIndexTest::MakeURLRow(const char* url,
return row;
}
-InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make1Term(
- const char* term) const {
- InMemoryURLIndex::String16Vector terms;
- terms.push_back(UTF8ToUTF16(term));
- return terms;
+String16Vector InMemoryURLIndexTest::Make1Term(const char* term) const {
+ String16Vector original_terms;
+ original_terms.push_back(UTF8ToUTF16(term));
+ return original_terms;
}
-InMemoryURLIndex::String16Vector InMemoryURLIndexTest::Make2Terms(
- const char* term_1,
- const char* term_2) const {
- InMemoryURLIndex::String16Vector terms;
- terms.push_back(UTF8ToUTF16(term_1));
- terms.push_back(UTF8ToUTF16(term_2));
- return terms;
+String16Vector InMemoryURLIndexTest::Make2Terms(const char* term_1,
+ const char* term_2) const {
+ String16Vector original_terms;
+ original_terms.push_back(UTF8ToUTF16(term_1));
+ original_terms.push_back(UTF8ToUTF16(term_2));
+ return original_terms;
}
void InMemoryURLIndexTest::CheckTerm(
@@ -173,6 +164,27 @@ void InMemoryURLIndexTest::CheckTerm(
<< "Cache item '" << term << "' should be marked as being in use.";
}
+// Helper function which compares two maps for equivalence. The maps' values
+// are associative containers and their contents are compared as well.
+template<typename T>
+void ExpectMapOfContainersIdentical(const T& expected, const T& actual) {
+ ASSERT_EQ(expected.size(), actual.size());
+ for (typename T::const_iterator expected_iter = expected.begin();
+ expected_iter != expected.end(); ++expected_iter) {
+ typename T::const_iterator actual_iter = actual.find(expected_iter->first);
+ ASSERT_NE(actual.end(), actual_iter);
+ typename T::mapped_type const& expected_values(expected_iter->second);
+ typename T::mapped_type const& actual_values(actual_iter->second);
+ ASSERT_EQ(expected_values.size(), actual_values.size());
+ for (typename T::mapped_type::const_iterator set_iter =
+ expected_values.begin(); set_iter != expected_values.end(); ++set_iter)
+ EXPECT_EQ(actual_values.count(*set_iter),
+ expected_values.count(*set_iter));
+ }
+}
+
+//------------------------------------------------------------------------------
+
class LimitedInMemoryURLIndexTest : public InMemoryURLIndexTest {
protected:
FilePath::StringType TestDBName() const;
@@ -217,12 +229,12 @@ TEST_F(LimitedInMemoryURLIndexTest, Initialization) {
EXPECT_EQ(1U, row_count);
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- EXPECT_EQ(1, url_index_->history_item_count_);
+ URLIndexPrivateData& private_data(*(url_index_->private_data_));
// history_info_map_ should have the same number of items as were filtered.
- EXPECT_EQ(1U, url_index_->history_info_map_.size());
- EXPECT_EQ(35U, url_index_->char_word_map_.size());
- EXPECT_EQ(17U, url_index_->word_map_.size());
+ EXPECT_EQ(1U, private_data.history_info_map_.size());
+ EXPECT_EQ(35U, private_data.char_word_map_.size());
+ EXPECT_EQ(17U, private_data.word_map_.size());
}
TEST_F(InMemoryURLIndexTest, Retrieval) {
@@ -259,11 +271,11 @@ TEST_F(InMemoryURLIndexTest, Retrieval) {
matches[0].url_info.title());
// Search which should result in very poor result.
- InMemoryURLIndex::String16Vector terms;
- terms.push_back(ASCIIToUTF16("z"));
- terms.push_back(ASCIIToUTF16("y"));
- terms.push_back(ASCIIToUTF16("x"));
- matches = url_index_->HistoryItemsForTerms(terms);
+ String16Vector original_terms;
+ original_terms.push_back(ASCIIToUTF16("z"));
+ original_terms.push_back(ASCIIToUTF16("y"));
+ original_terms.push_back(ASCIIToUTF16("x"));
+ matches = url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, matches.size());
// The results should have a poor score.
EXPECT_LT(matches[0].raw_score, 500);
@@ -296,14 +308,15 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
// Signal if someone has changed the test DB.
- EXPECT_EQ(27U, url_index_->history_info_map_.size());
- InMemoryURLIndex::String16Vector terms;
+ EXPECT_EQ(27U, url_index_->private_data_->history_info_map_.size());
+ String16Vector original_terms;
// Ensure title is being searched.
- terms.push_back(ASCIIToUTF16("MORTGAGE"));
- terms.push_back(ASCIIToUTF16("RATE"));
- terms.push_back(ASCIIToUTF16("DROPS"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ original_terms.push_back(ASCIIToUTF16("MORTGAGE"));
+ original_terms.push_back(ASCIIToUTF16("RATE"));
+ original_terms.push_back(ASCIIToUTF16("DROPS"));
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, matches.size());
// Verify that we got back the result we expected.
@@ -315,6 +328,54 @@ TEST_F(InMemoryURLIndexTest, TitleSearch) {
matches[0].url_info.title());
}
+TEST_F(InMemoryURLIndexTest, TitleChange) {
+ url_index_.reset(new InMemoryURLIndex(FilePath()));
+ url_index_->Init(this, "en,ja,hi,zh");
+
+ // Verify current title terms retrieves desired item.
+ String16Vector original_terms;
+ original_terms.push_back(ASCIIToUTF16("lebronomics"));
+ original_terms.push_back(ASCIIToUTF16("could"));
+ original_terms.push_back(ASCIIToUTF16("high"));
+ original_terms.push_back(ASCIIToUTF16("taxes"));
+ original_terms.push_back(ASCIIToUTF16("influence"));
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(original_terms);
+ ASSERT_EQ(1U, matches.size());
+
+ // Verify that we got back the result we expected.
+ const URLID expected_id = 3;
+ EXPECT_EQ(expected_id, matches[0].url_info.id());
+ EXPECT_EQ("http://www.businessandmedia.org/articles/2010/20100708120415.aspx",
+ matches[0].url_info.url().spec());
+ EXPECT_EQ(ASCIIToUTF16(
+ "LeBronomics: Could High Taxes Influence James' Team Decision?"),
+ matches[0].url_info.title());
+ URLRow old_row(matches[0].url_info);
+
+ // Verify new title terms retrieves nothing.
+ String16Vector new_terms;
+ new_terms.push_back(ASCIIToUTF16("does"));
+ new_terms.push_back(ASCIIToUTF16("eat"));
+ new_terms.push_back(ASCIIToUTF16("oats"));
+ new_terms.push_back(ASCIIToUTF16("little"));
+ new_terms.push_back(ASCIIToUTF16("lambs"));
+ new_terms.push_back(ASCIIToUTF16("ivy"));
+ matches = url_index_->HistoryItemsForTerms(new_terms);
+ ASSERT_EQ(0U, matches.size());
+
+ // Update the row.
+ old_row.set_title(ASCIIToUTF16("Does eat oats and little lambs eat ivy"));
+ url_index_->UpdateURL(expected_id, old_row);
+
+ // Verify we get the row using the new terms but not the original terms.
+ matches = url_index_->HistoryItemsForTerms(new_terms);
+ ASSERT_EQ(1U, matches.size());
+ EXPECT_EQ(expected_id, matches[0].url_info.id());
+ matches = url_index_->HistoryItemsForTerms(original_terms);
+ ASSERT_EQ(0U, matches.size());
+}
+
TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
@@ -345,101 +406,6 @@ TEST_F(InMemoryURLIndexTest, NonUniqueTermCharacterSets) {
EXPECT_EQ(28, matches[0].url_info.id());
}
-TEST_F(InMemoryURLIndexTest, StaticFunctions) {
- // Test WordVectorFromString16
- string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy"));
- InMemoryURLIndex::String16Vector string_vec =
- InMemoryURLIndex::WordVectorFromString16(string_a, false);
- ASSERT_EQ(7U, string_vec.size());
- // See if we got the words we expected.
- EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]);
- EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]);
- EXPECT_EQ(UTF8ToUTF16("google"), string_vec[2]);
- EXPECT_EQ(UTF8ToUTF16("com"), string_vec[3]);
- EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[4]);
- EXPECT_EQ(UTF8ToUTF16("the"), string_vec[5]);
- EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]);
-
- string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true);
- ASSERT_EQ(5U, string_vec.size());
- EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]);
- EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]);
- EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]);
- EXPECT_EQ(UTF8ToUTF16("the"), string_vec[3]);
- EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[4]);
-
- // Test WordSetFromString16
- string16 string_b(ASCIIToUTF16(
- "http://web.google.com/search Google Web Search"));
- InMemoryURLIndex::String16Set string_set =
- InMemoryURLIndex::WordSetFromString16(string_b);
- EXPECT_EQ(5U, string_set.size());
- // See if we got the words we expected.
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("com")) != string_set.end());
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("google")) != string_set.end());
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("http")) != string_set.end());
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("search")) != string_set.end());
- EXPECT_TRUE(string_set.find(UTF8ToUTF16("web")) != string_set.end());
-
- // Test SortAndDeoverlap
- TermMatches matches_a;
- matches_a.push_back(TermMatch(1, 13, 10));
- matches_a.push_back(TermMatch(2, 23, 10));
- matches_a.push_back(TermMatch(3, 3, 10));
- matches_a.push_back(TermMatch(4, 40, 5));
- TermMatches matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a);
- // Nothing should have been eliminated.
- EXPECT_EQ(matches_a.size(), matches_b.size());
- // The order should now be 3, 1, 2, 4.
- EXPECT_EQ(3, matches_b[0].term_num);
- EXPECT_EQ(1, matches_b[1].term_num);
- EXPECT_EQ(2, matches_b[2].term_num);
- EXPECT_EQ(4, matches_b[3].term_num);
- matches_a.push_back(TermMatch(5, 18, 10));
- matches_a.push_back(TermMatch(6, 38, 5));
- matches_b = InMemoryURLIndex::SortAndDeoverlap(matches_a);
- // Two matches should have been eliminated.
- EXPECT_EQ(matches_a.size() - 2, matches_b.size());
- // The order should now be 3, 1, 2, 6.
- EXPECT_EQ(3, matches_b[0].term_num);
- EXPECT_EQ(1, matches_b[1].term_num);
- EXPECT_EQ(2, matches_b[2].term_num);
- EXPECT_EQ(6, matches_b[3].term_num);
-
- // Test MatchTermInString
- TermMatches matches_c = InMemoryURLIndex::MatchTermInString(
- UTF8ToUTF16("x"), UTF8ToUTF16("axbxcxdxex fxgx/hxixjx.kx"), 123);
- ASSERT_EQ(11U, matches_c.size());
- const size_t expected_offsets[] = { 1, 3, 5, 7, 9, 12, 14, 17, 19, 21, 24 };
- for (int i = 0; i < 11; ++i)
- EXPECT_EQ(expected_offsets[i], matches_c[i].offset);
-}
-
-TEST_F(InMemoryURLIndexTest, OffsetsAndTermMatches) {
- // Test OffsetsFromTermMatches
- history::TermMatches matches_a;
- matches_a.push_back(history::TermMatch(1, 1, 2));
- matches_a.push_back(history::TermMatch(2, 4, 3));
- matches_a.push_back(history::TermMatch(3, 9, 1));
- matches_a.push_back(history::TermMatch(3, 10, 1));
- matches_a.push_back(history::TermMatch(4, 14, 5));
- std::vector<size_t> offsets =
- InMemoryURLIndex::OffsetsFromTermMatches(matches_a);
- const size_t expected_offsets_a[] = {1, 4, 9, 10, 14};
- ASSERT_EQ(offsets.size(), arraysize(expected_offsets_a));
- for (size_t i = 0; i < offsets.size(); ++i)
- EXPECT_EQ(expected_offsets_a[i], offsets[i]);
-
- // Test ReplaceOffsetsInTermMatches
- offsets[2] = string16::npos;
- history::TermMatches matches_b =
- InMemoryURLIndex::ReplaceOffsetsInTermMatches(matches_a, offsets);
- const size_t expected_offsets_b[] = {1, 4, 10, 14};
- ASSERT_EQ(arraysize(expected_offsets_b), matches_b.size());
- for (size_t i = 0; i < matches_b.size(); ++i)
- EXPECT_EQ(expected_offsets_b[i], matches_b[i].offset);
-}
-
TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
// Verify that match results for previously typed characters are retained
// (in the term_char_word_set_cache_) and reused, if possible, in future
@@ -460,55 +426,55 @@ TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
// Simulate typing "r" giving "r" in the simulated omnibox. The results for
// 'r' will be not cached because it is only 1 character long.
- InMemoryURLIndex::String16Vector terms;
+ String16Vector original_terms;
string16 term_r = ASCIIToUTF16("r");
- terms.push_back(term_r);
- url_index_->HistoryItemsForTerms(terms);
+ original_terms.push_back(term_r);
+ url_index_->HistoryItemsForTerms(original_terms);
EXPECT_EQ(0U, cache.size());
// Simulate typing "re" giving "r re" in the simulated omnibox.
string16 term_re = ASCIIToUTF16("re");
- terms.push_back(term_re);
+ original_terms.push_back(term_re);
// 're' should be cached at this point but not 'r' as it is a single
// character.
- ASSERT_EQ(2U, terms.size());
- url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, original_terms.size());
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, cache.size());
CheckTerm(cache, term_re);
// Simulate typing "reco" giving "r re reco" in the simulated omnibox.
string16 term_reco = ASCIIToUTF16("reco");
- terms.push_back(term_reco);
+ original_terms.push_back(term_reco);
// 're' and 'reco' should be cached at this point but not 'r' as it is a
// single character.
- url_index_->HistoryItemsForTerms(terms);
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(2U, cache.size());
CheckTerm(cache, term_re);
CheckTerm(cache, term_reco);
- terms.clear(); // Simulate pressing <ESC>.
+ original_terms.clear(); // Simulate pressing <ESC>.
// Simulate typing "mort".
string16 term_mort = ASCIIToUTF16("mort");
- terms.push_back(term_mort);
+ original_terms.push_back(term_mort);
// Since we now have only one search term, the cached results for 're' and
// 'reco' should be purged, giving us only 1 item in the cache (for 'mort').
- url_index_->HistoryItemsForTerms(terms);
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, cache.size());
CheckTerm(cache, term_mort);
// Simulate typing "reco" giving "mort reco" in the simulated omnibox.
- terms.push_back(term_reco);
- url_index_->HistoryItemsForTerms(terms);
+ original_terms.push_back(term_reco);
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(2U, cache.size());
CheckTerm(cache, term_mort);
CheckTerm(cache, term_reco);
// Simulate a <DELETE> by removing the 'reco' and adding back the 'rec'.
- terms.resize(terms.size() - 1);
+ original_terms.resize(original_terms.size() - 1);
string16 term_rec = ASCIIToUTF16("rec");
- terms.push_back(term_rec);
- url_index_->HistoryItemsForTerms(terms);
+ original_terms.push_back(term_rec);
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(2U, cache.size());
CheckTerm(cache, term_mort);
CheckTerm(cache, term_rec);
@@ -552,14 +518,14 @@ TEST_F(InMemoryURLIndexTest, Scoring) {
TEST_F(InMemoryURLIndexTest, AddNewRows) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
+ String16Vector original_terms;
// Verify that the row we're going to add does not already exist.
URLID new_row_id = 87654321;
// Newly created URLRows get a last_visit time of 'right now' so it should
// qualify as a quick result candidate.
- terms.push_back(ASCIIToUTF16("brokeandalone"));
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty());
+ original_terms.push_back(ASCIIToUTF16("brokeandalone"));
+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());
// Add a new row.
URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id);
@@ -567,26 +533,27 @@ TEST_F(InMemoryURLIndexTest, AddNewRows) {
url_index_->UpdateURL(new_row_id, new_row);
// Verify that we can retrieve it.
- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());
// Add it again just to be sure that is harmless.
url_index_->UpdateURL(new_row_id, new_row);
- EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(terms).size());
+ EXPECT_EQ(1U, url_index_->HistoryItemsForTerms(original_terms).size());
}
TEST_F(InMemoryURLIndexTest, DeleteRows) {
url_index_.reset(new InMemoryURLIndex(FilePath()));
url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
+ String16Vector original_terms;
// Make sure we actually get an existing result.
- terms.push_back(ASCIIToUTF16("DrudgeReport"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ original_terms.push_back(ASCIIToUTF16("DrudgeReport"));
+ ScoredHistoryMatches matches =
+ url_index_->HistoryItemsForTerms(original_terms);
ASSERT_EQ(1U, matches.size());
// Determine the row id for that result, delete that id, then search again.
url_index_->DeleteURL(matches[0].url_info.id());
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty());
+ EXPECT_TRUE(url_index_->HistoryItemsForTerms(original_terms).empty());
}
TEST_F(InMemoryURLIndexTest, WhitelistedURLs) {
@@ -697,77 +664,64 @@ TEST_F(InMemoryURLIndexTest, CacheSaveRestore) {
url_index.SavePrivateData(&index_cache);
// Capture our private data so we can later compare for equality.
- int history_item_count(url_index.history_item_count_);
- InMemoryURLIndex::String16Vector word_list(url_index.word_list_);
- InMemoryURLIndex::WordMap word_map(url_index.word_map_);
- InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_);
- InMemoryURLIndex::WordIDHistoryMap word_id_history_map(
- url_index.word_id_history_map_);
- InMemoryURLIndex::HistoryInfoMap history_info_map(
- url_index.history_info_map_);
+ URLIndexPrivateData& private_data(*(url_index_->private_data_));
+ String16Vector word_list(private_data.word_list_);
+ WordMap word_map(private_data.word_map_);
+ CharWordIDMap char_word_map(private_data.char_word_map_);
+ WordIDHistoryMap word_id_history_map(private_data.word_id_history_map_);
+ HistoryIDWordMap history_id_word_map(private_data.history_id_word_map_);
+ HistoryInfoMap history_info_map(private_data.history_info_map_);
// Prove that there is really something there.
- EXPECT_GT(url_index.history_item_count_, 0);
- EXPECT_FALSE(url_index.word_list_.empty());
- EXPECT_FALSE(url_index.word_map_.empty());
- EXPECT_FALSE(url_index.char_word_map_.empty());
- EXPECT_FALSE(url_index.word_id_history_map_.empty());
- EXPECT_FALSE(url_index.history_info_map_.empty());
+ EXPECT_FALSE(private_data.word_list_.empty());
+ // available_words_ will already be empty since we have freshly built the
+ // data set for this test.
+ EXPECT_TRUE(private_data.available_words_.empty());
+ EXPECT_FALSE(private_data.word_map_.empty());
+ EXPECT_FALSE(private_data.char_word_map_.empty());
+ EXPECT_FALSE(private_data.word_id_history_map_.empty());
+ EXPECT_FALSE(private_data.history_id_word_map_.empty());
+ EXPECT_FALSE(private_data.history_info_map_.empty());
// Clear and then prove it's clear.
url_index.ClearPrivateData();
- EXPECT_EQ(0, url_index.history_item_count_);
- EXPECT_TRUE(url_index.word_list_.empty());
- EXPECT_TRUE(url_index.word_map_.empty());
- EXPECT_TRUE(url_index.char_word_map_.empty());
- EXPECT_TRUE(url_index.word_id_history_map_.empty());
- EXPECT_TRUE(url_index.history_info_map_.empty());
+ EXPECT_TRUE(private_data.word_list_.empty());
+ EXPECT_TRUE(private_data.available_words_.empty());
+ EXPECT_TRUE(private_data.word_map_.empty());
+ EXPECT_TRUE(private_data.char_word_map_.empty());
+ EXPECT_TRUE(private_data.word_id_history_map_.empty());
+ EXPECT_TRUE(private_data.history_id_word_map_.empty());
+ EXPECT_TRUE(private_data.history_info_map_.empty());
// Restore the cache.
EXPECT_TRUE(url_index.RestorePrivateData(index_cache));
// Compare the restored and captured for equality.
- EXPECT_EQ(history_item_count, url_index.history_item_count_);
- EXPECT_EQ(word_list.size(), url_index.word_list_.size());
- EXPECT_EQ(word_map.size(), url_index.word_map_.size());
- EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size());
- EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size());
- EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size());
+ EXPECT_EQ(word_list.size(), private_data.word_list_.size());
+ EXPECT_EQ(word_map.size(), private_data.word_map_.size());
+ EXPECT_EQ(char_word_map.size(), private_data.char_word_map_.size());
+ EXPECT_EQ(word_id_history_map.size(),
+ private_data.word_id_history_map_.size());
+ EXPECT_EQ(history_id_word_map.size(),
+ private_data.history_id_word_map_.size());
+ EXPECT_EQ(history_info_map.size(), private_data.history_info_map_.size());
// WordList must be index-by-index equal.
size_t count = word_list.size();
for (size_t i = 0; i < count; ++i)
- EXPECT_EQ(word_list[i], url_index.word_list_[i]);
- for (InMemoryURLIndex::CharWordIDMap::const_iterator expected =
- char_word_map.begin(); expected != char_word_map.end(); ++expected) {
- InMemoryURLIndex::CharWordIDMap::const_iterator actual =
- url_index.char_word_map_.find(expected->first);
- ASSERT_TRUE(url_index.char_word_map_.end() != actual);
- const InMemoryURLIndex::WordIDSet& expected_set(expected->second);
- const InMemoryURLIndex::WordIDSet& actual_set(actual->second);
- ASSERT_EQ(expected_set.size(), actual_set.size());
- for (InMemoryURLIndex::WordIDSet::const_iterator set_iter =
- expected_set.begin(); set_iter != expected_set.end(); ++set_iter)
- EXPECT_GT(actual_set.count(*set_iter), 0U);
- }
- for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected =
- word_id_history_map.begin(); expected != word_id_history_map.end();
- ++expected) {
- InMemoryURLIndex::WordIDHistoryMap::const_iterator actual =
- url_index.word_id_history_map_.find(expected->first);
- ASSERT_TRUE(url_index.word_id_history_map_.end() != actual);
- const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second);
- const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second);
- ASSERT_EQ(expected_set.size(), actual_set.size());
- for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter =
- expected_set.begin(); set_iter != expected_set.end(); ++set_iter)
- EXPECT_GT(actual_set.count(*set_iter), 0U);
- }
- for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected =
- history_info_map.begin(); expected != history_info_map.end();
- ++expected) {
- InMemoryURLIndex::HistoryInfoMap::const_iterator actual =
- url_index.history_info_map_.find(expected->first);
- ASSERT_FALSE(url_index.history_info_map_.end() == actual);
+ EXPECT_EQ(word_list[i], private_data.word_list_[i]);
+
+ ExpectMapOfContainersIdentical(char_word_map,
+ private_data.char_word_map_);
+ ExpectMapOfContainersIdentical(word_id_history_map,
+ private_data.word_id_history_map_);
+ ExpectMapOfContainersIdentical(history_id_word_map,
+ private_data.history_id_word_map_);
+
+ for (HistoryInfoMap::const_iterator expected = history_info_map.begin();
+ expected != history_info_map.end(); ++expected) {
+ HistoryInfoMap::const_iterator actual =
+ private_data.history_info_map_.find(expected->first);
+ ASSERT_FALSE(private_data.history_info_map_.end() == actual);
const URLRow& expected_row(expected->second);
const URLRow& actual_row(actual->second);
EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count());
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 403699d..0b93e23 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -1362,6 +1362,8 @@
'browser/history/in_memory_history_backend.h',
'browser/history/in_memory_url_index.cc',
'browser/history/in_memory_url_index.h',
+ 'browser/history/in_memory_url_index_types.cc',
+ 'browser/history/in_memory_url_index_types.h',
'browser/history/page_usage_data.cc',
'browser/history/page_usage_data.h',
'browser/history/query_parser.cc',
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 32a603f..fcbf94c 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1366,6 +1366,7 @@
'browser/history/history_unittest.cc',
'browser/history/history_unittest_base.cc',
'browser/history/history_unittest_base.h',
+ 'browser/history/in_memory_url_index_types_unittest.cc',
'browser/history/in_memory_url_index_unittest.cc',
'browser/history/query_parser_unittest.cc',
'browser/history/shortcuts_backend_unittest.cc',