diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-24 14:18:26 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-24 14:18:26 +0000 |
commit | 6c6e2e851954d14c5d183a624f842f55c1241296 (patch) | |
tree | 8fe3c11fac71c2ea9bfbdaf1c205a225bc4b8a84 /chrome | |
parent | 61623711e224217f24269ec1ba5554e3e908b92c (diff) | |
download | chromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.zip chromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.tar.gz chromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.tar.bz2 |
Revert 75882 - Add caching of the InMemoryURLIndex (part of the HistoryQuickProvider) part 1. (Transactions will be introduced in the next submission.) Fixed a problem in the caching of search results as the user types each character in a search term. Updated the unit test associated with that code.
Added (temporary) flag which can be used to turn on the HQP (enable-history-quick-provider); also added it to about:flags.
Previously reviewed as http://codereview.chromium.org/6286029/.
BUG=19736,60107
TEST=Added unit tests.
Review URL: http://codereview.chromium.org/6581024
TBR=mrossetti@chromium.org
Review URL: http://codereview.chromium.org/6575032
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75883 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
24 files changed, 248 insertions, 1117 deletions
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd index f974f25..fdd5287 100644 --- a/chrome/app/generated_resources.grd +++ b/chrome/app/generated_resources.grd @@ -4116,12 +4116,6 @@ Keep your key file in a safe place. You will need it to create new versions of y <message name="IDS_FLAGS_DISABLE_INTERACTIVE_FORM_VALIDATION_DESCRIPTION" desc="Description of the 'Disable HTML5 interactive form validation' lab."> Disable showing validation messages and preventing form submission. </message> - <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER" desc="Name of the 'Enable better visit history matching in the omnibox' lab."> - Enable better omnibox history matching - </message> - <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION" desc="Description of the 'Enable better visit history matching in the omnibox' lab."> - Enables substring and multi-fragment matching within URLs from history. - </message> <!-- Crashes --> <message name="IDS_CRASHES_TITLE" desc="Title for the chrome://crashes page."> diff --git a/chrome/browser/DEPS b/chrome/browser/DEPS index 8d658fc..064caf9 100644 --- a/chrome/browser/DEPS +++ b/chrome/browser/DEPS @@ -30,13 +30,12 @@ include_rules = [ "+media/audio", # Chrome's lightweight audio library. "+media/base", "+third_party/apple", # Apple code ImageAndTextCell. - "+third_party/cld", "+third_party/expat", "+third_party/gpsd", "+third_party/sqlite", "+third_party/libevent", # For the remote V8 debugging server "+third_party/libjingle", - "+third_party/protobuf/src/google/protobuf", + "+third_party/cld", "+third_party/undoview", "+v8/include", # Browser uses V8 to get the version and run the debugger. diff --git a/chrome/browser/about_flags.cc b/chrome/browser/about_flags.cc index 34e6f79..4f6999f 100644 --- a/chrome/browser/about_flags.cc +++ b/chrome/browser/about_flags.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -254,13 +254,6 @@ const Experiment kExperiments[] = { kOsMac, // TODO(crogers): add windows and linux when FFT is ready. SINGLE_VALUE_TYPE(switches::kEnableWebAudio) }, - { - "enable-history-quick-provider", - IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER, - IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION, - kOsAll, - SINGLE_VALUE_TYPE(switches::kEnableHistoryQuickProvider) - }, }; const Experiment* experiments = kExperiments; diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index e10c3c8..daad189 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -789,10 +789,8 @@ AutocompleteController::AutocompleteController( in_start_(false) { search_provider_ = new SearchProvider(this, profile); providers_.push_back(search_provider_); - if (CommandLine::ForCurrentProcess()->HasSwitch( - switches::kEnableHistoryQuickProvider) && - !CommandLine::ForCurrentProcess()->HasSwitch( - switches::kDisableHistoryQuickProvider)) + if (!CommandLine::ForCurrentProcess()->HasSwitch( + switches::kDisableHistoryQuickProvider)) providers_.push_back(new HistoryQuickProvider(this, profile)); if (!CommandLine::ForCurrentProcess()->HasSwitch( switches::kDisableHistoryURLProvider)) diff --git a/chrome/browser/autocomplete/history_provider_util.cc b/chrome/browser/autocomplete/history_provider_util.cc index a8b23d7..4325b6b 100644 --- a/chrome/browser/autocomplete/history_provider_util.cc +++ b/chrome/browser/autocomplete/history_provider_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -6,6 +6,10 @@ namespace history { +const int kLowQualityMatchTypedLimit = 1; +const int kLowQualityMatchVisitLimit = 3; +const int kLowQualityMatchAgeLimitInDays = 3; + HistoryMatch::HistoryMatch() : url_info(), input_location(string16::npos), @@ -27,4 +31,9 @@ bool HistoryMatch::operator==(const GURL& url) const { return url_info.url() == url; } -} // namespace history +base::Time AutocompleteAgeThreshold() { + return (base::Time::Now() - + base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays)); +} + +} diff --git a/chrome/browser/autocomplete/history_provider_util.h b/chrome/browser/autocomplete/history_provider_util.h index 5866df9..aa4b444 100644 --- a/chrome/browser/autocomplete/history_provider_util.h +++ b/chrome/browser/autocomplete/history_provider_util.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -13,6 +13,13 @@ namespace history { +// Constants which specify, when considered altogether, 'significant' +// history items. These are used to filter out insignificant items +// for consideration as autocomplete candidates. +extern const int kLowQualityMatchTypedLimit; +extern const int kLowQualityMatchVisitLimit; +extern const int kLowQualityMatchAgeLimitInDays; + // Used for intermediate history result operations. struct HistoryMatch { // Required for STL, we don't use this directly. @@ -64,6 +71,10 @@ struct Prefix { int num_components; }; typedef std::vector<Prefix> Prefixes; + +// Returns the date threshold for considering an history item as significant. +base::Time AutocompleteAgeThreshold(); + } #endif // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_PROVIDER_UTIL_H_ diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h index befb8bc..561336b 100644 --- a/chrome/browser/autocomplete/history_quick_provider.h +++ b/chrome/browser/autocomplete/history_quick_provider.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -22,6 +22,9 @@ class HistoryBackend; // the history system) which quickly (and synchronously) provides matching // results from recently or frequently visited sites in the profile's // history. +// +// TODO(mrossetti): Review to see if the following applies since we're not +// using the database during the autocomplete pass. class HistoryQuickProvider : public HistoryProvider { public: HistoryQuickProvider(ACProviderListener* listener, Profile* profile); diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc index c640456..d5b5e0d 100644 --- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc +++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -139,8 +139,7 @@ void HistoryQuickProviderTest::FillData() { history::SOURCE_BROWSED); } - history::InMemoryURLIndex* index = - new history::InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))); + history::InMemoryURLIndex* index = new history::InMemoryURLIndex(); PrefService* prefs = profile_->GetPrefs(); std::string languages(prefs->GetString(prefs::kAcceptLanguages)); index->Init(db, languages); diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 0dafe8e..1db38f0 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -15,7 +15,6 @@ #include "chrome/browser/history/history.h" #include "chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_database.h" -#include "chrome/browser/history/history_types.h" #include "chrome/browser/net/url_fixer_upper.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -690,12 +689,16 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { } void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { - const base::Time& threshold(history::AutocompleteAgeThreshold()); + Time recent_threshold = history::AutocompleteAgeThreshold(); for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) { - if (RowQualifiesAsSignificant(i->url_info, threshold)) - ++i; - else + const history::URLRow& url_info(i->url_info); + if ((url_info.typed_count() <= history::kLowQualityMatchTypedLimit) && + (url_info.visit_count() <= history::kLowQualityMatchVisitLimit) && + (url_info.last_visit() < recent_threshold)) { i = matches->erase(i); + } else { + ++i; + } } } diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index 84afba6..50f47d5 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -550,7 +550,7 @@ void HistoryBackend::InitImpl(const std::string& languages) { // Fill the in-memory database and send it back to the history service on the // main thread. InMemoryHistoryBackend* mem_backend = new InMemoryHistoryBackend; - if (mem_backend->Init(history_name, history_dir_, db_.get(), languages)) + if (mem_backend->Init(history_name, db_.get(), languages)) delegate_->SetInMemoryBackend(mem_backend); // Takes ownership of pointer. else delete mem_backend; // Error case, run without the in-memory DB. diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc index 65b1935..d3753bf 100644 --- a/chrome/browser/history/history_types.cc +++ b/chrome/browser/history/history_types.cc @@ -9,6 +9,9 @@ #include "base/logging.h" #include "base/stl_util-inl.h" +using base::Time; +using base::TimeDelta; + namespace history { // URLRow ---------------------------------------------------------------------- @@ -59,7 +62,7 @@ void URLRow::Initialize() { id_ = 0; visit_count_ = 0; typed_count_ = 0; - last_visit_ = base::Time(); + last_visit_ = Time(); hidden_ = false; favicon_id_ = 0; } @@ -76,7 +79,7 @@ VisitRow::VisitRow() } VisitRow::VisitRow(URLID arg_url_id, - base::Time arg_visit_time, + Time arg_visit_time, VisitID arg_referring_visit, PageTransition::Type arg_transition, SegmentID arg_segment_id) @@ -383,24 +386,4 @@ MostVisitedThumbnails::MostVisitedThumbnails() {} MostVisitedThumbnails::~MostVisitedThumbnails() {} -// Autocomplete thresholds ----------------------------------------------------- - -const int kLowQualityMatchTypedLimit = 1; -const int kLowQualityMatchVisitLimit = 3; -const int kLowQualityMatchAgeLimitInDays = 3; - -base::Time AutocompleteAgeThreshold() { - return (base::Time::Now() - - base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays)); -} - -bool RowQualifiesAsSignificant(const URLRow& row, - const base::Time& threshold) { - const base::Time& real_threshold = - threshold.is_null() ? AutocompleteAgeThreshold() : threshold; - return (row.typed_count() > kLowQualityMatchTypedLimit) || - (row.visit_count() > kLowQualityMatchVisitLimit) || - (row.last_visit() >= real_threshold); -} - } // namespace history diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index ed219a8..320c9910 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -664,26 +664,6 @@ class MostVisitedThumbnails DISALLOW_COPY_AND_ASSIGN(MostVisitedThumbnails); }; -// Autocomplete thresholds ----------------------------------------------------- - -// Constants which specify, when considered altogether, 'significant' -// history items. These are used to filter out insignificant items -// for consideration as autocomplete candidates. -extern const int kLowQualityMatchTypedLimit; -extern const int kLowQualityMatchVisitLimit; -extern const int kLowQualityMatchAgeLimitInDays; - -// Returns the date threshold for considering an history item as significant. -base::Time AutocompleteAgeThreshold(); - -// Return true if |row| qualifies as an autocomplete candidate. If |time_cache| -// is_null() then this function determines a new time threshold each time it is -// called. Since getting system time can be costly (such as for cases where -// this function will be called in a loop over many history items), you can -// provide a non-null |time_cache| by simply initializing |time_cache| with -// AutocompleteAgeThreshold() (or any other desired time in the past). -bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold); - } // namespace history #endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_ diff --git a/chrome/browser/history/history_types_unittest.cc b/chrome/browser/history/history_types_unittest.cc index beabcef..5e14de5 100644 --- a/chrome/browser/history/history_types_unittest.cc +++ b/chrome/browser/history/history_types_unittest.cc @@ -2,10 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "base/utf_string_conversions.h" #include "chrome/browser/history/history_types.h" #include "testing/gtest/include/gtest/gtest.h" +using base::Time; + namespace history { namespace { @@ -39,9 +40,9 @@ static const char kURL3[] = "http://images.google.com/"; void AddSimpleData(QueryResults* results) { GURL url1(kURL1); GURL url2(kURL2); - URLResult result1(url1, base::Time::Now()); - URLResult result2(url1, base::Time::Now()); - URLResult result3(url2, base::Time::Now()); + URLResult result1(url1, Time::Now()); + URLResult result2(url1, Time::Now()); + URLResult result3(url2, Time::Now()); // The URLResults are invalid after being inserted. results->AppendURLBySwapping(&result1); @@ -54,8 +55,8 @@ void AddSimpleData(QueryResults* results) { void AddAlternateData(QueryResults* results) { GURL url2(kURL2); GURL url3(kURL3); - URLResult result1(url2, base::Time::Now()); - URLResult result2(url3, base::Time::Now()); + URLResult result1(url2, Time::Now()); + URLResult result2(url3, Time::Now()); // The URLResults are invalid after being inserted. results->AppendURLBySwapping(&result1); @@ -167,32 +168,4 @@ TEST(HistoryQueryResult, AppendResults) { EXPECT_TRUE(matches[0] == 3); } -TEST(HistoryQueryResult, RowSignificance) { - const base::Time& threshold(AutocompleteAgeThreshold()); - const GURL url("http://www.google.com/"); - URLRow url_row(url); - url_row.set_title(UTF8ToUTF16("Google")); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_visit_count(kLowQualityMatchVisitLimit + 1); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_visit_count(1); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_typed_count(kLowQualityMatchTypedLimit + 1); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_typed_count(0); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_last_visit(base::Time::Now() - base::TimeDelta::FromDays(1)); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); - url_row.set_last_visit(base::Time::Now() - - base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1)); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); - EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); -} - } // namespace diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h index 026fd7e..6e56636 100644 --- a/chrome/browser/history/in_memory_history_backend.h +++ b/chrome/browser/history/in_memory_history_backend.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -7,7 +7,7 @@ // in-line autocomplete. // // It is created on the history thread and passed to the main thread where -// operations can be completed synchronously. It listens for notifications +// operations can be completed synchronously. It listenes for notifications // from the "regular" history backend and keeps itself in sync. #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_ @@ -41,15 +41,8 @@ class InMemoryHistoryBackend : public NotificationObserver { InMemoryHistoryBackend(); ~InMemoryHistoryBackend(); - // Initializes the backend from the history database pointed to by the - // full path in |history_filename|. |history_dir| is the path to the - // directory containing the history database and is also used - // as the directory where the InMemoryURLIndex's cache is kept. |db| is - // used for building the InMemoryURLIndex. |languages| gives the - // preferred user languages with which URLs and page titles are - // interpreted while decomposing into words and characters during indexing. + // Initializes with data from the given history database. bool Init(const FilePath& history_filename, - const FilePath& history_dir, URLDatabase* db, const std::string& languages); diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 0458699..9d2e925 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,43 +8,22 @@ #include <iterator> #include <limits> -#include "base/file_util.h" #include "base/i18n/break_iterator.h" -#include "base/metrics/histogram.h" #include "base/string_util.h" #include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/url_database.h" -#include "chrome/browser/profiles/profile.h" #include "net/base/escape.h" #include "net/base/net_util.h" -#include "third_party/protobuf/src/google/protobuf/repeated_field.h" #include "ui/base/l10n/l10n_util.h" +#include "unicode/utypes.h" // for int32_t -using google::protobuf::RepeatedField; -using google::protobuf::RepeatedPtrField; +using base::Time; +using base::TimeDelta; -using in_memory_url_index::InMemoryURLIndexCacheItem; namespace history { -typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; -typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; -typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; -typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem; -typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry - CharWordMapEntry; -typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem - WordIDHistoryMapItem; -typedef imui:: - InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry - WordIDHistoryMapEntry; -typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; -typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry - HistoryInfoMapEntry; - -const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; - ScoredHistoryMatch::ScoredHistoryMatch() : raw_score(0) {} ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info, @@ -57,49 +36,53 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info, } struct InMemoryURLIndex::TermCharWordSet { - TermCharWordSet() // Required for STL resize(). - : char_(0), - word_id_set_(), - used_(false) {} - TermCharWordSet(const char16& uni_char, + TermCharWordSet(const Char16Set& char_set, const WordIDSet& word_id_set, bool used) - : char_(uni_char), + : char_set_(char_set), word_id_set_(word_id_set), used_(used) {} - // Predicate for STL algorithm use. - bool is_not_used() const { return !used_; } + bool IsNotUsed() const { return !used_; } - char16 char_; + Char16Set char_set_; WordIDSet word_id_set_; bool used_; // true if this set has been used for the current term search. }; -InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) - : history_dir_(history_dir), - history_item_count_(0) { -} - -// Called only by unit tests. -InMemoryURLIndex::InMemoryURLIndex() - : history_item_count_(0) { -} +InMemoryURLIndex::InMemoryURLIndex() : history_item_count_(0) {} InMemoryURLIndex::~InMemoryURLIndex() {} +static const int32_t kURLToLowerBufferSize = 2048; + // Indexing bool InMemoryURLIndex::Init(history::URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; - return ReloadFromHistory(history_db, false); -} - -void InMemoryURLIndex::ShutDown() { - // Write our cache. - SaveToCacheFile(); + // Reset our indexes. + char_word_map_.clear(); + word_id_history_map_.clear(); + if (!history_db) + return false; + URLDatabase::URLEnumerator history_enum; + if (history_db->InitURLEnumeratorForEverything(&history_enum)) { + URLRow row; + Time recent_threshold = InMemoryURLIndex::RecentThreshold(); + while (history_enum.GetNextURL(&row)) { + // Do some filtering so that we only get history items which could + // possibly pass the HistoryURLProvider::CullPoorMatches filter later. + if ((row.typed_count() > kLowQualityMatchTypedLimit) || + (row.visit_count() > kLowQualityMatchVisitLimit) || + (row.last_visit() >= recent_threshold)) { + if (!IndexRow(row)) + return false; + } + } + } + return true; } bool InMemoryURLIndex::IndexRow(const URLRow& row) { @@ -119,7 +102,7 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_[history_id] = new_row; + history_info_map_.insert(std::make_pair(history_id, new_row)); // Split into individual, unique words. String16Set words = WordSetFromString16(url); @@ -135,149 +118,6 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { return true; } -bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, - bool clear_cache) { - ClearPrivateData(); - - if (!history_db) - return false; - - if (clear_cache || !RestoreFromCacheFile()) { - base::TimeTicks beginning_time = base::TimeTicks::Now(); - // The index has to be built from scratch. - URLDatabase::URLEnumerator history_enum; - if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) - return false; - URLRow row; - while (history_enum.GetNextURL(&row)) { - if (!IndexRow(row)) - return false; - } - UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", - base::TimeTicks::Now() - beginning_time); - SaveToCacheFile(); - } - return true; -} - -void InMemoryURLIndex::ClearPrivateData() { - history_item_count_ = 0; - word_list_.clear(); - word_map_.clear(); - char_word_map_.clear(); - word_id_history_map_.clear(); - term_char_word_set_cache_.clear(); - history_info_map_.clear(); -} - -bool InMemoryURLIndex::RestoreFromCacheFile() { - // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. - // That is: ensure that the database has not been modified since the cache - // was last saved. DB file modification date is inadequate. There are no - // SQLite table checksums automatically stored. - base::TimeTicks beginning_time = base::TimeTicks::Now(); - FilePath file_path; - if (!GetCacheFilePath(&file_path)) - return false; - std::string data; - if (!file_util::ReadFileToString(file_path, &data)) { - LOG(WARNING) << "Failed to read InMemoryURLIndex cache from " - << file_path.value(); - return false; - } - - InMemoryURLIndexCacheItem index_cache; - if (!index_cache.ParseFromArray(data.c_str(), data.size())) { - LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " - << file_path.value(); - return false; - } - - if (!RestorePrivateData(index_cache)) { - ClearPrivateData(); // Back to square one -- must build from scratch. - return false; - } - - UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", - base::TimeTicks::Now() - beginning_time); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); - UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); - return true; -} - -bool InMemoryURLIndex::SaveToCacheFile() { - base::TimeTicks beginning_time = base::TimeTicks::Now(); - InMemoryURLIndexCacheItem index_cache; - SavePrivateData(&index_cache); - std::string data; - if (!index_cache.SerializeToString(&data)) { - LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; - return false; - } - - // Write the cache to a file then swap it for the old cache, if any, and - // delete the old cache. - FilePath file_path; - if (!GetCacheFilePath(&file_path)) - return false; - file_util::ScopedFILE file(file_util::OpenFile(file_path, "w")); - if (!file.get()) - return false; - - int size = data.size(); - if (file_util::WriteFile(file_path, data.c_str(), size) != size) { - LOG(WARNING) << "Failed to write " << file_path.value(); - return false; - } - UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", - base::TimeTicks::Now() - beginning_time); - return true; -} - -void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { - // The row may or may not already be in our index. If it is not already - // indexed and it qualifies then it gets indexed. If it is already - // indexed and still qualifies then it gets updated, otherwise it - // is deleted from the index. - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); - if (row_pos == history_info_map_.end()) { - // This new row should be indexed if it qualifies. - if (RowQualifiesAsSignificant(row, base::Time())) - IndexRow(row); - } else if (RowQualifiesAsSignificant(row, base::Time())) { - // This indexed row still qualifies and will be re-indexed. - // The url won't have changed but the title, visit count, etc. - // might have changed. - URLRow& old_row = row_pos->second; - old_row.set_visit_count(row.visit_count()); - old_row.set_typed_count(row.typed_count()); - old_row.set_last_visit(row.last_visit()); - // TODO(mrossetti): When we start indexing the title the next line - // will need attention. - old_row.set_title(row.title()); - } else { - // This indexed row no longer qualifies and will be de-indexed. - history_info_map_.erase(row_id); - } - // This invalidates the cache. - term_char_word_set_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. -} - -void InMemoryURLIndex::DeleteURL(URLID row_id) { - // Note that this does not remove any reference to this row from the - // word_id_history_map_. That map will continue to contain (and return) - // hits against this row until that map is rebuilt, but since the - // history_info_map_ no longer references the row no erroneous results - // will propagate to the user. - history_info_map_.erase(row_id); - // This invalidates the word cache. - term_char_word_set_cache_.clear(); - // TODO(mrossetti): Record this transaction in the cache. -} - // Searching ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( @@ -315,7 +155,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( term_char_word_set_cache_.erase( std::remove_if(term_char_word_set_cache_.begin(), term_char_word_set_cache_.end(), - std::mem_fun_ref(&TermCharWordSet::is_not_used)), + std::mem_fun_ref(&TermCharWordSet::IsNotUsed)), term_char_word_set_cache_.end()); return scored_items; } @@ -366,10 +206,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& uni_word) { InMemoryURLIndex::HistoryIDSet history_id_set; - // For each unique character in the word, in order of first appearance, get - // the char/word_id map entry and intersect with the set in an incremental - // manner. - Char16Vector uni_chars = Char16VectorFromString16(uni_word); + // For each character in the word, get the char/word_id map entry and + // intersect with the set in an incremental manner. + Char16Set uni_chars = CharactersFromString16(uni_word); WordIDSet word_id_set(WordIDSetForTermChars(uni_chars)); // If any words resulted then we can compose a set of history IDs by unioning @@ -407,22 +246,7 @@ InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( } // static -InMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16( - const string16& uni_word) { - InMemoryURLIndex::Char16Vector characters; - InMemoryURLIndex::Char16Set unique_characters; - for (string16::const_iterator iter = uni_word.begin(); - iter != uni_word.end(); ++iter) { - if (!unique_characters.count(*iter)) { - unique_characters.insert(*iter); - characters.push_back(*iter); - } - } - return characters; -} - -// static -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( +InMemoryURLIndex::Char16Set InMemoryURLIndex::CharactersFromString16( const string16& uni_word) { Char16Set characters; for (string16::const_iterator iter = uni_word.begin(); @@ -453,17 +277,17 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word, HistoryID history_id) { word_list_.push_back(uni_word); WordID word_id = word_list_.size() - 1; - word_map_[uni_word] = word_id; + word_map_.insert(std::make_pair(uni_word, word_id)); HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_[word_id] = history_id_set; + word_id_history_map_.insert(std::make_pair(word_id, history_id_set)); // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. - Char16Set characters = Char16SetFromString16(uni_word); + Char16Set characters = CharactersFromString16(uni_word); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { - Char16Set::value_type uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); + Char16Set::value_type uni_string = *uni_char_iter; + CharWordIDMap::iterator char_iter = char_word_map_.find(uni_string); if (char_iter != char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); @@ -472,79 +296,89 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_[uni_char] = word_id_set; + char_word_map_.insert(std::make_pair(uni_string, word_id_set)); } } } InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - const InMemoryURLIndex::Char16Vector& uni_chars) { - size_t index = CachedResultsIndexForTerm(uni_chars); - - // If there were no unprocessed characters in the search term |uni_chars| - // then we can use the cached one as-is as the results with no further - // filtering. - if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1) - return term_char_word_set_cache_[index].word_id_set_; - - // Some or all of the characters remain to be indexed so trim the cache. - if (index + 1 < term_char_word_set_cache_.size()) - term_char_word_set_cache_.resize(index + 1); - WordIDSet word_id_set; - // Take advantage of our cached starting point, if any. - Char16Vector::const_iterator c_iter = uni_chars.begin(); - if (index != kNoCachedResultForTerm) { - word_id_set = term_char_word_set_cache_[index].word_id_set_; - c_iter += index + 1; - } - // Now process the remaining characters in the search term. - for (; c_iter != uni_chars.end(); ++c_iter) { - Char16Vector::value_type uni_char = *c_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter == char_word_map_.end()) { - // A character was not found so there are no matching results: bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - // It is possible for there to no longer be any words associated with - // a particular character. Give up in that case. - if (char_word_id_set.empty()) { - word_id_set.clear(); - break; + InMemoryURLIndex::Char16Set const& uni_chars) { + TermCharWordSet* best_term_char_word_set = NULL; + bool set_found = false; + size_t outstanding_count = 0; + Char16Set outstanding_chars; + for (TermCharWordSetVector::iterator iter = term_char_word_set_cache_.begin(); + iter != term_char_word_set_cache_.end(); ++iter) { + TermCharWordSetVector::value_type& term_char_word_set(*iter); + Char16Set& char_set(term_char_word_set.char_set_); + Char16Set exclusions; + std::set_difference(char_set.begin(), char_set.end(), + uni_chars.begin(), uni_chars.end(), + std::inserter(exclusions, exclusions.begin())); + if (exclusions.empty()) { + // Do a reverse difference so that we know which characters remain + // to be indexed. Then decide if this is a better match than any + // previous cached set. + std::set_difference(uni_chars.begin(), uni_chars.end(), + char_set.begin(), char_set.end(), + std::inserter(exclusions, exclusions.begin())); + if (!set_found || exclusions.size() < outstanding_count) { + outstanding_chars = exclusions; + best_term_char_word_set = &*iter; + outstanding_count = exclusions.size(); + set_found = true; + } } + } - if (word_id_set.empty()) { - word_id_set = char_word_id_set; + WordIDSet word_id_set; + if (set_found && outstanding_count == 0) { + // If there were no oustanding characters then we can use the cached one. + best_term_char_word_set->used_ = true; + word_id_set = best_term_char_word_set->word_id_set_; + } else { + // Some or all of the characters remain to be indexed. + bool first_character = true; + if (set_found) { + first_character = false; + word_id_set = best_term_char_word_set->word_id_set_; } else { - WordIDSet old_word_id_set(word_id_set); - word_id_set.clear(); - std::set_intersection(old_word_id_set.begin(), - old_word_id_set.end(), - char_word_id_set.begin(), - char_word_id_set.end(), - std::inserter(word_id_set, - word_id_set.begin())); + outstanding_chars = uni_chars; + } + + for (Char16Set::iterator uni_char_iter = outstanding_chars.begin(); + uni_char_iter != outstanding_chars.end(); ++uni_char_iter) { + Char16Set::value_type uni_char = *uni_char_iter; + CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); + if (char_iter == char_word_map_.end()) { + // The character was not found so bail. + word_id_set.clear(); + break; + } + WordIDSet& char_word_id_set(char_iter->second); + if (first_character) { + word_id_set = char_word_id_set; + first_character = false; + } else { + WordIDSet old_word_id_set(word_id_set); + word_id_set.clear(); + std::set_intersection(old_word_id_set.begin(), + old_word_id_set.end(), + char_word_id_set.begin(), + char_word_id_set.end(), + std::inserter(word_id_set, + word_id_set.begin())); + } + } + // Update the cache. + if (!set_found || outstanding_count) { + term_char_word_set_cache_.push_back(TermCharWordSet(uni_chars, + word_id_set, true)); } - // Add this new char/set instance to the cache. - term_char_word_set_cache_.push_back(TermCharWordSet( - uni_char, word_id_set, true)); } return word_id_set; } -size_t InMemoryURLIndex::CachedResultsIndexForTerm( - const InMemoryURLIndex::Char16Vector& uni_chars) { - TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin(); - for (Char16Vector::const_iterator c_iter = uni_chars.begin(); - (c_iter != uni_chars.end()) && - (t_iter != term_char_word_set_cache_.end()) && - (*c_iter == t_iter->char_); - ++c_iter, ++t_iter) - t_iter->used_ = true; // Update the cache. - return t_iter - term_char_word_set_cache_.begin() - 1; -} - // static int InMemoryURLIndex::RawScoreForURL(const URLRow& row, const String16Vector& terms, @@ -643,9 +477,8 @@ int InMemoryURLIndex::RawScoreForURL(const URLRow& row, } // static -base::Time InMemoryURLIndex::RecentThreshold() { - return base::Time::Now() - - base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays); +Time InMemoryURLIndex::RecentThreshold() { + return Time::Now() - TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays); } InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( @@ -661,9 +494,6 @@ void InMemoryURLIndex::AddHistoryMatch::operator()( const InMemoryURLIndex::HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = index_.history_info_map_.find(history_id); - // Note that a history_id may be present in the word_id_history_map_ yet not - // be found in the history_info_map_. This occurs when an item has been - // deleted by the user or the item no longer qualifies as a quick result. if (hist_pos != index_.history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; // TODO(mrossetti): Accommodate multiple term highlighting. @@ -700,227 +530,4 @@ void InMemoryURLIndex::AddHistoryMatch::operator()( } } -bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { - if (history_dir_.empty()) - return false; - *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); - return true; -} - -void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { - DCHECK(cache); - cache->set_timestamp(base::Time::Now().ToInternalValue()); - cache->set_history_item_count(history_item_count_); - SaveWordList(cache); - SaveWordMap(cache); - SaveCharWordMap(cache); - SaveWordIDHistoryMap(cache); - SaveHistoryInfoMap(cache); -} - -bool InMemoryURLIndex::RestorePrivateData( - const InMemoryURLIndexCacheItem& cache) { - last_saved_ = base::Time::FromInternalValue(cache.timestamp()); - history_item_count_ = cache.history_item_count(); - return (history_item_count_ == 0) || RestoreWordList(cache) && - RestoreWordMap(cache) && RestoreCharWordMap(cache) && - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache); -} - - -void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { - if (word_list_.empty()) - return; - WordListItem* list_item = cache->mutable_word_list(); - list_item->set_word_count(word_list_.size()); - for (String16Vector::const_iterator iter = word_list_.begin(); - iter != word_list_.end(); ++iter) - list_item->add_word(UTF16ToUTF8(*iter)); -} - -bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { - if (!cache.has_word_list()) - return false; - const WordListItem& list_item(cache.word_list()); - uint32 expected_item_count = list_item.word_count(); - uint32 actual_item_count = list_item.word_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - const RepeatedPtrField<std::string>& words(list_item.word()); - for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); - iter != words.end(); ++iter) - word_list_.push_back(UTF8ToUTF16(*iter)); - return true; -} - -void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { - if (word_map_.empty()) - return; - WordMapItem* map_item = cache->mutable_word_map(); - map_item->set_item_count(word_map_.size()); - for (WordMap::const_iterator iter = word_map_.begin(); - iter != word_map_.end(); ++iter) { - WordMapEntry* map_entry = map_item->add_word_map_entry(); - map_entry->set_word(UTF16ToUTF8(iter->first)); - map_entry->set_word_id(iter->second); - } -} - -bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { - if (!cache.has_word_map()) - return false; - const WordMapItem& list_item(cache.word_map()); - uint32 expected_item_count = list_item.item_count(); - uint32 actual_item_count = list_item.word_map_entry_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); - for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); - iter != entries.end(); ++iter) - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); - return true; -} - -void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { - if (char_word_map_.empty()) - return; - CharWordMapItem* map_item = cache->mutable_char_word_map(); - map_item->set_item_count(char_word_map_.size()); - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); - iter != char_word_map_.end(); ++iter) { - CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); - map_entry->set_char_16(iter->first); - const WordIDSet& word_id_set(iter->second); - map_entry->set_item_count(word_id_set.size()); - for (WordIDSet::const_iterator set_iter = word_id_set.begin(); - set_iter != word_id_set.end(); ++set_iter) - map_entry->add_word_id(*set_iter); - } -} - -bool InMemoryURLIndex::RestoreCharWordMap( - const InMemoryURLIndexCacheItem& cache) { - if (!cache.has_char_word_map()) - return false; - const CharWordMapItem& list_item(cache.char_word_map()); - uint32 expected_item_count = list_item.item_count(); - uint32 actual_item_count = list_item.char_word_map_entry_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - const RepeatedPtrField<CharWordMapEntry>& - entries(list_item.char_word_map_entry()); - for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = - entries.begin(); iter != entries.end(); ++iter) { - expected_item_count = iter->item_count(); - actual_item_count = iter->word_id_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - char16 uni_char = static_cast<char16>(iter->char_16()); - WordIDSet word_id_set; - const RepeatedField<int32>& word_ids(iter->word_id()); - for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); - jiter != word_ids.end(); ++jiter) - word_id_set.insert(*jiter); - char_word_map_[uni_char] = word_id_set; - } - return true; -} - -void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) - const { - if (word_id_history_map_.empty()) - return; - WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); - map_item->set_item_count(word_id_history_map_.size()); - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); - iter != word_id_history_map_.end(); ++iter) { - WordIDHistoryMapEntry* map_entry = - map_item->add_word_id_history_map_entry(); - map_entry->set_word_id(iter->first); - const HistoryIDSet& history_id_set(iter->second); - map_entry->set_item_count(history_id_set.size()); - for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); - set_iter != history_id_set.end(); ++set_iter) - map_entry->add_history_id(*set_iter); - } -} - -bool InMemoryURLIndex::RestoreWordIDHistoryMap( - const InMemoryURLIndexCacheItem& cache) { - if (!cache.has_word_id_history_map()) - return false; - const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); - uint32 expected_item_count = list_item.item_count(); - uint32 actual_item_count = list_item.word_id_history_map_entry_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - const RepeatedPtrField<WordIDHistoryMapEntry>& - entries(list_item.word_id_history_map_entry()); - for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = - entries.begin(); iter != entries.end(); ++iter) { - expected_item_count = iter->item_count(); - actual_item_count = iter->history_id_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - WordID word_id = iter->word_id(); - HistoryIDSet history_id_set; - const RepeatedField<int64>& history_ids(iter->history_id()); - for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); - jiter != history_ids.end(); ++jiter) - history_id_set.insert(*jiter); - word_id_history_map_[word_id] = history_id_set; - } - return true; -} - -void InMemoryURLIndex::SaveHistoryInfoMap( - InMemoryURLIndexCacheItem* cache) const { - if (history_info_map_.empty()) - return; - HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); - map_item->set_item_count(history_info_map_.size()); - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); - iter != history_info_map_.end(); ++iter) { - HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); - map_entry->set_history_id(iter->first); - const URLRow& url_row(iter->second); - // Note: We only save information that contributes to the index so there - // is no need to save term_char_word_set_cache_ (not persistent), - // languages_, etc. - map_entry->set_visit_count(url_row.visit_count()); - map_entry->set_typed_count(url_row.typed_count()); - map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); - map_entry->set_url(url_row.url().spec()); - map_entry->set_title(UTF16ToUTF8(url_row.title())); - } -} - -bool InMemoryURLIndex::RestoreHistoryInfoMap( - const InMemoryURLIndexCacheItem& cache) { - if (!cache.has_history_info_map()) - return false; - const HistoryInfoMapItem& list_item(cache.history_info_map()); - uint32 expected_item_count = list_item.item_count(); - uint32 actual_item_count = list_item.history_info_map_entry_size(); - if (actual_item_count == 0 || actual_item_count != expected_item_count) - return false; - const RepeatedPtrField<HistoryInfoMapEntry>& - entries(list_item.history_info_map_entry()); - for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = - entries.begin(); iter != entries.end(); ++iter) { - HistoryID history_id = iter->history_id(); - GURL url(iter->url()); - URLRow url_row(url, history_id); - url_row.set_visit_count(iter->visit_count()); - url_row.set_typed_count(iter->typed_count()); - url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); - if (iter->has_title()) { - string16 title(UTF8ToUTF16(iter->title())); - url_row.set_title(title); - } - history_info_map_[history_id] = url_row; - } - return true; -} - } // namespace history diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index bd84188..81336f1 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,30 +14,19 @@ #include "app/sql/connection.h" #include "base/basictypes.h" -#include "base/file_path.h" -#include "base/gtest_prod_util.h" #include "base/linked_ptr.h" #include "base/scoped_ptr.h" #include "base/string16.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/history_types.h" -#include "chrome/browser/history/in_memory_url_index_cache.pb.h" #include "testing/gtest/include/gtest/gtest_prod.h" -class Profile; - namespace base { class Time; } -namespace in_memory_url_index { -class InMemoryURLIndexCacheItem; -} - namespace history { -namespace imui = in_memory_url_index; - class URLDatabase; // Used for intermediate history result operations. @@ -78,39 +67,22 @@ typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; // multi-char16 instance. class InMemoryURLIndex { public: - // |history_dir| is a path to the directory containing the history database - // within the profile wherein the cache and transaction journals will be - // stored. - explicit InMemoryURLIndex(const FilePath& history_dir); + InMemoryURLIndex(); ~InMemoryURLIndex(); // Convenience types typedef std::vector<string16> String16Vector; - // Opens and indexes the URL history database. + // Open and index the URL history database. // |languages| gives a list of language encodings with which the history // URLs and omnibox searches are interpreted, i.e. when each is broken // down into words and each word is broken down into characters. bool Init(URLDatabase* history_db, const std::string& languages); - // Reloads the history index. Attempts to reload from the cache unless - // |clear_cache| is true. If the cache is unavailable then reload the - // index from |history_db|. - bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache); + // Reset the history index. + void Reset(); - // Signals that any outstanding initialization should be canceled and - // flushes the cache to disk. - void ShutDown(); - - // Restores the index's private data from the cache file stored in the - // profile directory and returns true if successful. - bool RestoreFromCacheFile(); - - // Caches the index private data and writes the cache file to the profile - // directory. - bool SaveToCacheFile(); - - // Given a vector containing one or more words as string16s, scans the + // Given a vector containing one or more words as string16s, scan the // history index and return a vector with all scored, matching history items. // Each term must occur somewhere in the history item for the item to // qualify; however, the terms do not necessarily have to be adjacent. @@ -120,36 +92,17 @@ class InMemoryURLIndex { // Returns the date threshold for considering an history item as significant. static base::Time RecentThreshold(); - // Updates or adds an history item to the index if it meets the minimum - // 'quick' criteria. - void UpdateURL(URLID row_id, const URLRow& row); - - // Deletes indexing data for an history item. The item may not have actually - // been indexed (which is the case if it did not previously meet minimum - // 'quick' criteria). - void DeleteURL(URLID row_id); - private: friend class AddHistoryMatch; - FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Initialization); - FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); - FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); - FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); - FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); - - // Signals that there are no previously cached results for the typed term. - static const size_t kNoCachedResultForTerm; - - // Creating one of me without a history path is not allowed (tests excepted). - InMemoryURLIndex(); + friend class InMemoryURLIndexTest; + FRIEND_TEST(InMemoryURLIndexTest, Initialization); // Convenience types. typedef std::set<string16> String16Set; typedef std::set<char16> Char16Set; - typedef std::vector<char16> Char16Vector; // An index into list of all of the words we have indexed. - typedef int WordID; + typedef int16 WordID; // A map allowing a WordID to be determined given a word. typedef std::map<string16, WordID> WordMap; @@ -165,16 +118,8 @@ class InMemoryURLIndex { typedef std::set<HistoryID> HistoryIDSet; typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; - // Support caching of term character results so that we can optimize - // searches which build upon a previous search. Each entry in this vector - // represents a progressive reduction of the result set for each unique - // character found in the search term, with each character being taken as - // initially encountered. For example, once the results for the search - // term of "mextexarkana" have been fully determined, this vector will - // contain one entry for the characters: m, e, x, t, a, r, k, & n, in - // that order. The result sets will naturally shrink in size for each - // subsequent character as the sets intersections are taken in an - // incremental manner. + // Support caching of term character intersections so that we can optimize + // searches which build upon a previous search. struct TermCharWordSet; typedef std::vector<TermCharWordSet> TermCharWordSetVector; @@ -203,37 +148,20 @@ class InMemoryURLIndex { const String16Vector& lower_terms_; }; - // Initializes all index data members in preparation for restoring the index - // from the cache or a complete rebuild from the history database. - void ClearPrivateData(); - - // Breaks a string down into individual words. + // Break a string down into individual words. static String16Set WordSetFromString16(const string16& uni_string); - // Given a vector of Char16s, representing the characters the user has typed - // into the omnibox, finds words containing those characters. If any - // existing, cached set is a proper subset then starts with that cached - // set. Updates the previously-typed-character cache. - WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars); - - // Given a vector of Char16s in |uni_chars|, compare those characters, in - // order, with the previously searched term, returning the index of the - // cached results in the term_char_word_set_cache_ of the set with best - // matching number of characters. Returns kNoCachedResultForTerm if there - // was no match at all (i.e. the first character of |uni-chars| is not the - // same as the character of the first entry in term_char_word_set_cache_). - size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars); + // Given a set of Char16s, find words containing those characters. If any + // existing, cached set is a proper subset then start with that cached + // set. Update the cache. + WordIDSet WordIDSetForTermChars(const Char16Set& uni_chars); // URL History indexing support functions. - // Indexes one URL history item. + // Index one URL history item. bool IndexRow(const URLRow& row); - // Breaks a string down into unique, individual characters in the order - // in which the characters are first encountered in the |uni_word| string. - static Char16Vector Char16VectorFromString16(const string16& uni_word); - - // Breaks the |uni_word| string down into its individual characters. + // Break a string down into its individual characters. // Note that this is temporarily intended to work on a single word, but // _will_ work on a string of words, perhaps with unexpected results. // TODO(mrossetti): Lots of optimizations possible here for not restarting @@ -241,26 +169,26 @@ class InMemoryURLIndex { // and properly handle substring matches, scoring and sorting the results // by score. Also, provide the metrics for where the matches occur so that // the UI can highlight the matched sections. - static Char16Set Char16SetFromString16(const string16& uni_word); + Char16Set CharactersFromString16(const string16& uni_word); - // Given a single word in |uni_word|, adds a reference for the containing - // history item identified by |history_id| to the index. + // Given a single word, add a reference to the containing history item + // to the index. void AddWordToIndex(const string16& uni_word, HistoryID history_id); - // Updates an existing entry in the word/history index by adding the + // Update an existing entry in the word/history index by adding the // |history_id| to set for |word_id| in the word_id_history_map_. void UpdateWordHistory(WordID word_id, HistoryID history_id); - // Creates a new entry in the word/history map for |word_id| and add + // Create a new entry in the word/history map for |word_id| and add // |history_id| as the initial element of the word's set. void AddWordHistory(const string16& uni_word, HistoryID history_id); - // Clears the search term cache. This cache holds on to the intermediate + // Clear the search term cache. This cache holds on to the intermediate // word results for each previously typed character to eliminate the need // to re-perform set operations for previously typed characters. void ResetTermCharWordSetCache(); - // Composes a set of history item IDs by intersecting the set for each word + // Compose a set of history item IDs by intersecting the set for each word // in |uni_string|. HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); @@ -268,7 +196,7 @@ class InMemoryURLIndex { // ids for the given term given in |uni_word|. HistoryIDSet HistoryIDsForTerm(const string16& uni_word); - // Calculates a raw score for this history item by first determining + // Calculate a raw score for this history item by first determining // if all of the terms in |terms_vector| occur in |row| and, if so, // calculating a raw score based on 1) starting position of each term // in the user input, 2) completeness of each term's match, 3) ordering @@ -282,53 +210,15 @@ class InMemoryURLIndex { const String16Vector& terms_vector, size_t* first_term_location); - // Utility functions supporting RestoreFromCache and SaveToCache. - - // Construct a file path for the cache file within the same directory where - // the history database is kept and saves that path to |file_path|. Returns - // true if |file_path| can be successfully constructed. (This function - // provided as a hook for unit testing.) - bool GetCacheFilePath(FilePath* file_path); - - // Encode a data structure into the protobuf |cache|. - void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; - void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; - void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; - void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; - void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; - void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; - - // Decode a data structure from the protobuf |cache|. Return false if there - // is any kind of failure. - bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); - bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); - bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); - bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); - bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); - bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); - - // Directory where cache file resides. This is, except when unit testing, - // the same directory in which the profile's history database is found. It - // should never be empty. - FilePath history_dir_; - - // The timestamp of when the cache was last saved. This is used to validate - // the transaction journal's applicability to the cache. The timestamp is - // initialized to the NULL time, indicating that the cache was not used with - // the InMemoryURLIndex was last populated. - base::Time last_saved_; - // A list of all of indexed words. The index of a word in this list is the // ID of the word in the word_map_. It reduces the memory overhead by // replacing a potentially long and repeated string with a simple index. // NOTE: A word will _never_ be removed from this vector thus insuring // the immutability of the word_id throughout the session, reducing // maintenance complexity. - // TODO(mrossetti): Profile the vector allocation and determine if judicious - // 'reserve' calls are called for. String16Vector word_list_; - int history_item_count_; + uint64 history_item_count_; WordMap word_map_; CharWordIDMap char_word_map_; WordIDHistoryMap word_id_history_map_; diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index d495db9..9306ec5 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -11,7 +11,6 @@ #include "app/sql/connection.h" #include "app/sql/statement.h" #include "app/sql/transaction.h" -#include "base/file_path.h" #include "base/file_util.h" #include "base/path_service.h" #include "base/scoped_ptr.h" @@ -33,6 +32,9 @@ // Note that only lines whose first character is an upper-case letter are // processed when creating the test database. +using base::Time; +using base::TimeDelta; + namespace history { class InMemoryURLIndexTest : public testing::Test, @@ -81,15 +83,15 @@ class InMemoryURLIndexTest : public testing::Test, sql::Statement statement(db.GetUniqueStatement( "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;")); EXPECT_TRUE(statement); - base::Time time_right_now = base::Time::NowFromSystemTime(); - base::TimeDelta day_delta = base::TimeDelta::FromDays(1); + Time time_right_now = Time::NowFromSystemTime(); + TimeDelta day_delta = TimeDelta::FromDays(1); { sql::Transaction transaction(&db); transaction.Begin(); while (statement.Step()) { URLRow row; FillURLRow(statement, &row); - base::Time last_visit = time_right_now; + Time last_visit = time_right_now; for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i) last_visit -= day_delta; row.set_last_visit(last_visit); @@ -103,7 +105,7 @@ class InMemoryURLIndexTest : public testing::Test, }; TEST_F(InMemoryURLIndexTest, Construction) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_.reset(new InMemoryURLIndex); EXPECT_TRUE(url_index_.get()); } @@ -115,261 +117,68 @@ TEST_F(InMemoryURLIndexTest, Initialization) { EXPECT_TRUE(statement); uint64 row_count = 0; while (statement.Step()) ++row_count; - EXPECT_EQ(row_count, 33U); + EXPECT_EQ(33U, row_count); url_index_.reset(new InMemoryURLIndex); url_index_->Init(this, "en,ja,hi,zh"); - EXPECT_EQ(url_index_->history_item_count_, 28); + + // There should have been 27 of the 31 urls accepted during filtering. + EXPECT_EQ(28U, url_index_->history_item_count_); // history_info_map_ should have the same number of items as were filtered. - EXPECT_EQ(url_index_->history_info_map_.size(), 28U); - EXPECT_EQ(url_index_->char_word_map_.size(), 37U); - EXPECT_EQ(url_index_->word_map_.size(), 91U); + EXPECT_EQ(28U, url_index_->history_info_map_.size()); + + // The resulting indexes should account for: + // 37 characters + // 88 words + EXPECT_EQ(37U, url_index_->char_word_map_.size()); + EXPECT_EQ(91U, url_index_->word_map_.size()); } TEST_F(InMemoryURLIndexTest, Retrieval) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); - url_index_->Init(this, "en,ja,hi,zh"); + url_index_.reset(new InMemoryURLIndex); + std::string languages("en,ja,hi,zh"); + url_index_->Init(this, languages); InMemoryURLIndex::String16Vector terms; // The term will be lowercased by the search. // See if a very specific term gives a single result. - terms.push_back(ASCIIToUTF16("DrudgeReport")); - EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); + string16 term = ASCIIToUTF16("DrudgeReport"); + terms.push_back(term); + ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); + EXPECT_EQ(1U, matches.size()); // Search which should result in multiple results. terms.clear(); - terms.push_back(ASCIIToUTF16("drudge")); - ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 2U); + term = ASCIIToUTF16("drudge"); + terms.push_back(term); + matches = url_index_->HistoryItemsForTerms(terms); + ASSERT_EQ(2U, matches.size()); // The results should be in descending score order. EXPECT_GT(matches[0].raw_score, matches[1].raw_score); // Search which should result in nearly perfect result. terms.clear(); - terms.push_back(ASCIIToUTF16("http")); - terms.push_back(ASCIIToUTF16("NearlyPerfectResult")); + term = ASCIIToUTF16("http"); + terms.push_back(term); + term = ASCIIToUTF16("NearlyPerfectResult"); + terms.push_back(term); matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(matches.size(), 1U); + ASSERT_EQ(1U, matches.size()); // The results should have a very high score. EXPECT_GT(matches[0].raw_score, 900); // Search which should result in very poor result. terms.clear(); - terms.push_back(ASCIIToUTF16("z")); - terms.push_back(ASCIIToUTF16("y")); - terms.push_back(ASCIIToUTF16("x")); + term = ASCIIToUTF16("z"); + terms.push_back(term); + term = ASCIIToUTF16("y"); + terms.push_back(term); + term = ASCIIToUTF16("x"); + terms.push_back(term); matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(matches.size(), 1U); + ASSERT_EQ(1U, matches.size()); // The results should have a poor score. EXPECT_LT(matches[0].raw_score, 200); } -TEST_F(InMemoryURLIndexTest, Char16Utilities) { - string16 term = ASCIIToUTF16("drudgereport"); - string16 expected = ASCIIToUTF16("drugepot"); - EXPECT_EQ(expected.size(), - InMemoryURLIndex::Char16SetFromString16(term).size()); - InMemoryURLIndex::Char16Vector c_vector = - InMemoryURLIndex::Char16VectorFromString16(term); - ASSERT_EQ(expected.size(), c_vector.size()); - - InMemoryURLIndex::Char16Vector::const_iterator c_iter = c_vector.begin(); - for (string16::const_iterator s_iter = expected.begin(); - s_iter != expected.end(); ++s_iter, ++c_iter) - EXPECT_EQ(*s_iter, *c_iter); -} - -TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { - // Verify that match results for previously typed characters are retained - // (in the term_char_word_set_cache_) and reused, if possible, in future - // autocompletes. - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); - url_index_->Init(this, "en,ja,hi,zh"); - - // Verify that we can find something that already exists. - InMemoryURLIndex::String16Vector terms; - string16 term = ASCIIToUTF16("drudgerepo"); - terms.push_back(term); - EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); - - { - // Exercise the term matching cache with the same term. - InMemoryURLIndex::Char16Vector uni_chars = - InMemoryURLIndex::Char16VectorFromString16(term); - EXPECT_EQ(uni_chars.size(), 7U); // Equivalent to 'degopru' - EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U); - } - - { - // Back off a character. - InMemoryURLIndex::Char16Vector uni_chars = - InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("drudgerep")); - EXPECT_EQ(uni_chars.size(), 6U); // Equivalent to 'degpru' - EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 5U); - } - - { - // Add a couple of characters. - InMemoryURLIndex::Char16Vector uni_chars = - InMemoryURLIndex::Char16VectorFromString16( - ASCIIToUTF16("drudgereporta")); - EXPECT_EQ(uni_chars.size(), 9U); // Equivalent to 'adegoprtu' - EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U); - } - - { - // Use different string. - InMemoryURLIndex::Char16Vector uni_chars = - InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("abcde")); - EXPECT_EQ(uni_chars.size(), 5U); - EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), - static_cast<size_t>(-1)); - } -} - -TEST_F(InMemoryURLIndexTest, AddNewRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); - url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; - - // Verify that the row we're going to add does not already exist. - URLID new_row_id = 87654321; - // Newly created URLRows get a last_visit time of 'right now' so it should - // qualify as a quick result candidate. - terms.push_back(ASCIIToUTF16("brokeandalone")); - EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); - - // Add a new row. - URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id); - new_row.set_last_visit(base::Time::Now()); - url_index_->UpdateURL(new_row_id, new_row); - - // Verify that we can retrieve it. - EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); - - // Add it again just to be sure that is harmless. - url_index_->UpdateURL(new_row_id, new_row); - EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); -} - -TEST_F(InMemoryURLIndexTest, DeleteRows) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); - url_index_->Init(this, "en,ja,hi,zh"); - InMemoryURLIndex::String16Vector terms; - - // Make sure we actually get an existing result. - terms.push_back(ASCIIToUTF16("DrudgeReport")); - ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); - EXPECT_EQ(matches.size(), 1U); - - // Determine the row id for that result, delete that id, then search again. - url_index_->DeleteURL(matches[0].url_info.id()); - EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); -} - -TEST_F(InMemoryURLIndexTest, CacheFilePath) { - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( - "/flammmy/frammy/")))); - FilePath full_file_path; - url_index_->GetCacheFilePath(&full_file_path); - std::vector<FilePath::StringType> expected_parts; - FilePath(FILE_PATH_LITERAL("/flammmy/frammy/History Provider Cache")). - GetComponents(&expected_parts); - std::vector<FilePath::StringType> actual_parts; - full_file_path.GetComponents(&actual_parts); - ASSERT_EQ(expected_parts.size(), actual_parts.size()); - size_t count = expected_parts.size(); - for (size_t i = 0; i < count; ++i) - EXPECT_EQ(expected_parts[i], actual_parts[i]); -} - -TEST_F(InMemoryURLIndexTest, CacheSaveRestore) { - // Save the cache to a protobuf, restore it, and compare the results. - url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); - InMemoryURLIndex& url_index(*(url_index_.get())); - url_index.Init(this, "en,ja,hi,zh"); - in_memory_url_index::InMemoryURLIndexCacheItem index_cache; - url_index.SavePrivateData(&index_cache); - - // Capture our private data so we can later compare for equality. - int history_item_count(url_index.history_item_count_); - InMemoryURLIndex::String16Vector word_list(url_index.word_list_); - InMemoryURLIndex::WordMap word_map(url_index.word_map_); - InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_); - InMemoryURLIndex::WordIDHistoryMap word_id_history_map( - url_index.word_id_history_map_); - InMemoryURLIndex::HistoryInfoMap history_info_map( - url_index.history_info_map_); - - // Prove that there is really something there. - EXPECT_GT(url_index.history_item_count_, 0); - EXPECT_FALSE(url_index.word_list_.empty()); - EXPECT_FALSE(url_index.word_map_.empty()); - EXPECT_FALSE(url_index.char_word_map_.empty()); - EXPECT_FALSE(url_index.word_id_history_map_.empty()); - EXPECT_FALSE(url_index.history_info_map_.empty()); - - // Clear and then prove it's clear. - url_index.ClearPrivateData(); - EXPECT_EQ(url_index.history_item_count_, 0); - EXPECT_TRUE(url_index.word_list_.empty()); - EXPECT_TRUE(url_index.word_map_.empty()); - EXPECT_TRUE(url_index.char_word_map_.empty()); - EXPECT_TRUE(url_index.word_id_history_map_.empty()); - EXPECT_TRUE(url_index.history_info_map_.empty()); - - // Restore the cache. - EXPECT_TRUE(url_index.RestorePrivateData(index_cache)); - - // Compare the restored and captured for equality. - EXPECT_EQ(history_item_count, url_index.history_item_count_); - EXPECT_EQ(word_list.size(), url_index.word_list_.size()); - EXPECT_EQ(word_map.size(), url_index.word_map_.size()); - EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size()); - EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size()); - EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size()); - // WordList must be index-by-index equal. - size_t count = word_list.size(); - for (size_t i = 0; i < count; ++i) - EXPECT_EQ(word_list[i], url_index.word_list_[i]); - for (InMemoryURLIndex::CharWordIDMap::const_iterator expected = - char_word_map.begin(); expected != char_word_map.end(); ++expected) { - InMemoryURLIndex::CharWordIDMap::const_iterator actual = - url_index.char_word_map_.find(expected->first); - ASSERT_TRUE(url_index.char_word_map_.end() != actual); - const InMemoryURLIndex::WordIDSet& expected_set(expected->second); - const InMemoryURLIndex::WordIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::WordIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected = - word_id_history_map.begin(); expected != word_id_history_map.end(); - ++expected) { - InMemoryURLIndex::WordIDHistoryMap::const_iterator actual = - url_index.word_id_history_map_.find(expected->first); - ASSERT_TRUE(url_index.word_id_history_map_.end() != actual); - const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second); - const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second); - ASSERT_EQ(expected_set.size(), actual_set.size()); - for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter = - expected_set.begin(); set_iter != expected_set.end(); ++set_iter) - EXPECT_GT(actual_set.count(*set_iter), 0U); - } - for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected = - history_info_map.begin(); expected != history_info_map.end(); - ++expected) { - InMemoryURLIndex::HistoryInfoMap::const_iterator actual = - url_index.history_info_map_.find(expected->first); - ASSERT_FALSE(url_index.history_info_map_.end() == actual); - const URLRow& expected_row(expected->second); - const URLRow& actual_row(actual->second); - EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count()); - EXPECT_EQ(expected_row.typed_count(), actual_row.typed_count()); - EXPECT_EQ(expected_row.last_visit(), actual_row.last_visit()); - EXPECT_EQ(expected_row.url(), actual_row.url()); - } -} - } // namespace history diff --git a/chrome/browser/history/url_database.cc b/chrome/browser/history/url_database.cc index f6c041c..24decc6 100644 --- a/chrome/browser/history/url_database.cc +++ b/chrome/browser/history/url_database.cc @@ -238,25 +238,6 @@ bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { return true; } -bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { - DCHECK(!enumerator->initialized_); - std::string sql("SELECT "); - sql.append(kURLRowFields); - sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count > ? OR " - "typed_count > ?"); - enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); - if (!enumerator->statement_) { - NOTREACHED() << GetDB().GetErrorMessage(); - return false; - } - enumerator->statement_.BindInt64( - 0, AutocompleteAgeThreshold().ToInternalValue()); - enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); - enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); - enumerator->initialized_ = true; - return true; -} - bool URLDatabase::IsFavIconUsed(FavIconID favicon_id) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT id FROM urls WHERE favicon_id=? LIMIT 1")); diff --git a/chrome/browser/history/url_database.h b/chrome/browser/history/url_database.h index c821ce7..ae1d7cd 100644 --- a/chrome/browser/history/url_database.h +++ b/chrome/browser/history/url_database.h @@ -127,14 +127,8 @@ class URLDatabase { sql::Statement statement_; }; - // Initializes the given enumerator to enumerator all URLs in the database. - bool InitURLEnumeratorForEverything(URLEnumerator* enumerator); - // Initializes the given enumerator to enumerator all URLs in the database - // that are historically significant: ones having been visited within 3 days, - // having their URL manually typed more than once, or having been visited - // more than 3 times. - bool InitURLEnumeratorForSignificant(URLEnumerator* enumerator); + bool InitURLEnumeratorForEverything(URLEnumerator* enumerator); // Favicons ------------------------------------------------------------------ diff --git a/chrome/browser/history/url_database_unittest.cc b/chrome/browser/history/url_database_unittest.cc index c2f22ff6..b164e48 100644 --- a/chrome/browser/history/url_database_unittest.cc +++ b/chrome/browser/history/url_database_unittest.cc @@ -125,7 +125,8 @@ TEST_F(URLDatabaseTest, AddURL) { // Tests adding, querying and deleting keyword visits. TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { - URLRow url_info1(GURL("http://www.google.com/")); + const GURL url1("http://www.google.com/"); + URLRow url_info1(url1); url_info1.set_title(UTF8ToUTF16("Google")); url_info1.set_visit_count(4); url_info1.set_typed_count(2); @@ -164,7 +165,8 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { // Make sure deleting a URL also deletes a keyword visit. TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) { - URLRow url_info1(GURL("http://www.google.com/")); + const GURL url1("http://www.google.com/"); + URLRow url_info1(url1); url_info1.set_title(UTF8ToUTF16("Google")); url_info1.set_visit_count(4); url_info1.set_typed_count(2); @@ -185,41 +187,4 @@ TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) { ASSERT_EQ(0U, matches.size()); } -TEST_F(URLDatabaseTest, EnumeratorForSignificant) { - std::set<std::string> good_urls; - // Add URLs which do and don't meet the criteria. - URLRow url_no_match(GURL("http://www.url_no_match.com/")); - EXPECT_TRUE(AddURL(url_no_match)); - - std::string url_string2("http://www.url_match_visit_count.com/"); - good_urls.insert("http://www.url_match_visit_count.com/"); - URLRow url_match_visit_count(GURL("http://www.url_match_visit_count.com/")); - url_match_visit_count.set_visit_count(kLowQualityMatchVisitLimit + 1); - EXPECT_TRUE(AddURL(url_match_visit_count)); - - good_urls.insert("http://www.url_match_typed_count.com/"); - URLRow url_match_typed_count(GURL("http://www.url_match_typed_count.com/")); - url_match_typed_count.set_typed_count(kLowQualityMatchTypedLimit + 1); - EXPECT_TRUE(AddURL(url_match_typed_count)); - - good_urls.insert("http://www.url_match_last_visit.com/"); - URLRow url_match_last_visit(GURL("http://www.url_match_last_visit.com/")); - url_match_last_visit.set_last_visit(Time::Now() - TimeDelta::FromDays(1)); - EXPECT_TRUE(AddURL(url_match_last_visit)); - - URLRow url_no_match_last_visit(GURL( - "http://www.url_no_match_last_visit.com/")); - url_no_match_last_visit.set_last_visit(Time::Now() - - TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1)); - EXPECT_TRUE(AddURL(url_no_match_last_visit)); - - URLDatabase::URLEnumerator history_enum; - EXPECT_TRUE(InitURLEnumeratorForSignificant(&history_enum)); - URLRow row; - int row_count = 0; - for (; history_enum.GetNextURL(&row); ++row_count) - EXPECT_EQ(1U, good_urls.count(row.url().spec())); - EXPECT_EQ(3, row_count); -} - } // namespace history diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index 972f1bc..b290923 100644 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -17,7 +17,6 @@ 'common', 'common_net', 'debugger', - 'in_memory_url_index_cache_proto', 'installer_util', 'platform_locale_settings', 'profile_import', @@ -1233,7 +1232,6 @@ 'browser/history/in_memory_history_backend.h', 'browser/history/in_memory_url_index.cc', 'browser/history/in_memory_url_index.h', - '<(protoc_out_dir)/chrome/browser/history/in_memory_url_index_cache.pb.cc', 'browser/history/page_usage_data.cc', 'browser/history/page_usage_data.h', 'browser/history/query_parser.cc', @@ -4497,52 +4495,6 @@ '../third_party/protobuf/protobuf.gyp:protobuf_lite', ], }, - { - # Protobuf compiler / generator for the InMemoryURLIndex caching - # protocol buffer. - 'target_name': 'in_memory_url_index_cache_proto', - 'type': 'none', - 'sources': [ 'browser/history/in_memory_url_index_cache.proto' ], - 'rules': [ - { - 'rule_name': 'genproto', - 'extension': 'proto', - 'inputs': [ - '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', - ], - 'variables': { - # The protoc compiler requires a proto_path argument with the - # directory containing the .proto file. - # There's no generator variable that corresponds to this, so fake - # it. - 'rule_input_relpath': 'browser/history', - }, - 'outputs': [ - '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.h', - '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.cc', - ], - 'action': [ - '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)', - '--proto_path=./<(rule_input_relpath)', - './<(rule_input_relpath)/<(RULE_INPUT_ROOT)<(RULE_INPUT_EXT)', - '--cpp_out=<(protoc_out_dir)/chrome/<(rule_input_relpath)', - ], - 'message': 'Generating C++ code from <(RULE_INPUT_PATH)', - }, - ], - 'dependencies': [ - '../third_party/protobuf/protobuf.gyp:protobuf_lite', - '../third_party/protobuf/protobuf.gyp:protoc#host', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - '<(protoc_out_dir)', - ] - }, - 'export_dependent_settings': [ - '../third_party/protobuf/protobuf.gyp:protobuf_lite', - ], - }, ], } diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index cb7c26f..8c99ce1 100644 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -1137,7 +1137,6 @@ ], }, 'sources': [ - '<(protoc_out_dir)/chrome/browser/history/in_memory_url_index_cache.pb.cc', 'app/breakpad_mac_stubs.mm', 'app/chrome_dll.rc', # All unittests in browser, common, renderer and service. diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc index 08f61d2..e3e325d 100644 --- a/chrome/common/chrome_switches.cc +++ b/chrome/common/chrome_switches.cc @@ -519,9 +519,6 @@ const char kEnableFastback[] = "enable-fastback"; // testing, for example page cycler and layout tests. See bug 1157243. const char kEnableFileCookies[] = "enable-file-cookies"; -// Enable the use of the HistoryQuickProvider for autocomplete results. -const char kEnableHistoryQuickProvider[] = "enable-history-quick-provider"; - // Enable FileSystem API URLs. const char kEnableFileSystemURLScheme[] = "enable-filesystem-url-scheme"; diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h index 4c21463..4504f2c 100644 --- a/chrome/common/chrome_switches.h +++ b/chrome/common/chrome_switches.h @@ -157,7 +157,6 @@ extern const char kEnableFastback[]; extern const char kEnableFileCookies[]; extern const char kEnableFileSystemURLScheme[]; extern const char kEnableGPUPlugin[]; -extern const char kEnableHistoryQuickProvider[]; extern const char kEnableInBrowserThumbnailing[]; extern const char kEnableIPv6[]; extern const char kEnableJavaScriptI18NAPI[]; |