diff options
author | Kristian Monsen <kristianm@google.com> | 2011-06-09 11:47:42 +0100 |
---|---|---|
committer | Kristian Monsen <kristianm@google.com> | 2011-06-29 14:33:03 +0100 |
commit | dc0f95d653279beabeb9817299e2902918ba123e (patch) | |
tree | 32eb121cd532053a5b9cb0c390331349af8d6baa /chrome/browser/history | |
parent | ba160cd4054d13d0cb0b1b46e61c3bed67095811 (diff) | |
download | external_chromium-dc0f95d653279beabeb9817299e2902918ba123e.zip external_chromium-dc0f95d653279beabeb9817299e2902918ba123e.tar.gz external_chromium-dc0f95d653279beabeb9817299e2902918ba123e.tar.bz2 |
Merge Chromium at r11.0.696.0: Initial merge by git
Change-Id: I273dde2843af0839dfc08b419bb443fbd449532d
Diffstat (limited to 'chrome/browser/history')
30 files changed, 1168 insertions, 252 deletions
diff --git a/chrome/browser/history/download_create_info.cc b/chrome/browser/history/download_create_info.cc index d1ea28e..eaa007a 100644 --- a/chrome/browser/history/download_create_info.cc +++ b/chrome/browser/history/download_create_info.cc @@ -31,7 +31,8 @@ DownloadCreateInfo::DownloadCreateInfo(const FilePath& path, request_id(-1), db_handle(0), prompt_user_for_save_location(false), - is_dangerous(false), + is_dangerous_file(false), + is_dangerous_url(false), is_extension_install(false) { } @@ -47,13 +48,17 @@ DownloadCreateInfo::DownloadCreateInfo() request_id(-1), db_handle(0), prompt_user_for_save_location(false), - is_dangerous(false), + is_dangerous_file(false), + is_dangerous_url(false), is_extension_install(false) { } DownloadCreateInfo::~DownloadCreateInfo() { } +bool DownloadCreateInfo::IsDangerous() { + return is_dangerous_url || is_dangerous_file; +} std::string DownloadCreateInfo::DebugString() const { return base::StringPrintf("{" " url_ = \"%s\"" @@ -76,4 +81,3 @@ std::string DownloadCreateInfo::DebugString() const { download_id, prompt_user_for_save_location ? 'T' : 'F'); } - diff --git a/chrome/browser/history/download_create_info.h b/chrome/browser/history/download_create_info.h index 0814350..455e77a 100644 --- a/chrome/browser/history/download_create_info.h +++ b/chrome/browser/history/download_create_info.h @@ -32,6 +32,9 @@ struct DownloadCreateInfo { DownloadCreateInfo(); ~DownloadCreateInfo(); + // Indicates if the download is dangerous. + bool IsDangerous(); + std::string DebugString() const; // DownloadItem fields @@ -68,8 +71,10 @@ struct DownloadCreateInfo { // False if the UI should be supressed and the download performed to the // default location. bool prompt_user_for_save_location; - // Whether this download is potentially dangerous (ex: exe, dll, ...). - bool is_dangerous; + // Whether this download file is potentially dangerous (ex: exe, dll, ...). + bool is_dangerous_file; + // If safebrowsing believes this URL leads to malware. + bool is_dangerous_url; // The original name for a dangerous download. FilePath original_name; // Whether this download is for extension install or not. diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc index d71535b..cf080a8 100644 --- a/chrome/browser/history/expire_history_backend_unittest.cc +++ b/chrome/browser/history/expire_history_backend_unittest.cc @@ -15,7 +15,6 @@ #include "base/string16.h" #include "base/utf_string_conversions.h" #include "chrome/browser/bookmarks/bookmark_model.h" -#include "chrome/browser/browser_thread.h" #include "chrome/browser/history/archived_database.h" #include "chrome/browser/history/expire_history_backend.h" #include "chrome/browser/history/history_database.h" @@ -26,6 +25,7 @@ #include "chrome/common/thumbnail_score.h" #include "chrome/test/testing_profile.h" #include "chrome/tools/profiles/thumbnail-inl.h" +#include "content/browser/browser_thread.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/skia/include/core/SkBitmap.h" #include "ui/gfx/codec/jpeg_codec.h" diff --git a/chrome/browser/history/history.cc b/chrome/browser/history/history.cc index 09434e1..4bbf6a8 100644 --- a/chrome/browser/history/history.cc +++ b/chrome/browser/history/history.cc @@ -33,7 +33,6 @@ #include "chrome/browser/autocomplete/history_url_provider.h" #include "chrome/browser/browser_list.h" #include "chrome/browser/browser_process.h" -#include "chrome/browser/browser_thread.h" #include "chrome/browser/browser_window.h" #include "chrome/browser/history/download_create_info.h" #include "chrome/browser/history/history_backend.h" @@ -50,6 +49,7 @@ #include "chrome/common/pref_names.h" #include "chrome/common/thumbnail_score.h" #include "chrome/common/url_constants.h" +#include "content/browser/browser_thread.h" #include "grit/chromium_strings.h" #include "grit/generated_resources.h" #include "third_party/skia/include/core/SkBitmap.h" diff --git a/chrome/browser/history/history.h b/chrome/browser/history/history.h index 6afe318..ce0d29e 100644 --- a/chrome/browser/history/history.h +++ b/chrome/browser/history/history.h @@ -16,7 +16,6 @@ #include "base/scoped_ptr.h" #include "base/string16.h" #include "base/task.h" -#include "chrome/browser/cancelable_request.h" #include "chrome/browser/favicon_service.h" #include "chrome/browser/history/history_types.h" #include "chrome/browser/search_engines/template_url_id.h" @@ -24,6 +23,7 @@ #include "chrome/common/notification_registrar.h" #include "chrome/common/page_transition_types.h" #include "chrome/common/ref_counted_util.h" +#include "content/browser/cancelable_request.h" class BookmarkService; struct DownloadCreateInfo; diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc index 50f47d5..dd7bb12 100644 --- a/chrome/browser/history/history_backend.cc +++ b/chrome/browser/history/history_backend.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -376,7 +376,7 @@ void HistoryBackend::AddPage(scoped_refptr<HistoryAddPageArgs> request) { // If a redirect chain is given, we expect the last item in that chain to be // the final URL. - DCHECK(request->redirects.size() == 0 || + DCHECK(request->redirects.empty() || request->redirects.back() == request->url); // Avoid duplicating times in the database, at least as long as pages are @@ -550,7 +550,7 @@ void HistoryBackend::InitImpl(const std::string& languages) { // Fill the in-memory database and send it back to the history service on the // main thread. InMemoryHistoryBackend* mem_backend = new InMemoryHistoryBackend; - if (mem_backend->Init(history_name, db_.get(), languages)) + if (mem_backend->Init(history_name, history_dir_, db_.get(), languages)) delegate_->SetInMemoryBackend(mem_backend); // Takes ownership of pointer. else delete mem_backend; // Error case, run without the in-memory DB. diff --git a/chrome/browser/history/history_browsertest.cc b/chrome/browser/history/history_browsertest.cc index d5dc39b..5a72806 100644 --- a/chrome/browser/history/history_browsertest.cc +++ b/chrome/browser/history/history_browsertest.cc @@ -5,7 +5,6 @@ #include <vector> #include "base/message_loop.h" -#include "chrome/browser/browser_thread.h" #include "chrome/browser/history/history.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" @@ -13,6 +12,7 @@ #include "chrome/common/pref_names.h" #include "chrome/test/in_process_browser_test.h" #include "chrome/test/ui_test_utils.h" +#include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" namespace { diff --git a/chrome/browser/history/history_marshaling.h b/chrome/browser/history/history_marshaling.h index 1efd670..b270b74 100644 --- a/chrome/browser/history/history_marshaling.h +++ b/chrome/browser/history/history_marshaling.h @@ -10,10 +10,10 @@ #pragma once #include "base/scoped_vector.h" -#include "chrome/browser/cancelable_request.h" #include "chrome/browser/favicon_service.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/page_usage_data.h" +#include "content/browser/cancelable_request.h" namespace history { diff --git a/chrome/browser/history/history_publisher.cc b/chrome/browser/history/history_publisher.cc index 0392632..9698517 100644 --- a/chrome/browser/history/history_publisher.cc +++ b/chrome/browser/history/history_publisher.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2008-2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -29,13 +29,11 @@ void HistoryPublisher::PublishPageContent(const base::Time& time, const GURL& url, const string16& title, const string16& contents) const { - std::wstring wide_title = UTF16ToWide(title); - std::wstring wide_contents = UTF16ToWide(contents); PageData page_data = { time, url, - wide_contents.c_str(), - wide_title.c_str(), + contents.c_str(), + title.c_str(), NULL, NULL, }; diff --git a/chrome/browser/history/history_publisher.h b/chrome/browser/history/history_publisher.h index fdf94b7..f5ac086 100644 --- a/chrome/browser/history/history_publisher.h +++ b/chrome/browser/history/history_publisher.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -46,8 +46,8 @@ class HistoryPublisher { struct PageData { const base::Time& time; const GURL& url; - const wchar_t* html; - const wchar_t* title; + const char16* html; + const char16* title; const char* thumbnail_format; const std::vector<unsigned char>* thumbnail; }; diff --git a/chrome/browser/history/history_publisher_win.cc b/chrome/browser/history/history_publisher_win.cc index 4aeb333..f54be58 100644 --- a/chrome/browser/history/history_publisher_win.cc +++ b/chrome/browser/history/history_publisher_win.cc @@ -105,7 +105,7 @@ bool HistoryPublisher::ReadRegisteredIndexersFromRegistry() { kRegKeyRegisteredIndexersInfo, &indexers_); AddRegisteredIndexers(HKEY_LOCAL_MACHINE, kRegKeyRegisteredIndexersInfo, &indexers_); - return indexers_.size() > 0; + return !indexers_.empty(); } void HistoryPublisher::PublishDataToIndexers(const PageData& page_data) diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc index d3753bf..9f56ff0 100644 --- a/chrome/browser/history/history_types.cc +++ b/chrome/browser/history/history_types.cc @@ -9,9 +9,6 @@ #include "base/logging.h" #include "base/stl_util-inl.h" -using base::Time; -using base::TimeDelta; - namespace history { // URLRow ---------------------------------------------------------------------- @@ -62,7 +59,7 @@ void URLRow::Initialize() { id_ = 0; visit_count_ = 0; typed_count_ = 0; - last_visit_ = Time(); + last_visit_ = base::Time(); hidden_ = false; favicon_id_ = 0; } @@ -79,7 +76,7 @@ VisitRow::VisitRow() } VisitRow::VisitRow(URLID arg_url_id, - Time arg_visit_time, + base::Time arg_visit_time, VisitID arg_referring_visit, PageTransition::Type arg_transition, SegmentID arg_segment_id) @@ -177,7 +174,7 @@ const size_t* QueryResults::MatchesForURL(const GURL& url, // All entries in the map should have at least one index, otherwise it // shouldn't be in the map. - DCHECK(found->second->size() > 0); + DCHECK(!found->second->empty()); if (num_matches) *num_matches = found->second->size(); return &found->second->front(); @@ -386,4 +383,24 @@ MostVisitedThumbnails::MostVisitedThumbnails() {} MostVisitedThumbnails::~MostVisitedThumbnails() {} +// Autocomplete thresholds ----------------------------------------------------- + +const int kLowQualityMatchTypedLimit = 1; +const int kLowQualityMatchVisitLimit = 3; +const int kLowQualityMatchAgeLimitInDays = 3; + +base::Time AutocompleteAgeThreshold() { + return (base::Time::Now() - + base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays)); +} + +bool RowQualifiesAsSignificant(const URLRow& row, + const base::Time& threshold) { + const base::Time& real_threshold = + threshold.is_null() ? AutocompleteAgeThreshold() : threshold; + return (row.typed_count() > kLowQualityMatchTypedLimit) || + (row.visit_count() > kLowQualityMatchVisitLimit) || + (row.last_visit() >= real_threshold); +} + } // namespace history diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h index 320c991..ed219a8 100644 --- a/chrome/browser/history/history_types.h +++ b/chrome/browser/history/history_types.h @@ -664,6 +664,26 @@ class MostVisitedThumbnails DISALLOW_COPY_AND_ASSIGN(MostVisitedThumbnails); }; +// Autocomplete thresholds ----------------------------------------------------- + +// Constants which specify, when considered altogether, 'significant' +// history items. These are used to filter out insignificant items +// for consideration as autocomplete candidates. +extern const int kLowQualityMatchTypedLimit; +extern const int kLowQualityMatchVisitLimit; +extern const int kLowQualityMatchAgeLimitInDays; + +// Returns the date threshold for considering an history item as significant. +base::Time AutocompleteAgeThreshold(); + +// Return true if |row| qualifies as an autocomplete candidate. If |time_cache| +// is_null() then this function determines a new time threshold each time it is +// called. Since getting system time can be costly (such as for cases where +// this function will be called in a loop over many history items), you can +// provide a non-null |time_cache| by simply initializing |time_cache| with +// AutocompleteAgeThreshold() (or any other desired time in the past). +bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold); + } // namespace history #endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_ diff --git a/chrome/browser/history/history_types_unittest.cc b/chrome/browser/history/history_types_unittest.cc index 5e14de5..beabcef 100644 --- a/chrome/browser/history/history_types_unittest.cc +++ b/chrome/browser/history/history_types_unittest.cc @@ -2,11 +2,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "base/utf_string_conversions.h" #include "chrome/browser/history/history_types.h" #include "testing/gtest/include/gtest/gtest.h" -using base::Time; - namespace history { namespace { @@ -40,9 +39,9 @@ static const char kURL3[] = "http://images.google.com/"; void AddSimpleData(QueryResults* results) { GURL url1(kURL1); GURL url2(kURL2); - URLResult result1(url1, Time::Now()); - URLResult result2(url1, Time::Now()); - URLResult result3(url2, Time::Now()); + URLResult result1(url1, base::Time::Now()); + URLResult result2(url1, base::Time::Now()); + URLResult result3(url2, base::Time::Now()); // The URLResults are invalid after being inserted. results->AppendURLBySwapping(&result1); @@ -55,8 +54,8 @@ void AddSimpleData(QueryResults* results) { void AddAlternateData(QueryResults* results) { GURL url2(kURL2); GURL url3(kURL3); - URLResult result1(url2, Time::Now()); - URLResult result2(url3, Time::Now()); + URLResult result1(url2, base::Time::Now()); + URLResult result2(url3, base::Time::Now()); // The URLResults are invalid after being inserted. results->AppendURLBySwapping(&result1); @@ -168,4 +167,32 @@ TEST(HistoryQueryResult, AppendResults) { EXPECT_TRUE(matches[0] == 3); } +TEST(HistoryQueryResult, RowSignificance) { + const base::Time& threshold(AutocompleteAgeThreshold()); + const GURL url("http://www.google.com/"); + URLRow url_row(url); + url_row.set_title(UTF8ToUTF16("Google")); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_visit_count(kLowQualityMatchVisitLimit + 1); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_visit_count(1); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_typed_count(kLowQualityMatchTypedLimit + 1); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_typed_count(0); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_last_visit(base::Time::Now() - base::TimeDelta::FromDays(1)); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time())); + url_row.set_last_visit(base::Time::Now() - + base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1)); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold)); + EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time())); +} + } // namespace diff --git a/chrome/browser/history/in_memory_history_backend.cc b/chrome/browser/history/in_memory_history_backend.cc index cfeb618..937e861 100644 --- a/chrome/browser/history/in_memory_history_backend.cc +++ b/chrome/browser/history/in_memory_history_backend.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,7 +8,6 @@ #include <vector> #include "base/command_line.h" -#include "base/metrics/histogram.h" #include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/browser_process.h" @@ -32,25 +31,24 @@ InMemoryHistoryBackend::InMemoryHistoryBackend() } InMemoryHistoryBackend::~InMemoryHistoryBackend() { + if (index_.get()) + index_->ShutDown(); } bool InMemoryHistoryBackend::Init(const FilePath& history_filename, + const FilePath& history_dir, URLDatabase* db, const std::string& languages) { db_.reset(new InMemoryDatabase); bool success = db_->InitFromDisk(history_filename); -#if 0 - // TODO(mrossetti): Temporary removal to help determine if the - // InMemoryURLIndex is contributing to a startup slowdown. - if (!CommandLine::ForCurrentProcess()->HasSwitch( - switches::kDisableHistoryQuickProvider)) { - index_.reset(new InMemoryURLIndex()); - base::TimeTicks beginning_time = base::TimeTicks::Now(); + if (CommandLine::ForCurrentProcess()->HasSwitch( + switches::kEnableHistoryQuickProvider) && + !CommandLine::ForCurrentProcess()->HasSwitch( + switches::kDisableHistoryQuickProvider)) { + index_.reset(new InMemoryURLIndex(history_dir)); + index_->Init(db, languages); - UMA_HISTOGRAM_TIMES("Autocomplete.HistoryDatabaseIndexingTime", - base::TimeTicks::Now() - beginning_time); } -#endif return success; } @@ -134,7 +132,9 @@ void InMemoryHistoryBackend::OnTypedURLsModified( if (id) db_->UpdateURLRow(id, *i); else - db_->AddURL(*i); + id = db_->AddURL(*i); + if (index_.get()) + index_->UpdateURL(id, *i); } } @@ -147,6 +147,8 @@ void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) { db_.reset(new InMemoryDatabase); if (!db_->InitFromScratch()) db_.reset(); + if (index_.get()) + index_->ReloadFromHistory(db_.get(), true); return; } @@ -158,6 +160,8 @@ void InMemoryHistoryBackend::OnURLsDeleted(const URLsDeletedDetails& details) { // We typically won't have most of them since we only have a subset of // history, so ignore errors. db_->DeleteURLRow(id); + if (index_.get()) + index_->DeleteURL(id); } } } diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h index 6e56636..026fd7e 100644 --- a/chrome/browser/history/in_memory_history_backend.h +++ b/chrome/browser/history/in_memory_history_backend.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -7,7 +7,7 @@ // in-line autocomplete. // // It is created on the history thread and passed to the main thread where -// operations can be completed synchronously. It listenes for notifications +// operations can be completed synchronously. It listens for notifications // from the "regular" history backend and keeps itself in sync. #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_ @@ -41,8 +41,15 @@ class InMemoryHistoryBackend : public NotificationObserver { InMemoryHistoryBackend(); ~InMemoryHistoryBackend(); - // Initializes with data from the given history database. + // Initializes the backend from the history database pointed to by the + // full path in |history_filename|. |history_dir| is the path to the + // directory containing the history database and is also used + // as the directory where the InMemoryURLIndex's cache is kept. |db| is + // used for building the InMemoryURLIndex. |languages| gives the + // preferred user languages with which URLs and page titles are + // interpreted while decomposing into words and characters during indexing. bool Init(const FilePath& history_filename, + const FilePath& history_dir, URLDatabase* db, const std::string& languages); diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 9d2e925..7cbae31 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,22 +8,43 @@ #include <iterator> #include <limits> +#include "base/file_util.h" #include "base/i18n/break_iterator.h" +#include "base/metrics/histogram.h" #include "base/string_util.h" #include "base/time.h" #include "base/utf_string_conversions.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/url_database.h" +#include "chrome/browser/profiles/profile.h" #include "net/base/escape.h" #include "net/base/net_util.h" +#include "third_party/protobuf/src/google/protobuf/repeated_field.h" #include "ui/base/l10n/l10n_util.h" -#include "unicode/utypes.h" // for int32_t -using base::Time; -using base::TimeDelta; +using google::protobuf::RepeatedField; +using google::protobuf::RepeatedPtrField; +using in_memory_url_index::InMemoryURLIndexCacheItem; namespace history { +typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; +typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; +typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; +typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem; +typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry + CharWordMapEntry; +typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem + WordIDHistoryMapItem; +typedef imui:: + InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry + WordIDHistoryMapEntry; +typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; +typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry + HistoryInfoMapEntry; + +const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; + ScoredHistoryMatch::ScoredHistoryMatch() : raw_score(0) {} ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info, @@ -36,25 +57,36 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info, } struct InMemoryURLIndex::TermCharWordSet { - TermCharWordSet(const Char16Set& char_set, + TermCharWordSet() // Required for STL resize(). + : char_(0), + word_id_set_(), + used_(false) {} + TermCharWordSet(const char16& uni_char, const WordIDSet& word_id_set, bool used) - : char_set_(char_set), + : char_(uni_char), word_id_set_(word_id_set), used_(used) {} - bool IsNotUsed() const { return !used_; } + // Predicate for STL algorithm use. + bool is_not_used() const { return !used_; } - Char16Set char_set_; + char16 char_; WordIDSet word_id_set_; bool used_; // true if this set has been used for the current term search. }; -InMemoryURLIndex::InMemoryURLIndex() : history_item_count_(0) {} +InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) + : history_dir_(history_dir), + history_item_count_(0) { +} -InMemoryURLIndex::~InMemoryURLIndex() {} +// Called only by unit tests. +InMemoryURLIndex::InMemoryURLIndex() + : history_item_count_(0) { +} -static const int32_t kURLToLowerBufferSize = 2048; +InMemoryURLIndex::~InMemoryURLIndex() {} // Indexing @@ -62,27 +94,12 @@ bool InMemoryURLIndex::Init(history::URLDatabase* history_db, const std::string& languages) { // TODO(mrossetti): Register for profile/language change notifications. languages_ = languages; - // Reset our indexes. - char_word_map_.clear(); - word_id_history_map_.clear(); - if (!history_db) - return false; - URLDatabase::URLEnumerator history_enum; - if (history_db->InitURLEnumeratorForEverything(&history_enum)) { - URLRow row; - Time recent_threshold = InMemoryURLIndex::RecentThreshold(); - while (history_enum.GetNextURL(&row)) { - // Do some filtering so that we only get history items which could - // possibly pass the HistoryURLProvider::CullPoorMatches filter later. - if ((row.typed_count() > kLowQualityMatchTypedLimit) || - (row.visit_count() > kLowQualityMatchVisitLimit) || - (row.last_visit() >= recent_threshold)) { - if (!IndexRow(row)) - return false; - } - } - } - return true; + return ReloadFromHistory(history_db, false); +} + +void InMemoryURLIndex::ShutDown() { + // Write our cache. + SaveToCacheFile(); } bool InMemoryURLIndex::IndexRow(const URLRow& row) { @@ -102,7 +119,7 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { new_row.set_typed_count(row.typed_count()); new_row.set_last_visit(row.last_visit()); new_row.set_title(row.title()); - history_info_map_.insert(std::make_pair(history_id, new_row)); + history_info_map_[history_id] = new_row; // Split into individual, unique words. String16Set words = WordSetFromString16(url); @@ -118,6 +135,149 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) { return true; } +bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, + bool clear_cache) { + ClearPrivateData(); + + if (!history_db) + return false; + + if (clear_cache || !RestoreFromCacheFile()) { + base::TimeTicks beginning_time = base::TimeTicks::Now(); + // The index has to be built from scratch. + URLDatabase::URLEnumerator history_enum; + if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) + return false; + URLRow row; + while (history_enum.GetNextURL(&row)) { + if (!IndexRow(row)) + return false; + } + UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", + base::TimeTicks::Now() - beginning_time); + SaveToCacheFile(); + } + return true; +} + +void InMemoryURLIndex::ClearPrivateData() { + history_item_count_ = 0; + word_list_.clear(); + word_map_.clear(); + char_word_map_.clear(); + word_id_history_map_.clear(); + term_char_word_set_cache_.clear(); + history_info_map_.clear(); +} + +bool InMemoryURLIndex::RestoreFromCacheFile() { + // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. + // That is: ensure that the database has not been modified since the cache + // was last saved. DB file modification date is inadequate. There are no + // SQLite table checksums automatically stored. + base::TimeTicks beginning_time = base::TimeTicks::Now(); + FilePath file_path; + if (!GetCacheFilePath(&file_path)) + return false; + std::string data; + if (!file_util::ReadFileToString(file_path, &data)) { + LOG(WARNING) << "Failed to read InMemoryURLIndex cache from " + << file_path.value(); + return false; + } + + InMemoryURLIndexCacheItem index_cache; + if (!index_cache.ParseFromArray(data.c_str(), data.size())) { + LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " + << file_path.value(); + return false; + } + + if (!RestorePrivateData(index_cache)) { + ClearPrivateData(); // Back to square one -- must build from scratch. + return false; + } + + UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", + base::TimeTicks::Now() - beginning_time); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); + UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); + return true; +} + +bool InMemoryURLIndex::SaveToCacheFile() { + base::TimeTicks beginning_time = base::TimeTicks::Now(); + InMemoryURLIndexCacheItem index_cache; + SavePrivateData(&index_cache); + std::string data; + if (!index_cache.SerializeToString(&data)) { + LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; + return false; + } + + // Write the cache to a file then swap it for the old cache, if any, and + // delete the old cache. + FilePath file_path; + if (!GetCacheFilePath(&file_path)) + return false; + file_util::ScopedFILE file(file_util::OpenFile(file_path, "w")); + if (!file.get()) + return false; + + int size = data.size(); + if (file_util::WriteFile(file_path, data.c_str(), size) != size) { + LOG(WARNING) << "Failed to write " << file_path.value(); + return false; + } + UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", + base::TimeTicks::Now() - beginning_time); + return true; +} + +void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { + // The row may or may not already be in our index. If it is not already + // indexed and it qualifies then it gets indexed. If it is already + // indexed and still qualifies then it gets updated, otherwise it + // is deleted from the index. + HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); + if (row_pos == history_info_map_.end()) { + // This new row should be indexed if it qualifies. + if (RowQualifiesAsSignificant(row, base::Time())) + IndexRow(row); + } else if (RowQualifiesAsSignificant(row, base::Time())) { + // This indexed row still qualifies and will be re-indexed. + // The url won't have changed but the title, visit count, etc. + // might have changed. + URLRow& old_row = row_pos->second; + old_row.set_visit_count(row.visit_count()); + old_row.set_typed_count(row.typed_count()); + old_row.set_last_visit(row.last_visit()); + // TODO(mrossetti): When we start indexing the title the next line + // will need attention. + old_row.set_title(row.title()); + } else { + // This indexed row no longer qualifies and will be de-indexed. + history_info_map_.erase(row_id); + } + // This invalidates the cache. + term_char_word_set_cache_.clear(); + // TODO(mrossetti): Record this transaction in the cache. +} + +void InMemoryURLIndex::DeleteURL(URLID row_id) { + // Note that this does not remove any reference to this row from the + // word_id_history_map_. That map will continue to contain (and return) + // hits against this row until that map is rebuilt, but since the + // history_info_map_ no longer references the row no erroneous results + // will propagate to the user. + history_info_map_.erase(row_id); + // This invalidates the word cache. + term_char_word_set_cache_.clear(); + // TODO(mrossetti): Record this transaction in the cache. +} + // Searching ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( @@ -155,7 +315,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( term_char_word_set_cache_.erase( std::remove_if(term_char_word_set_cache_.begin(), term_char_word_set_cache_.end(), - std::mem_fun_ref(&TermCharWordSet::IsNotUsed)), + std::mem_fun_ref(&TermCharWordSet::is_not_used)), term_char_word_set_cache_.end()); return scored_items; } @@ -206,9 +366,10 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( const string16& uni_word) { InMemoryURLIndex::HistoryIDSet history_id_set; - // For each character in the word, get the char/word_id map entry and - // intersect with the set in an incremental manner. - Char16Set uni_chars = CharactersFromString16(uni_word); + // For each unique character in the word, in order of first appearance, get + // the char/word_id map entry and intersect with the set in an incremental + // manner. + Char16Vector uni_chars = Char16VectorFromString16(uni_word); WordIDSet word_id_set(WordIDSetForTermChars(uni_chars)); // If any words resulted then we can compose a set of history IDs by unioning @@ -246,7 +407,22 @@ InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( } // static -InMemoryURLIndex::Char16Set InMemoryURLIndex::CharactersFromString16( +InMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16( + const string16& uni_word) { + InMemoryURLIndex::Char16Vector characters; + InMemoryURLIndex::Char16Set unique_characters; + for (string16::const_iterator iter = uni_word.begin(); + iter != uni_word.end(); ++iter) { + if (!unique_characters.count(*iter)) { + unique_characters.insert(*iter); + characters.push_back(*iter); + } + } + return characters; +} + +// static +InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( const string16& uni_word) { Char16Set characters; for (string16::const_iterator iter = uni_word.begin(); @@ -277,17 +453,17 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word, HistoryID history_id) { word_list_.push_back(uni_word); WordID word_id = word_list_.size() - 1; - word_map_.insert(std::make_pair(uni_word, word_id)); + word_map_[uni_word] = word_id; HistoryIDSet history_id_set; history_id_set.insert(history_id); - word_id_history_map_.insert(std::make_pair(word_id, history_id_set)); + word_id_history_map_[word_id] = history_id_set; // For each character in the newly added word (i.e. a word that is not // already in the word index), add the word to the character index. - Char16Set characters = CharactersFromString16(uni_word); + Char16Set characters = Char16SetFromString16(uni_word); for (Char16Set::iterator uni_char_iter = characters.begin(); uni_char_iter != characters.end(); ++uni_char_iter) { - Char16Set::value_type uni_string = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_string); + Char16Set::value_type uni_char = *uni_char_iter; + CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); if (char_iter != char_word_map_.end()) { // Update existing entry in the char/word index. WordIDSet& word_id_set(char_iter->second); @@ -296,89 +472,79 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word, // Create a new entry in the char/word index. WordIDSet word_id_set; word_id_set.insert(word_id); - char_word_map_.insert(std::make_pair(uni_string, word_id_set)); + char_word_map_[uni_char] = word_id_set; } } } InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( - InMemoryURLIndex::Char16Set const& uni_chars) { - TermCharWordSet* best_term_char_word_set = NULL; - bool set_found = false; - size_t outstanding_count = 0; - Char16Set outstanding_chars; - for (TermCharWordSetVector::iterator iter = term_char_word_set_cache_.begin(); - iter != term_char_word_set_cache_.end(); ++iter) { - TermCharWordSetVector::value_type& term_char_word_set(*iter); - Char16Set& char_set(term_char_word_set.char_set_); - Char16Set exclusions; - std::set_difference(char_set.begin(), char_set.end(), - uni_chars.begin(), uni_chars.end(), - std::inserter(exclusions, exclusions.begin())); - if (exclusions.empty()) { - // Do a reverse difference so that we know which characters remain - // to be indexed. Then decide if this is a better match than any - // previous cached set. - std::set_difference(uni_chars.begin(), uni_chars.end(), - char_set.begin(), char_set.end(), - std::inserter(exclusions, exclusions.begin())); - if (!set_found || exclusions.size() < outstanding_count) { - outstanding_chars = exclusions; - best_term_char_word_set = &*iter; - outstanding_count = exclusions.size(); - set_found = true; - } - } - } - + const InMemoryURLIndex::Char16Vector& uni_chars) { + size_t index = CachedResultsIndexForTerm(uni_chars); + + // If there were no unprocessed characters in the search term |uni_chars| + // then we can use the cached one as-is as the results with no further + // filtering. + if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1) + return term_char_word_set_cache_[index].word_id_set_; + + // Some or all of the characters remain to be indexed so trim the cache. + if (index + 1 < term_char_word_set_cache_.size()) + term_char_word_set_cache_.resize(index + 1); WordIDSet word_id_set; - if (set_found && outstanding_count == 0) { - // If there were no oustanding characters then we can use the cached one. - best_term_char_word_set->used_ = true; - word_id_set = best_term_char_word_set->word_id_set_; - } else { - // Some or all of the characters remain to be indexed. - bool first_character = true; - if (set_found) { - first_character = false; - word_id_set = best_term_char_word_set->word_id_set_; - } else { - outstanding_chars = uni_chars; + // Take advantage of our cached starting point, if any. + Char16Vector::const_iterator c_iter = uni_chars.begin(); + if (index != kNoCachedResultForTerm) { + word_id_set = term_char_word_set_cache_[index].word_id_set_; + c_iter += index + 1; + } + // Now process the remaining characters in the search term. + for (; c_iter != uni_chars.end(); ++c_iter) { + Char16Vector::value_type uni_char = *c_iter; + CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); + if (char_iter == char_word_map_.end()) { + // A character was not found so there are no matching results: bail. + word_id_set.clear(); + break; } - - for (Char16Set::iterator uni_char_iter = outstanding_chars.begin(); - uni_char_iter != outstanding_chars.end(); ++uni_char_iter) { - Char16Set::value_type uni_char = *uni_char_iter; - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); - if (char_iter == char_word_map_.end()) { - // The character was not found so bail. - word_id_set.clear(); - break; - } - WordIDSet& char_word_id_set(char_iter->second); - if (first_character) { - word_id_set = char_word_id_set; - first_character = false; - } else { - WordIDSet old_word_id_set(word_id_set); - word_id_set.clear(); - std::set_intersection(old_word_id_set.begin(), - old_word_id_set.end(), - char_word_id_set.begin(), - char_word_id_set.end(), - std::inserter(word_id_set, - word_id_set.begin())); - } + WordIDSet& char_word_id_set(char_iter->second); + // It is possible for there to no longer be any words associated with + // a particular character. Give up in that case. + if (char_word_id_set.empty()) { + word_id_set.clear(); + break; } - // Update the cache. - if (!set_found || outstanding_count) { - term_char_word_set_cache_.push_back(TermCharWordSet(uni_chars, - word_id_set, true)); + + if (word_id_set.empty()) { + word_id_set = char_word_id_set; + } else { + WordIDSet old_word_id_set(word_id_set); + word_id_set.clear(); + std::set_intersection(old_word_id_set.begin(), + old_word_id_set.end(), + char_word_id_set.begin(), + char_word_id_set.end(), + std::inserter(word_id_set, + word_id_set.begin())); } + // Add this new char/set instance to the cache. + term_char_word_set_cache_.push_back(TermCharWordSet( + uni_char, word_id_set, true)); } return word_id_set; } +size_t InMemoryURLIndex::CachedResultsIndexForTerm( + const InMemoryURLIndex::Char16Vector& uni_chars) { + TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin(); + for (Char16Vector::const_iterator c_iter = uni_chars.begin(); + (c_iter != uni_chars.end()) && + (t_iter != term_char_word_set_cache_.end()) && + (*c_iter == t_iter->char_); + ++c_iter, ++t_iter) + t_iter->used_ = true; // Update the cache. + return t_iter - term_char_word_set_cache_.begin() - 1; +} + // static int InMemoryURLIndex::RawScoreForURL(const URLRow& row, const String16Vector& terms, @@ -477,8 +643,9 @@ int InMemoryURLIndex::RawScoreForURL(const URLRow& row, } // static -Time InMemoryURLIndex::RecentThreshold() { - return Time::Now() - TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays); +base::Time InMemoryURLIndex::RecentThreshold() { + return base::Time::Now() - + base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays); } InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( @@ -494,6 +661,9 @@ void InMemoryURLIndex::AddHistoryMatch::operator()( const InMemoryURLIndex::HistoryID history_id) { HistoryInfoMap::const_iterator hist_pos = index_.history_info_map_.find(history_id); + // Note that a history_id may be present in the word_id_history_map_ yet not + // be found in the history_info_map_. This occurs when an item has been + // deleted by the user or the item no longer qualifies as a quick result. if (hist_pos != index_.history_info_map_.end()) { const URLRow& hist_item = hist_pos->second; // TODO(mrossetti): Accommodate multiple term highlighting. @@ -530,4 +700,227 @@ void InMemoryURLIndex::AddHistoryMatch::operator()( } } +bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { + if (history_dir_.empty()) + return false; + *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); + return true; +} + +void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { + DCHECK(cache); + cache->set_timestamp(base::Time::Now().ToInternalValue()); + cache->set_history_item_count(history_item_count_); + SaveWordList(cache); + SaveWordMap(cache); + SaveCharWordMap(cache); + SaveWordIDHistoryMap(cache); + SaveHistoryInfoMap(cache); +} + +bool InMemoryURLIndex::RestorePrivateData( + const InMemoryURLIndexCacheItem& cache) { + last_saved_ = base::Time::FromInternalValue(cache.timestamp()); + history_item_count_ = cache.history_item_count(); + return (history_item_count_ == 0) || (RestoreWordList(cache) && + RestoreWordMap(cache) && RestoreCharWordMap(cache) && + RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); +} + + +void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { + if (word_list_.empty()) + return; + WordListItem* list_item = cache->mutable_word_list(); + list_item->set_word_count(word_list_.size()); + for (String16Vector::const_iterator iter = word_list_.begin(); + iter != word_list_.end(); ++iter) + list_item->add_word(UTF16ToUTF8(*iter)); +} + +bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { + if (!cache.has_word_list()) + return false; + const WordListItem& list_item(cache.word_list()); + uint32 expected_item_count = list_item.word_count(); + uint32 actual_item_count = list_item.word_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + const RepeatedPtrField<std::string>& words(list_item.word()); + for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); + iter != words.end(); ++iter) + word_list_.push_back(UTF8ToUTF16(*iter)); + return true; +} + +void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { + if (word_map_.empty()) + return; + WordMapItem* map_item = cache->mutable_word_map(); + map_item->set_item_count(word_map_.size()); + for (WordMap::const_iterator iter = word_map_.begin(); + iter != word_map_.end(); ++iter) { + WordMapEntry* map_entry = map_item->add_word_map_entry(); + map_entry->set_word(UTF16ToUTF8(iter->first)); + map_entry->set_word_id(iter->second); + } +} + +bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { + if (!cache.has_word_map()) + return false; + const WordMapItem& list_item(cache.word_map()); + uint32 expected_item_count = list_item.item_count(); + uint32 actual_item_count = list_item.word_map_entry_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); + for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); + iter != entries.end(); ++iter) + word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); + return true; +} + +void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { + if (char_word_map_.empty()) + return; + CharWordMapItem* map_item = cache->mutable_char_word_map(); + map_item->set_item_count(char_word_map_.size()); + for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); + iter != char_word_map_.end(); ++iter) { + CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); + map_entry->set_char_16(iter->first); + const WordIDSet& word_id_set(iter->second); + map_entry->set_item_count(word_id_set.size()); + for (WordIDSet::const_iterator set_iter = word_id_set.begin(); + set_iter != word_id_set.end(); ++set_iter) + map_entry->add_word_id(*set_iter); + } +} + +bool InMemoryURLIndex::RestoreCharWordMap( + const InMemoryURLIndexCacheItem& cache) { + if (!cache.has_char_word_map()) + return false; + const CharWordMapItem& list_item(cache.char_word_map()); + uint32 expected_item_count = list_item.item_count(); + uint32 actual_item_count = list_item.char_word_map_entry_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + const RepeatedPtrField<CharWordMapEntry>& + entries(list_item.char_word_map_entry()); + for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = + entries.begin(); iter != entries.end(); ++iter) { + expected_item_count = iter->item_count(); + actual_item_count = iter->word_id_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + char16 uni_char = static_cast<char16>(iter->char_16()); + WordIDSet word_id_set; + const RepeatedField<int32>& word_ids(iter->word_id()); + for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); + jiter != word_ids.end(); ++jiter) + word_id_set.insert(*jiter); + char_word_map_[uni_char] = word_id_set; + } + return true; +} + +void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) + const { + if (word_id_history_map_.empty()) + return; + WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); + map_item->set_item_count(word_id_history_map_.size()); + for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); + iter != word_id_history_map_.end(); ++iter) { + WordIDHistoryMapEntry* map_entry = + map_item->add_word_id_history_map_entry(); + map_entry->set_word_id(iter->first); + const HistoryIDSet& history_id_set(iter->second); + map_entry->set_item_count(history_id_set.size()); + for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); + set_iter != history_id_set.end(); ++set_iter) + map_entry->add_history_id(*set_iter); + } +} + +bool InMemoryURLIndex::RestoreWordIDHistoryMap( + const InMemoryURLIndexCacheItem& cache) { + if (!cache.has_word_id_history_map()) + return false; + const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); + uint32 expected_item_count = list_item.item_count(); + uint32 actual_item_count = list_item.word_id_history_map_entry_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + const RepeatedPtrField<WordIDHistoryMapEntry>& + entries(list_item.word_id_history_map_entry()); + for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = + entries.begin(); iter != entries.end(); ++iter) { + expected_item_count = iter->item_count(); + actual_item_count = iter->history_id_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + WordID word_id = iter->word_id(); + HistoryIDSet history_id_set; + const RepeatedField<int64>& history_ids(iter->history_id()); + for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); + jiter != history_ids.end(); ++jiter) + history_id_set.insert(*jiter); + word_id_history_map_[word_id] = history_id_set; + } + return true; +} + +void InMemoryURLIndex::SaveHistoryInfoMap( + InMemoryURLIndexCacheItem* cache) const { + if (history_info_map_.empty()) + return; + HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); + map_item->set_item_count(history_info_map_.size()); + for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); + iter != history_info_map_.end(); ++iter) { + HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); + map_entry->set_history_id(iter->first); + const URLRow& url_row(iter->second); + // Note: We only save information that contributes to the index so there + // is no need to save term_char_word_set_cache_ (not persistent), + // languages_, etc. + map_entry->set_visit_count(url_row.visit_count()); + map_entry->set_typed_count(url_row.typed_count()); + map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); + map_entry->set_url(url_row.url().spec()); + map_entry->set_title(UTF16ToUTF8(url_row.title())); + } +} + +bool InMemoryURLIndex::RestoreHistoryInfoMap( + const InMemoryURLIndexCacheItem& cache) { + if (!cache.has_history_info_map()) + return false; + const HistoryInfoMapItem& list_item(cache.history_info_map()); + uint32 expected_item_count = list_item.item_count(); + uint32 actual_item_count = list_item.history_info_map_entry_size(); + if (actual_item_count == 0 || actual_item_count != expected_item_count) + return false; + const RepeatedPtrField<HistoryInfoMapEntry>& + entries(list_item.history_info_map_entry()); + for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = + entries.begin(); iter != entries.end(); ++iter) { + HistoryID history_id = iter->history_id(); + GURL url(iter->url()); + URLRow url_row(url, history_id); + url_row.set_visit_count(iter->visit_count()); + url_row.set_typed_count(iter->typed_count()); + url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); + if (iter->has_title()) { + string16 title(UTF8ToUTF16(iter->title())); + url_row.set_title(title); + } + history_info_map_[history_id] = url_row; + } + return true; +} + } // namespace history diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index 81336f1..bd84188 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,19 +14,30 @@ #include "app/sql/connection.h" #include "base/basictypes.h" +#include "base/file_path.h" +#include "base/gtest_prod_util.h" #include "base/linked_ptr.h" #include "base/scoped_ptr.h" #include "base/string16.h" #include "chrome/browser/autocomplete/history_provider_util.h" #include "chrome/browser/history/history_types.h" +#include "chrome/browser/history/in_memory_url_index_cache.pb.h" #include "testing/gtest/include/gtest/gtest_prod.h" +class Profile; + namespace base { class Time; } +namespace in_memory_url_index { +class InMemoryURLIndexCacheItem; +} + namespace history { +namespace imui = in_memory_url_index; + class URLDatabase; // Used for intermediate history result operations. @@ -67,22 +78,39 @@ typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; // multi-char16 instance. class InMemoryURLIndex { public: - InMemoryURLIndex(); + // |history_dir| is a path to the directory containing the history database + // within the profile wherein the cache and transaction journals will be + // stored. + explicit InMemoryURLIndex(const FilePath& history_dir); ~InMemoryURLIndex(); // Convenience types typedef std::vector<string16> String16Vector; - // Open and index the URL history database. + // Opens and indexes the URL history database. // |languages| gives a list of language encodings with which the history // URLs and omnibox searches are interpreted, i.e. when each is broken // down into words and each word is broken down into characters. bool Init(URLDatabase* history_db, const std::string& languages); - // Reset the history index. - void Reset(); + // Reloads the history index. Attempts to reload from the cache unless + // |clear_cache| is true. If the cache is unavailable then reload the + // index from |history_db|. + bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache); - // Given a vector containing one or more words as string16s, scan the + // Signals that any outstanding initialization should be canceled and + // flushes the cache to disk. + void ShutDown(); + + // Restores the index's private data from the cache file stored in the + // profile directory and returns true if successful. + bool RestoreFromCacheFile(); + + // Caches the index private data and writes the cache file to the profile + // directory. + bool SaveToCacheFile(); + + // Given a vector containing one or more words as string16s, scans the // history index and return a vector with all scored, matching history items. // Each term must occur somewhere in the history item for the item to // qualify; however, the terms do not necessarily have to be adjacent. @@ -92,17 +120,36 @@ class InMemoryURLIndex { // Returns the date threshold for considering an history item as significant. static base::Time RecentThreshold(); + // Updates or adds an history item to the index if it meets the minimum + // 'quick' criteria. + void UpdateURL(URLID row_id, const URLRow& row); + + // Deletes indexing data for an history item. The item may not have actually + // been indexed (which is the case if it did not previously meet minimum + // 'quick' criteria). + void DeleteURL(URLID row_id); + private: friend class AddHistoryMatch; - friend class InMemoryURLIndexTest; - FRIEND_TEST(InMemoryURLIndexTest, Initialization); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Initialization); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); + + // Signals that there are no previously cached results for the typed term. + static const size_t kNoCachedResultForTerm; + + // Creating one of me without a history path is not allowed (tests excepted). + InMemoryURLIndex(); // Convenience types. typedef std::set<string16> String16Set; typedef std::set<char16> Char16Set; + typedef std::vector<char16> Char16Vector; // An index into list of all of the words we have indexed. - typedef int16 WordID; + typedef int WordID; // A map allowing a WordID to be determined given a word. typedef std::map<string16, WordID> WordMap; @@ -118,8 +165,16 @@ class InMemoryURLIndex { typedef std::set<HistoryID> HistoryIDSet; typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap; - // Support caching of term character intersections so that we can optimize - // searches which build upon a previous search. + // Support caching of term character results so that we can optimize + // searches which build upon a previous search. Each entry in this vector + // represents a progressive reduction of the result set for each unique + // character found in the search term, with each character being taken as + // initially encountered. For example, once the results for the search + // term of "mextexarkana" have been fully determined, this vector will + // contain one entry for the characters: m, e, x, t, a, r, k, & n, in + // that order. The result sets will naturally shrink in size for each + // subsequent character as the sets intersections are taken in an + // incremental manner. struct TermCharWordSet; typedef std::vector<TermCharWordSet> TermCharWordSetVector; @@ -148,20 +203,37 @@ class InMemoryURLIndex { const String16Vector& lower_terms_; }; - // Break a string down into individual words. + // Initializes all index data members in preparation for restoring the index + // from the cache or a complete rebuild from the history database. + void ClearPrivateData(); + + // Breaks a string down into individual words. static String16Set WordSetFromString16(const string16& uni_string); - // Given a set of Char16s, find words containing those characters. If any - // existing, cached set is a proper subset then start with that cached - // set. Update the cache. - WordIDSet WordIDSetForTermChars(const Char16Set& uni_chars); + // Given a vector of Char16s, representing the characters the user has typed + // into the omnibox, finds words containing those characters. If any + // existing, cached set is a proper subset then starts with that cached + // set. Updates the previously-typed-character cache. + WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars); + + // Given a vector of Char16s in |uni_chars|, compare those characters, in + // order, with the previously searched term, returning the index of the + // cached results in the term_char_word_set_cache_ of the set with best + // matching number of characters. Returns kNoCachedResultForTerm if there + // was no match at all (i.e. the first character of |uni-chars| is not the + // same as the character of the first entry in term_char_word_set_cache_). + size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars); // URL History indexing support functions. - // Index one URL history item. + // Indexes one URL history item. bool IndexRow(const URLRow& row); - // Break a string down into its individual characters. + // Breaks a string down into unique, individual characters in the order + // in which the characters are first encountered in the |uni_word| string. + static Char16Vector Char16VectorFromString16(const string16& uni_word); + + // Breaks the |uni_word| string down into its individual characters. // Note that this is temporarily intended to work on a single word, but // _will_ work on a string of words, perhaps with unexpected results. // TODO(mrossetti): Lots of optimizations possible here for not restarting @@ -169,26 +241,26 @@ class InMemoryURLIndex { // and properly handle substring matches, scoring and sorting the results // by score. Also, provide the metrics for where the matches occur so that // the UI can highlight the matched sections. - Char16Set CharactersFromString16(const string16& uni_word); + static Char16Set Char16SetFromString16(const string16& uni_word); - // Given a single word, add a reference to the containing history item - // to the index. + // Given a single word in |uni_word|, adds a reference for the containing + // history item identified by |history_id| to the index. void AddWordToIndex(const string16& uni_word, HistoryID history_id); - // Update an existing entry in the word/history index by adding the + // Updates an existing entry in the word/history index by adding the // |history_id| to set for |word_id| in the word_id_history_map_. void UpdateWordHistory(WordID word_id, HistoryID history_id); - // Create a new entry in the word/history map for |word_id| and add + // Creates a new entry in the word/history map for |word_id| and add // |history_id| as the initial element of the word's set. void AddWordHistory(const string16& uni_word, HistoryID history_id); - // Clear the search term cache. This cache holds on to the intermediate + // Clears the search term cache. This cache holds on to the intermediate // word results for each previously typed character to eliminate the need // to re-perform set operations for previously typed characters. void ResetTermCharWordSetCache(); - // Compose a set of history item IDs by intersecting the set for each word + // Composes a set of history item IDs by intersecting the set for each word // in |uni_string|. HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); @@ -196,7 +268,7 @@ class InMemoryURLIndex { // ids for the given term given in |uni_word|. HistoryIDSet HistoryIDsForTerm(const string16& uni_word); - // Calculate a raw score for this history item by first determining + // Calculates a raw score for this history item by first determining // if all of the terms in |terms_vector| occur in |row| and, if so, // calculating a raw score based on 1) starting position of each term // in the user input, 2) completeness of each term's match, 3) ordering @@ -210,15 +282,53 @@ class InMemoryURLIndex { const String16Vector& terms_vector, size_t* first_term_location); + // Utility functions supporting RestoreFromCache and SaveToCache. + + // Construct a file path for the cache file within the same directory where + // the history database is kept and saves that path to |file_path|. Returns + // true if |file_path| can be successfully constructed. (This function + // provided as a hook for unit testing.) + bool GetCacheFilePath(FilePath* file_path); + + // Encode a data structure into the protobuf |cache|. + void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; + void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; + void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; + void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; + void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; + void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; + + // Decode a data structure from the protobuf |cache|. Return false if there + // is any kind of failure. + bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); + bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); + bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); + bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); + bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); + bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); + + // Directory where cache file resides. This is, except when unit testing, + // the same directory in which the profile's history database is found. It + // should never be empty. + FilePath history_dir_; + + // The timestamp of when the cache was last saved. This is used to validate + // the transaction journal's applicability to the cache. The timestamp is + // initialized to the NULL time, indicating that the cache was not used with + // the InMemoryURLIndex was last populated. + base::Time last_saved_; + // A list of all of indexed words. The index of a word in this list is the // ID of the word in the word_map_. It reduces the memory overhead by // replacing a potentially long and repeated string with a simple index. // NOTE: A word will _never_ be removed from this vector thus insuring // the immutability of the word_id throughout the session, reducing // maintenance complexity. + // TODO(mrossetti): Profile the vector allocation and determine if judicious + // 'reserve' calls are called for. String16Vector word_list_; - uint64 history_item_count_; + int history_item_count_; WordMap word_map_; CharWordIDMap char_word_map_; WordIDHistoryMap word_id_history_map_; diff --git a/chrome/browser/history/in_memory_url_index_cache.proto b/chrome/browser/history/in_memory_url_index_cache.proto new file mode 100644 index 0000000..22e50ad --- /dev/null +++ b/chrome/browser/history/in_memory_url_index_cache.proto @@ -0,0 +1,80 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// InMemoryURLIndex caching protocol buffers. +// +// At certain times during browser operation, the indexes from the +// InMemoryURLIndex are written to a disk-based cache using the +// following protobuf description. + +syntax = "proto2"; + +option optimize_for = LITE_RUNTIME; + +package in_memory_url_index; + +message InMemoryURLIndexCacheItem { + + message WordListItem { + required uint32 word_count = 1; + repeated string word = 2; + } + + message WordMapItem { + message WordMapEntry { + required string word = 1; + required int32 word_id = 2; + } + + required uint32 item_count = 1; + repeated WordMapEntry word_map_entry = 2; + } + + message CharWordMapItem { + message CharWordMapEntry { + required uint32 item_count = 1; + required int32 char_16 = 2; + repeated int32 word_id = 3 [packed=true]; + } + + required uint32 item_count = 1; + repeated CharWordMapEntry char_word_map_entry = 2; + } + + message WordIDHistoryMapItem { + message WordIDHistoryMapEntry { + required uint32 item_count = 1; + required int32 word_id = 2; + repeated int64 history_id = 3 [packed=true]; + } + + required uint32 item_count = 1; + repeated WordIDHistoryMapEntry word_id_history_map_entry = 2; + } + + message HistoryInfoMapItem { + message HistoryInfoMapEntry { + required int64 history_id = 1; + required int32 visit_count = 2; + required int32 typed_count = 3; + required int64 last_visit = 4; + required string url = 5; + optional string title = 6; + } + + required uint32 item_count = 1; + repeated HistoryInfoMapEntry history_info_map_entry = 2; + } + + // The timestamp of the cache, used to validate against an + // accompanying transaction journal. + required int64 timestamp = 1; + required int32 history_item_count = 2; + + optional WordListItem word_list = 3; + optional WordMapItem word_map = 4; + optional CharWordMapItem char_word_map = 5; + optional WordIDHistoryMapItem word_id_history_map = 6; + optional HistoryInfoMapItem history_info_map = 7; +} diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index 9306ec5..d495db9 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -11,6 +11,7 @@ #include "app/sql/connection.h" #include "app/sql/statement.h" #include "app/sql/transaction.h" +#include "base/file_path.h" #include "base/file_util.h" #include "base/path_service.h" #include "base/scoped_ptr.h" @@ -32,9 +33,6 @@ // Note that only lines whose first character is an upper-case letter are // processed when creating the test database. -using base::Time; -using base::TimeDelta; - namespace history { class InMemoryURLIndexTest : public testing::Test, @@ -83,15 +81,15 @@ class InMemoryURLIndexTest : public testing::Test, sql::Statement statement(db.GetUniqueStatement( "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;")); EXPECT_TRUE(statement); - Time time_right_now = Time::NowFromSystemTime(); - TimeDelta day_delta = TimeDelta::FromDays(1); + base::Time time_right_now = base::Time::NowFromSystemTime(); + base::TimeDelta day_delta = base::TimeDelta::FromDays(1); { sql::Transaction transaction(&db); transaction.Begin(); while (statement.Step()) { URLRow row; FillURLRow(statement, &row); - Time last_visit = time_right_now; + base::Time last_visit = time_right_now; for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i) last_visit -= day_delta; row.set_last_visit(last_visit); @@ -105,7 +103,7 @@ class InMemoryURLIndexTest : public testing::Test, }; TEST_F(InMemoryURLIndexTest, Construction) { - url_index_.reset(new InMemoryURLIndex); + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); EXPECT_TRUE(url_index_.get()); } @@ -117,68 +115,261 @@ TEST_F(InMemoryURLIndexTest, Initialization) { EXPECT_TRUE(statement); uint64 row_count = 0; while (statement.Step()) ++row_count; - EXPECT_EQ(33U, row_count); + EXPECT_EQ(row_count, 33U); url_index_.reset(new InMemoryURLIndex); url_index_->Init(this, "en,ja,hi,zh"); - - // There should have been 27 of the 31 urls accepted during filtering. - EXPECT_EQ(28U, url_index_->history_item_count_); + EXPECT_EQ(url_index_->history_item_count_, 28); // history_info_map_ should have the same number of items as were filtered. - EXPECT_EQ(28U, url_index_->history_info_map_.size()); - - // The resulting indexes should account for: - // 37 characters - // 88 words - EXPECT_EQ(37U, url_index_->char_word_map_.size()); - EXPECT_EQ(91U, url_index_->word_map_.size()); + EXPECT_EQ(url_index_->history_info_map_.size(), 28U); + EXPECT_EQ(url_index_->char_word_map_.size(), 37U); + EXPECT_EQ(url_index_->word_map_.size(), 91U); } TEST_F(InMemoryURLIndexTest, Retrieval) { - url_index_.reset(new InMemoryURLIndex); - std::string languages("en,ja,hi,zh"); - url_index_->Init(this, languages); + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_->Init(this, "en,ja,hi,zh"); InMemoryURLIndex::String16Vector terms; // The term will be lowercased by the search. // See if a very specific term gives a single result. - string16 term = ASCIIToUTF16("DrudgeReport"); - terms.push_back(term); - ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); - EXPECT_EQ(1U, matches.size()); + terms.push_back(ASCIIToUTF16("DrudgeReport")); + EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); // Search which should result in multiple results. terms.clear(); - term = ASCIIToUTF16("drudge"); - terms.push_back(term); - matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(2U, matches.size()); + terms.push_back(ASCIIToUTF16("drudge")); + ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); + ASSERT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 2U); // The results should be in descending score order. EXPECT_GT(matches[0].raw_score, matches[1].raw_score); // Search which should result in nearly perfect result. terms.clear(); - term = ASCIIToUTF16("http"); - terms.push_back(term); - term = ASCIIToUTF16("NearlyPerfectResult"); - terms.push_back(term); + terms.push_back(ASCIIToUTF16("http")); + terms.push_back(ASCIIToUTF16("NearlyPerfectResult")); matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(1U, matches.size()); + ASSERT_EQ(matches.size(), 1U); // The results should have a very high score. EXPECT_GT(matches[0].raw_score, 900); // Search which should result in very poor result. terms.clear(); - term = ASCIIToUTF16("z"); - terms.push_back(term); - term = ASCIIToUTF16("y"); - terms.push_back(term); - term = ASCIIToUTF16("x"); - terms.push_back(term); + terms.push_back(ASCIIToUTF16("z")); + terms.push_back(ASCIIToUTF16("y")); + terms.push_back(ASCIIToUTF16("x")); matches = url_index_->HistoryItemsForTerms(terms); - ASSERT_EQ(1U, matches.size()); + ASSERT_EQ(matches.size(), 1U); // The results should have a poor score. EXPECT_LT(matches[0].raw_score, 200); } +TEST_F(InMemoryURLIndexTest, Char16Utilities) { + string16 term = ASCIIToUTF16("drudgereport"); + string16 expected = ASCIIToUTF16("drugepot"); + EXPECT_EQ(expected.size(), + InMemoryURLIndex::Char16SetFromString16(term).size()); + InMemoryURLIndex::Char16Vector c_vector = + InMemoryURLIndex::Char16VectorFromString16(term); + ASSERT_EQ(expected.size(), c_vector.size()); + + InMemoryURLIndex::Char16Vector::const_iterator c_iter = c_vector.begin(); + for (string16::const_iterator s_iter = expected.begin(); + s_iter != expected.end(); ++s_iter, ++c_iter) + EXPECT_EQ(*s_iter, *c_iter); +} + +TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) { + // Verify that match results for previously typed characters are retained + // (in the term_char_word_set_cache_) and reused, if possible, in future + // autocompletes. + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_->Init(this, "en,ja,hi,zh"); + + // Verify that we can find something that already exists. + InMemoryURLIndex::String16Vector terms; + string16 term = ASCIIToUTF16("drudgerepo"); + terms.push_back(term); + EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); + + { + // Exercise the term matching cache with the same term. + InMemoryURLIndex::Char16Vector uni_chars = + InMemoryURLIndex::Char16VectorFromString16(term); + EXPECT_EQ(uni_chars.size(), 7U); // Equivalent to 'degopru' + EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U); + } + + { + // Back off a character. + InMemoryURLIndex::Char16Vector uni_chars = + InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("drudgerep")); + EXPECT_EQ(uni_chars.size(), 6U); // Equivalent to 'degpru' + EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 5U); + } + + { + // Add a couple of characters. + InMemoryURLIndex::Char16Vector uni_chars = + InMemoryURLIndex::Char16VectorFromString16( + ASCIIToUTF16("drudgereporta")); + EXPECT_EQ(uni_chars.size(), 9U); // Equivalent to 'adegoprtu' + EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U); + } + + { + // Use different string. + InMemoryURLIndex::Char16Vector uni_chars = + InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("abcde")); + EXPECT_EQ(uni_chars.size(), 5U); + EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), + static_cast<size_t>(-1)); + } +} + +TEST_F(InMemoryURLIndexTest, AddNewRows) { + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_->Init(this, "en,ja,hi,zh"); + InMemoryURLIndex::String16Vector terms; + + // Verify that the row we're going to add does not already exist. + URLID new_row_id = 87654321; + // Newly created URLRows get a last_visit time of 'right now' so it should + // qualify as a quick result candidate. + terms.push_back(ASCIIToUTF16("brokeandalone")); + EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); + + // Add a new row. + URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id); + new_row.set_last_visit(base::Time::Now()); + url_index_->UpdateURL(new_row_id, new_row); + + // Verify that we can retrieve it. + EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); + + // Add it again just to be sure that is harmless. + url_index_->UpdateURL(new_row_id, new_row); + EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U); +} + +TEST_F(InMemoryURLIndexTest, DeleteRows) { + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + url_index_->Init(this, "en,ja,hi,zh"); + InMemoryURLIndex::String16Vector terms; + + // Make sure we actually get an existing result. + terms.push_back(ASCIIToUTF16("DrudgeReport")); + ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms); + EXPECT_EQ(matches.size(), 1U); + + // Determine the row id for that result, delete that id, then search again. + url_index_->DeleteURL(matches[0].url_info.id()); + EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty()); +} + +TEST_F(InMemoryURLIndexTest, CacheFilePath) { + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL( + "/flammmy/frammy/")))); + FilePath full_file_path; + url_index_->GetCacheFilePath(&full_file_path); + std::vector<FilePath::StringType> expected_parts; + FilePath(FILE_PATH_LITERAL("/flammmy/frammy/History Provider Cache")). + GetComponents(&expected_parts); + std::vector<FilePath::StringType> actual_parts; + full_file_path.GetComponents(&actual_parts); + ASSERT_EQ(expected_parts.size(), actual_parts.size()); + size_t count = expected_parts.size(); + for (size_t i = 0; i < count; ++i) + EXPECT_EQ(expected_parts[i], actual_parts[i]); +} + +TEST_F(InMemoryURLIndexTest, CacheSaveRestore) { + // Save the cache to a protobuf, restore it, and compare the results. + url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")))); + InMemoryURLIndex& url_index(*(url_index_.get())); + url_index.Init(this, "en,ja,hi,zh"); + in_memory_url_index::InMemoryURLIndexCacheItem index_cache; + url_index.SavePrivateData(&index_cache); + + // Capture our private data so we can later compare for equality. + int history_item_count(url_index.history_item_count_); + InMemoryURLIndex::String16Vector word_list(url_index.word_list_); + InMemoryURLIndex::WordMap word_map(url_index.word_map_); + InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_); + InMemoryURLIndex::WordIDHistoryMap word_id_history_map( + url_index.word_id_history_map_); + InMemoryURLIndex::HistoryInfoMap history_info_map( + url_index.history_info_map_); + + // Prove that there is really something there. + EXPECT_GT(url_index.history_item_count_, 0); + EXPECT_FALSE(url_index.word_list_.empty()); + EXPECT_FALSE(url_index.word_map_.empty()); + EXPECT_FALSE(url_index.char_word_map_.empty()); + EXPECT_FALSE(url_index.word_id_history_map_.empty()); + EXPECT_FALSE(url_index.history_info_map_.empty()); + + // Clear and then prove it's clear. + url_index.ClearPrivateData(); + EXPECT_EQ(url_index.history_item_count_, 0); + EXPECT_TRUE(url_index.word_list_.empty()); + EXPECT_TRUE(url_index.word_map_.empty()); + EXPECT_TRUE(url_index.char_word_map_.empty()); + EXPECT_TRUE(url_index.word_id_history_map_.empty()); + EXPECT_TRUE(url_index.history_info_map_.empty()); + + // Restore the cache. + EXPECT_TRUE(url_index.RestorePrivateData(index_cache)); + + // Compare the restored and captured for equality. + EXPECT_EQ(history_item_count, url_index.history_item_count_); + EXPECT_EQ(word_list.size(), url_index.word_list_.size()); + EXPECT_EQ(word_map.size(), url_index.word_map_.size()); + EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size()); + EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size()); + EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size()); + // WordList must be index-by-index equal. + size_t count = word_list.size(); + for (size_t i = 0; i < count; ++i) + EXPECT_EQ(word_list[i], url_index.word_list_[i]); + for (InMemoryURLIndex::CharWordIDMap::const_iterator expected = + char_word_map.begin(); expected != char_word_map.end(); ++expected) { + InMemoryURLIndex::CharWordIDMap::const_iterator actual = + url_index.char_word_map_.find(expected->first); + ASSERT_TRUE(url_index.char_word_map_.end() != actual); + const InMemoryURLIndex::WordIDSet& expected_set(expected->second); + const InMemoryURLIndex::WordIDSet& actual_set(actual->second); + ASSERT_EQ(expected_set.size(), actual_set.size()); + for (InMemoryURLIndex::WordIDSet::const_iterator set_iter = + expected_set.begin(); set_iter != expected_set.end(); ++set_iter) + EXPECT_GT(actual_set.count(*set_iter), 0U); + } + for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected = + word_id_history_map.begin(); expected != word_id_history_map.end(); + ++expected) { + InMemoryURLIndex::WordIDHistoryMap::const_iterator actual = + url_index.word_id_history_map_.find(expected->first); + ASSERT_TRUE(url_index.word_id_history_map_.end() != actual); + const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second); + const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second); + ASSERT_EQ(expected_set.size(), actual_set.size()); + for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter = + expected_set.begin(); set_iter != expected_set.end(); ++set_iter) + EXPECT_GT(actual_set.count(*set_iter), 0U); + } + for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected = + history_info_map.begin(); expected != history_info_map.end(); + ++expected) { + InMemoryURLIndex::HistoryInfoMap::const_iterator actual = + url_index.history_info_map_.find(expected->first); + ASSERT_FALSE(url_index.history_info_map_.end() == actual); + const URLRow& expected_row(expected->second); + const URLRow& actual_row(actual->second); + EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count()); + EXPECT_EQ(expected_row.typed_count(), actual_row.typed_count()); + EXPECT_EQ(expected_row.last_visit(), actual_row.last_visit()); + EXPECT_EQ(expected_row.url(), actual_row.url()); + } +} + } // namespace history diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index 1037b6c..a5447f6 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -245,7 +245,7 @@ QueryParser::QueryParser() { // static bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { - DCHECK(word.size() > 0); + DCHECK(!word.empty()); size_t minimum_length = 3; // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) // because they 'behave like' Latin letters. Moreover, we should diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc index 6bf3270..28c8da4 100644 --- a/chrome/browser/history/text_database.cc +++ b/chrome/browser/history/text_database.cc @@ -367,7 +367,7 @@ void TextDatabase::GetTextMatches(const std::string& query, // When we have returned all the results possible (or determined that there // are none), then we have searched all the time requested, so we can // set the first_time_searched to that value. - if (results->size() == 0 || + if (results->empty() || options.max_count == 0 || // Special case for wanting all the results. static_cast<int>(results->size()) < options.max_count) { *first_time_searched = options.begin_time; diff --git a/chrome/browser/history/top_sites.cc b/chrome/browser/history/top_sites.cc index 155c3fc..306070f 100644 --- a/chrome/browser/history/top_sites.cc +++ b/chrome/browser/history/top_sites.cc @@ -13,8 +13,6 @@ #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "base/values.h" -#include "chrome/browser/browser_thread.h" -#include "chrome/browser/dom_ui/most_visited_handler.h" #include "chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_notifications.h" #include "chrome/browser/history/page_usage_data.h" @@ -22,12 +20,14 @@ #include "chrome/browser/history/top_sites_cache.h" #include "chrome/browser/prefs/pref_service.h" #include "chrome/browser/profiles/profile.h" -#include "chrome/browser/tab_contents/navigation_controller.h" -#include "chrome/browser/tab_contents/navigation_entry.h" +#include "chrome/browser/ui/webui/most_visited_handler.h" #include "chrome/common/chrome_switches.h" #include "chrome/common/notification_service.h" #include "chrome/common/pref_names.h" #include "chrome/common/thumbnail_score.h" +#include "content/browser/browser_thread.h" +#include "content/browser/tab_contents/navigation_controller.h" +#include "content/browser/tab_contents/navigation_entry.h" #include "grit/chromium_strings.h" #include "grit/generated_resources.h" #include "grit/locale_settings.h" diff --git a/chrome/browser/history/top_sites.h b/chrome/browser/history/top_sites.h index f92b877..5e3fa14 100644 --- a/chrome/browser/history/top_sites.h +++ b/chrome/browser/history/top_sites.h @@ -18,11 +18,11 @@ #include "base/synchronization/lock.h" #include "base/time.h" #include "base/timer.h" -#include "chrome/browser/cancelable_request.h" #include "chrome/browser/history/history_types.h" #include "chrome/browser/history/history.h" #include "chrome/browser/history/page_usage_data.h" #include "chrome/common/thumbnail_score.h" +#include "content/browser/cancelable_request.h" #include "googleurl/src/gurl.h" class DictionaryValue; diff --git a/chrome/browser/history/top_sites_backend.cc b/chrome/browser/history/top_sites_backend.cc index eb27a5e..bb25e1f 100644 --- a/chrome/browser/history/top_sites_backend.cc +++ b/chrome/browser/history/top_sites_backend.cc @@ -6,8 +6,8 @@ #include "base/file_path.h" #include "base/file_util.h" -#include "chrome/browser/browser_thread.h" #include "chrome/browser/history/top_sites_database.h" +#include "content/browser/browser_thread.h" namespace history { diff --git a/chrome/browser/history/top_sites_backend.h b/chrome/browser/history/top_sites_backend.h index 50d0867..ebd8c6a 100644 --- a/chrome/browser/history/top_sites_backend.h +++ b/chrome/browser/history/top_sites_backend.h @@ -9,8 +9,8 @@ #include "base/file_path.h" #include "base/ref_counted.h" #include "base/scoped_ptr.h" -#include "chrome/browser/cancelable_request.h" #include "chrome/browser/history/history_types.h" +#include "content/browser/cancelable_request.h" class FilePath; diff --git a/chrome/browser/history/top_sites_unittest.cc b/chrome/browser/history/top_sites_unittest.cc index b380a57..866ba3c 100644 --- a/chrome/browser/history/top_sites_unittest.cc +++ b/chrome/browser/history/top_sites_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -10,8 +10,6 @@ #include "base/string_util.h" #include "base/utf_string_conversions.h" #include "base/values.h" -#include "chrome/browser/browser_thread.h" -#include "chrome/browser/dom_ui/most_visited_handler.h" #include "chrome/browser/history/history_backend.h" #include "chrome/browser/history/history_database.h" #include "chrome/browser/history/history_marshaling.h" @@ -20,11 +18,13 @@ #include "chrome/browser/history/top_sites_backend.h" #include "chrome/browser/history/top_sites_cache.h" #include "chrome/browser/history/top_sites_database.h" +#include "chrome/browser/ui/webui/most_visited_handler.h" #include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_paths.h" #include "chrome/common/chrome_switches.h" #include "chrome/test/testing_profile.h" #include "chrome/tools/profiles/thumbnail-inl.h" +#include "content/browser/browser_thread.h" #include "googleurl/src/gurl.h" #include "grit/chromium_strings.h" #include "grit/generated_resources.h" diff --git a/chrome/browser/history/url_database.cc b/chrome/browser/history/url_database.cc index 24decc6..f6c041c 100644 --- a/chrome/browser/history/url_database.cc +++ b/chrome/browser/history/url_database.cc @@ -238,6 +238,25 @@ bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { return true; } +bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { + DCHECK(!enumerator->initialized_); + std::string sql("SELECT "); + sql.append(kURLRowFields); + sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count > ? OR " + "typed_count > ?"); + enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); + if (!enumerator->statement_) { + NOTREACHED() << GetDB().GetErrorMessage(); + return false; + } + enumerator->statement_.BindInt64( + 0, AutocompleteAgeThreshold().ToInternalValue()); + enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); + enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); + enumerator->initialized_ = true; + return true; +} + bool URLDatabase::IsFavIconUsed(FavIconID favicon_id) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT id FROM urls WHERE favicon_id=? LIMIT 1")); diff --git a/chrome/browser/history/url_database.h b/chrome/browser/history/url_database.h index ae1d7cd..c821ce7 100644 --- a/chrome/browser/history/url_database.h +++ b/chrome/browser/history/url_database.h @@ -127,9 +127,15 @@ class URLDatabase { sql::Statement statement_; }; - // Initializes the given enumerator to enumerator all URLs in the database + // Initializes the given enumerator to enumerator all URLs in the database. bool InitURLEnumeratorForEverything(URLEnumerator* enumerator); + // Initializes the given enumerator to enumerator all URLs in the database + // that are historically significant: ones having been visited within 3 days, + // having their URL manually typed more than once, or having been visited + // more than 3 times. + bool InitURLEnumeratorForSignificant(URLEnumerator* enumerator); + // Favicons ------------------------------------------------------------------ // Check whether a favicon is used by any URLs in the database. diff --git a/chrome/browser/history/url_database_unittest.cc b/chrome/browser/history/url_database_unittest.cc index b164e48..c2f22ff 100644 --- a/chrome/browser/history/url_database_unittest.cc +++ b/chrome/browser/history/url_database_unittest.cc @@ -125,8 +125,7 @@ TEST_F(URLDatabaseTest, AddURL) { // Tests adding, querying and deleting keyword visits. TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { - const GURL url1("http://www.google.com/"); - URLRow url_info1(url1); + URLRow url_info1(GURL("http://www.google.com/")); url_info1.set_title(UTF8ToUTF16("Google")); url_info1.set_visit_count(4); url_info1.set_typed_count(2); @@ -165,8 +164,7 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { // Make sure deleting a URL also deletes a keyword visit. TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) { - const GURL url1("http://www.google.com/"); - URLRow url_info1(url1); + URLRow url_info1(GURL("http://www.google.com/")); url_info1.set_title(UTF8ToUTF16("Google")); url_info1.set_visit_count(4); url_info1.set_typed_count(2); @@ -187,4 +185,41 @@ TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) { ASSERT_EQ(0U, matches.size()); } +TEST_F(URLDatabaseTest, EnumeratorForSignificant) { + std::set<std::string> good_urls; + // Add URLs which do and don't meet the criteria. + URLRow url_no_match(GURL("http://www.url_no_match.com/")); + EXPECT_TRUE(AddURL(url_no_match)); + + std::string url_string2("http://www.url_match_visit_count.com/"); + good_urls.insert("http://www.url_match_visit_count.com/"); + URLRow url_match_visit_count(GURL("http://www.url_match_visit_count.com/")); + url_match_visit_count.set_visit_count(kLowQualityMatchVisitLimit + 1); + EXPECT_TRUE(AddURL(url_match_visit_count)); + + good_urls.insert("http://www.url_match_typed_count.com/"); + URLRow url_match_typed_count(GURL("http://www.url_match_typed_count.com/")); + url_match_typed_count.set_typed_count(kLowQualityMatchTypedLimit + 1); + EXPECT_TRUE(AddURL(url_match_typed_count)); + + good_urls.insert("http://www.url_match_last_visit.com/"); + URLRow url_match_last_visit(GURL("http://www.url_match_last_visit.com/")); + url_match_last_visit.set_last_visit(Time::Now() - TimeDelta::FromDays(1)); + EXPECT_TRUE(AddURL(url_match_last_visit)); + + URLRow url_no_match_last_visit(GURL( + "http://www.url_no_match_last_visit.com/")); + url_no_match_last_visit.set_last_visit(Time::Now() - + TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1)); + EXPECT_TRUE(AddURL(url_no_match_last_visit)); + + URLDatabase::URLEnumerator history_enum; + EXPECT_TRUE(InitURLEnumeratorForSignificant(&history_enum)); + URLRow row; + int row_count = 0; + for (; history_enum.GetNextURL(&row); ++row_count) + EXPECT_EQ(1U, good_urls.count(row.url().spec())); + EXPECT_EQ(3, row_count); +} + } // namespace history |