summaryrefslogtreecommitdiffstats
path: root/chrome
diff options
context:
space:
mode:
authormrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-24 14:18:26 +0000
committermrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-24 14:18:26 +0000
commit6c6e2e851954d14c5d183a624f842f55c1241296 (patch)
tree8fe3c11fac71c2ea9bfbdaf1c205a225bc4b8a84 /chrome
parent61623711e224217f24269ec1ba5554e3e908b92c (diff)
downloadchromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.zip
chromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.tar.gz
chromium_src-6c6e2e851954d14c5d183a624f842f55c1241296.tar.bz2
Revert 75882 - Add caching of the InMemoryURLIndex (part of the HistoryQuickProvider) part 1. (Transactions will be introduced in the next submission.) Fixed a problem in the caching of search results as the user types each character in a search term. Updated the unit test associated with that code.
Added (temporary) flag which can be used to turn on the HQP (enable-history-quick-provider); also added it to about:flags. Previously reviewed as http://codereview.chromium.org/6286029/. BUG=19736,60107 TEST=Added unit tests. Review URL: http://codereview.chromium.org/6581024 TBR=mrossetti@chromium.org Review URL: http://codereview.chromium.org/6575032 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75883 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome')
-rw-r--r--chrome/app/generated_resources.grd6
-rw-r--r--chrome/browser/DEPS3
-rw-r--r--chrome/browser/about_flags.cc9
-rw-r--r--chrome/browser/autocomplete/autocomplete.cc6
-rw-r--r--chrome/browser/autocomplete/history_provider_util.cc13
-rw-r--r--chrome/browser/autocomplete/history_provider_util.h13
-rw-r--r--chrome/browser/autocomplete/history_quick_provider.h5
-rw-r--r--chrome/browser/autocomplete/history_quick_provider_unittest.cc5
-rw-r--r--chrome/browser/autocomplete/history_url_provider.cc15
-rw-r--r--chrome/browser/history/history_backend.cc4
-rw-r--r--chrome/browser/history/history_types.cc27
-rw-r--r--chrome/browser/history/history_types.h20
-rw-r--r--chrome/browser/history/history_types_unittest.cc41
-rw-r--r--chrome/browser/history/in_memory_history_backend.h13
-rw-r--r--chrome/browser/history/in_memory_url_index.cc623
-rw-r--r--chrome/browser/history/in_memory_url_index.h164
-rw-r--r--chrome/browser/history/in_memory_url_index_unittest.cc275
-rw-r--r--chrome/browser/history/url_database.cc19
-rw-r--r--chrome/browser/history/url_database.h8
-rw-r--r--chrome/browser/history/url_database_unittest.cc43
-rw-r--r--chrome/chrome_browser.gypi48
-rw-r--r--chrome/chrome_tests.gypi1
-rw-r--r--chrome/common/chrome_switches.cc3
-rw-r--r--chrome/common/chrome_switches.h1
24 files changed, 248 insertions, 1117 deletions
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd
index f974f25..fdd5287 100644
--- a/chrome/app/generated_resources.grd
+++ b/chrome/app/generated_resources.grd
@@ -4116,12 +4116,6 @@ Keep your key file in a safe place. You will need it to create new versions of y
<message name="IDS_FLAGS_DISABLE_INTERACTIVE_FORM_VALIDATION_DESCRIPTION" desc="Description of the 'Disable HTML5 interactive form validation' lab.">
Disable showing validation messages and preventing form submission.
</message>
- <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER" desc="Name of the 'Enable better visit history matching in the omnibox' lab.">
- Enable better omnibox history matching
- </message>
- <message name="IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION" desc="Description of the 'Enable better visit history matching in the omnibox' lab.">
- Enables substring and multi-fragment matching within URLs from history.
- </message>
<!-- Crashes -->
<message name="IDS_CRASHES_TITLE" desc="Title for the chrome://crashes page.">
diff --git a/chrome/browser/DEPS b/chrome/browser/DEPS
index 8d658fc..064caf9 100644
--- a/chrome/browser/DEPS
+++ b/chrome/browser/DEPS
@@ -30,13 +30,12 @@ include_rules = [
"+media/audio", # Chrome's lightweight audio library.
"+media/base",
"+third_party/apple", # Apple code ImageAndTextCell.
- "+third_party/cld",
"+third_party/expat",
"+third_party/gpsd",
"+third_party/sqlite",
"+third_party/libevent", # For the remote V8 debugging server
"+third_party/libjingle",
- "+third_party/protobuf/src/google/protobuf",
+ "+third_party/cld",
"+third_party/undoview",
"+v8/include", # Browser uses V8 to get the version and run the debugger.
diff --git a/chrome/browser/about_flags.cc b/chrome/browser/about_flags.cc
index 34e6f79..4f6999f 100644
--- a/chrome/browser/about_flags.cc
+++ b/chrome/browser/about_flags.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -254,13 +254,6 @@ const Experiment kExperiments[] = {
kOsMac, // TODO(crogers): add windows and linux when FFT is ready.
SINGLE_VALUE_TYPE(switches::kEnableWebAudio)
},
- {
- "enable-history-quick-provider",
- IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER,
- IDS_FLAGS_ENABLE_HISTORY_QUICK_PROVIDER_DESCRIPTION,
- kOsAll,
- SINGLE_VALUE_TYPE(switches::kEnableHistoryQuickProvider)
- },
};
const Experiment* experiments = kExperiments;
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc
index e10c3c8..daad189 100644
--- a/chrome/browser/autocomplete/autocomplete.cc
+++ b/chrome/browser/autocomplete/autocomplete.cc
@@ -789,10 +789,8 @@ AutocompleteController::AutocompleteController(
in_start_(false) {
search_provider_ = new SearchProvider(this, profile);
providers_.push_back(search_provider_);
- if (CommandLine::ForCurrentProcess()->HasSwitch(
- switches::kEnableHistoryQuickProvider) &&
- !CommandLine::ForCurrentProcess()->HasSwitch(
- switches::kDisableHistoryQuickProvider))
+ if (!CommandLine::ForCurrentProcess()->HasSwitch(
+ switches::kDisableHistoryQuickProvider))
providers_.push_back(new HistoryQuickProvider(this, profile));
if (!CommandLine::ForCurrentProcess()->HasSwitch(
switches::kDisableHistoryURLProvider))
diff --git a/chrome/browser/autocomplete/history_provider_util.cc b/chrome/browser/autocomplete/history_provider_util.cc
index a8b23d7..4325b6b 100644
--- a/chrome/browser/autocomplete/history_provider_util.cc
+++ b/chrome/browser/autocomplete/history_provider_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -6,6 +6,10 @@
namespace history {
+const int kLowQualityMatchTypedLimit = 1;
+const int kLowQualityMatchVisitLimit = 3;
+const int kLowQualityMatchAgeLimitInDays = 3;
+
HistoryMatch::HistoryMatch()
: url_info(),
input_location(string16::npos),
@@ -27,4 +31,9 @@ bool HistoryMatch::operator==(const GURL& url) const {
return url_info.url() == url;
}
-} // namespace history
+base::Time AutocompleteAgeThreshold() {
+ return (base::Time::Now() -
+ base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays));
+}
+
+}
diff --git a/chrome/browser/autocomplete/history_provider_util.h b/chrome/browser/autocomplete/history_provider_util.h
index 5866df9..aa4b444 100644
--- a/chrome/browser/autocomplete/history_provider_util.h
+++ b/chrome/browser/autocomplete/history_provider_util.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -13,6 +13,13 @@
namespace history {
+// Constants which specify, when considered altogether, 'significant'
+// history items. These are used to filter out insignificant items
+// for consideration as autocomplete candidates.
+extern const int kLowQualityMatchTypedLimit;
+extern const int kLowQualityMatchVisitLimit;
+extern const int kLowQualityMatchAgeLimitInDays;
+
// Used for intermediate history result operations.
struct HistoryMatch {
// Required for STL, we don't use this directly.
@@ -64,6 +71,10 @@ struct Prefix {
int num_components;
};
typedef std::vector<Prefix> Prefixes;
+
+// Returns the date threshold for considering an history item as significant.
+base::Time AutocompleteAgeThreshold();
+
}
#endif // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_PROVIDER_UTIL_H_
diff --git a/chrome/browser/autocomplete/history_quick_provider.h b/chrome/browser/autocomplete/history_quick_provider.h
index befb8bc..561336b 100644
--- a/chrome/browser/autocomplete/history_quick_provider.h
+++ b/chrome/browser/autocomplete/history_quick_provider.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -22,6 +22,9 @@ class HistoryBackend;
// the history system) which quickly (and synchronously) provides matching
// results from recently or frequently visited sites in the profile's
// history.
+//
+// TODO(mrossetti): Review to see if the following applies since we're not
+// using the database during the autocomplete pass.
class HistoryQuickProvider : public HistoryProvider {
public:
HistoryQuickProvider(ACProviderListener* listener, Profile* profile);
diff --git a/chrome/browser/autocomplete/history_quick_provider_unittest.cc b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
index c640456..d5b5e0d 100644
--- a/chrome/browser/autocomplete/history_quick_provider_unittest.cc
+++ b/chrome/browser/autocomplete/history_quick_provider_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -139,8 +139,7 @@ void HistoryQuickProviderTest::FillData() {
history::SOURCE_BROWSED);
}
- history::InMemoryURLIndex* index =
- new history::InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy")));
+ history::InMemoryURLIndex* index = new history::InMemoryURLIndex();
PrefService* prefs = profile_->GetPrefs();
std::string languages(prefs->GetString(prefs::kAcceptLanguages));
index->Init(db, languages);
diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc
index 0dafe8e..1db38f0 100644
--- a/chrome/browser/autocomplete/history_url_provider.cc
+++ b/chrome/browser/autocomplete/history_url_provider.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -15,7 +15,6 @@
#include "chrome/browser/history/history.h"
#include "chrome/browser/history/history_backend.h"
#include "chrome/browser/history/history_database.h"
-#include "chrome/browser/history/history_types.h"
#include "chrome/browser/net/url_fixer_upper.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/profiles/profile.h"
@@ -690,12 +689,16 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
}
void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const {
- const base::Time& threshold(history::AutocompleteAgeThreshold());
+ Time recent_threshold = history::AutocompleteAgeThreshold();
for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) {
- if (RowQualifiesAsSignificant(i->url_info, threshold))
- ++i;
- else
+ const history::URLRow& url_info(i->url_info);
+ if ((url_info.typed_count() <= history::kLowQualityMatchTypedLimit) &&
+ (url_info.visit_count() <= history::kLowQualityMatchVisitLimit) &&
+ (url_info.last_visit() < recent_threshold)) {
i = matches->erase(i);
+ } else {
+ ++i;
+ }
}
}
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index 84afba6..50f47d5 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -550,7 +550,7 @@ void HistoryBackend::InitImpl(const std::string& languages) {
// Fill the in-memory database and send it back to the history service on the
// main thread.
InMemoryHistoryBackend* mem_backend = new InMemoryHistoryBackend;
- if (mem_backend->Init(history_name, history_dir_, db_.get(), languages))
+ if (mem_backend->Init(history_name, db_.get(), languages))
delegate_->SetInMemoryBackend(mem_backend); // Takes ownership of pointer.
else
delete mem_backend; // Error case, run without the in-memory DB.
diff --git a/chrome/browser/history/history_types.cc b/chrome/browser/history/history_types.cc
index 65b1935..d3753bf 100644
--- a/chrome/browser/history/history_types.cc
+++ b/chrome/browser/history/history_types.cc
@@ -9,6 +9,9 @@
#include "base/logging.h"
#include "base/stl_util-inl.h"
+using base::Time;
+using base::TimeDelta;
+
namespace history {
// URLRow ----------------------------------------------------------------------
@@ -59,7 +62,7 @@ void URLRow::Initialize() {
id_ = 0;
visit_count_ = 0;
typed_count_ = 0;
- last_visit_ = base::Time();
+ last_visit_ = Time();
hidden_ = false;
favicon_id_ = 0;
}
@@ -76,7 +79,7 @@ VisitRow::VisitRow()
}
VisitRow::VisitRow(URLID arg_url_id,
- base::Time arg_visit_time,
+ Time arg_visit_time,
VisitID arg_referring_visit,
PageTransition::Type arg_transition,
SegmentID arg_segment_id)
@@ -383,24 +386,4 @@ MostVisitedThumbnails::MostVisitedThumbnails() {}
MostVisitedThumbnails::~MostVisitedThumbnails() {}
-// Autocomplete thresholds -----------------------------------------------------
-
-const int kLowQualityMatchTypedLimit = 1;
-const int kLowQualityMatchVisitLimit = 3;
-const int kLowQualityMatchAgeLimitInDays = 3;
-
-base::Time AutocompleteAgeThreshold() {
- return (base::Time::Now() -
- base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays));
-}
-
-bool RowQualifiesAsSignificant(const URLRow& row,
- const base::Time& threshold) {
- const base::Time& real_threshold =
- threshold.is_null() ? AutocompleteAgeThreshold() : threshold;
- return (row.typed_count() > kLowQualityMatchTypedLimit) ||
- (row.visit_count() > kLowQualityMatchVisitLimit) ||
- (row.last_visit() >= real_threshold);
-}
-
} // namespace history
diff --git a/chrome/browser/history/history_types.h b/chrome/browser/history/history_types.h
index ed219a8..320c9910 100644
--- a/chrome/browser/history/history_types.h
+++ b/chrome/browser/history/history_types.h
@@ -664,26 +664,6 @@ class MostVisitedThumbnails
DISALLOW_COPY_AND_ASSIGN(MostVisitedThumbnails);
};
-// Autocomplete thresholds -----------------------------------------------------
-
-// Constants which specify, when considered altogether, 'significant'
-// history items. These are used to filter out insignificant items
-// for consideration as autocomplete candidates.
-extern const int kLowQualityMatchTypedLimit;
-extern const int kLowQualityMatchVisitLimit;
-extern const int kLowQualityMatchAgeLimitInDays;
-
-// Returns the date threshold for considering an history item as significant.
-base::Time AutocompleteAgeThreshold();
-
-// Return true if |row| qualifies as an autocomplete candidate. If |time_cache|
-// is_null() then this function determines a new time threshold each time it is
-// called. Since getting system time can be costly (such as for cases where
-// this function will be called in a loop over many history items), you can
-// provide a non-null |time_cache| by simply initializing |time_cache| with
-// AutocompleteAgeThreshold() (or any other desired time in the past).
-bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold);
-
} // namespace history
#endif // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
diff --git a/chrome/browser/history/history_types_unittest.cc b/chrome/browser/history/history_types_unittest.cc
index beabcef..5e14de5 100644
--- a/chrome/browser/history/history_types_unittest.cc
+++ b/chrome/browser/history/history_types_unittest.cc
@@ -2,10 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/utf_string_conversions.h"
#include "chrome/browser/history/history_types.h"
#include "testing/gtest/include/gtest/gtest.h"
+using base::Time;
+
namespace history {
namespace {
@@ -39,9 +40,9 @@ static const char kURL3[] = "http://images.google.com/";
void AddSimpleData(QueryResults* results) {
GURL url1(kURL1);
GURL url2(kURL2);
- URLResult result1(url1, base::Time::Now());
- URLResult result2(url1, base::Time::Now());
- URLResult result3(url2, base::Time::Now());
+ URLResult result1(url1, Time::Now());
+ URLResult result2(url1, Time::Now());
+ URLResult result3(url2, Time::Now());
// The URLResults are invalid after being inserted.
results->AppendURLBySwapping(&result1);
@@ -54,8 +55,8 @@ void AddSimpleData(QueryResults* results) {
void AddAlternateData(QueryResults* results) {
GURL url2(kURL2);
GURL url3(kURL3);
- URLResult result1(url2, base::Time::Now());
- URLResult result2(url3, base::Time::Now());
+ URLResult result1(url2, Time::Now());
+ URLResult result2(url3, Time::Now());
// The URLResults are invalid after being inserted.
results->AppendURLBySwapping(&result1);
@@ -167,32 +168,4 @@ TEST(HistoryQueryResult, AppendResults) {
EXPECT_TRUE(matches[0] == 3);
}
-TEST(HistoryQueryResult, RowSignificance) {
- const base::Time& threshold(AutocompleteAgeThreshold());
- const GURL url("http://www.google.com/");
- URLRow url_row(url);
- url_row.set_title(UTF8ToUTF16("Google"));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_visit_count(kLowQualityMatchVisitLimit + 1);
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_visit_count(1);
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_typed_count(kLowQualityMatchTypedLimit + 1);
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_typed_count(0);
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_last_visit(base::Time::Now() - base::TimeDelta::FromDays(1));
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_TRUE(RowQualifiesAsSignificant(url_row, base::Time()));
- url_row.set_last_visit(base::Time::Now() -
- base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, threshold));
- EXPECT_FALSE(RowQualifiesAsSignificant(url_row, base::Time()));
-}
-
} // namespace
diff --git a/chrome/browser/history/in_memory_history_backend.h b/chrome/browser/history/in_memory_history_backend.h
index 026fd7e..6e56636 100644
--- a/chrome/browser/history/in_memory_history_backend.h
+++ b/chrome/browser/history/in_memory_history_backend.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -7,7 +7,7 @@
// in-line autocomplete.
//
// It is created on the history thread and passed to the main thread where
-// operations can be completed synchronously. It listens for notifications
+// operations can be completed synchronously. It listenes for notifications
// from the "regular" history backend and keeps itself in sync.
#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_HISTORY_BACKEND_H_
@@ -41,15 +41,8 @@ class InMemoryHistoryBackend : public NotificationObserver {
InMemoryHistoryBackend();
~InMemoryHistoryBackend();
- // Initializes the backend from the history database pointed to by the
- // full path in |history_filename|. |history_dir| is the path to the
- // directory containing the history database and is also used
- // as the directory where the InMemoryURLIndex's cache is kept. |db| is
- // used for building the InMemoryURLIndex. |languages| gives the
- // preferred user languages with which URLs and page titles are
- // interpreted while decomposing into words and characters during indexing.
+ // Initializes with data from the given history database.
bool Init(const FilePath& history_filename,
- const FilePath& history_dir,
URLDatabase* db,
const std::string& languages);
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 0458699..9d2e925 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -8,43 +8,22 @@
#include <iterator>
#include <limits>
-#include "base/file_util.h"
#include "base/i18n/break_iterator.h"
-#include "base/metrics/histogram.h"
#include "base/string_util.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/autocomplete/history_provider_util.h"
#include "chrome/browser/history/url_database.h"
-#include "chrome/browser/profiles/profile.h"
#include "net/base/escape.h"
#include "net/base/net_util.h"
-#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
#include "ui/base/l10n/l10n_util.h"
+#include "unicode/utypes.h" // for int32_t
-using google::protobuf::RepeatedField;
-using google::protobuf::RepeatedPtrField;
+using base::Time;
+using base::TimeDelta;
-using in_memory_url_index::InMemoryURLIndexCacheItem;
namespace history {
-typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
-typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
-typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;
-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry
- CharWordMapEntry;
-typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
- WordIDHistoryMapItem;
-typedef imui::
- InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
- WordIDHistoryMapEntry;
-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
- HistoryInfoMapEntry;
-
-const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
-
ScoredHistoryMatch::ScoredHistoryMatch() : raw_score(0) {}
ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info,
@@ -57,49 +36,53 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info,
}
struct InMemoryURLIndex::TermCharWordSet {
- TermCharWordSet() // Required for STL resize().
- : char_(0),
- word_id_set_(),
- used_(false) {}
- TermCharWordSet(const char16& uni_char,
+ TermCharWordSet(const Char16Set& char_set,
const WordIDSet& word_id_set,
bool used)
- : char_(uni_char),
+ : char_set_(char_set),
word_id_set_(word_id_set),
used_(used) {}
- // Predicate for STL algorithm use.
- bool is_not_used() const { return !used_; }
+ bool IsNotUsed() const { return !used_; }
- char16 char_;
+ Char16Set char_set_;
WordIDSet word_id_set_;
bool used_; // true if this set has been used for the current term search.
};
-InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
- : history_dir_(history_dir),
- history_item_count_(0) {
-}
-
-// Called only by unit tests.
-InMemoryURLIndex::InMemoryURLIndex()
- : history_item_count_(0) {
-}
+InMemoryURLIndex::InMemoryURLIndex() : history_item_count_(0) {}
InMemoryURLIndex::~InMemoryURLIndex() {}
+static const int32_t kURLToLowerBufferSize = 2048;
+
// Indexing
bool InMemoryURLIndex::Init(history::URLDatabase* history_db,
const std::string& languages) {
// TODO(mrossetti): Register for profile/language change notifications.
languages_ = languages;
- return ReloadFromHistory(history_db, false);
-}
-
-void InMemoryURLIndex::ShutDown() {
- // Write our cache.
- SaveToCacheFile();
+ // Reset our indexes.
+ char_word_map_.clear();
+ word_id_history_map_.clear();
+ if (!history_db)
+ return false;
+ URLDatabase::URLEnumerator history_enum;
+ if (history_db->InitURLEnumeratorForEverything(&history_enum)) {
+ URLRow row;
+ Time recent_threshold = InMemoryURLIndex::RecentThreshold();
+ while (history_enum.GetNextURL(&row)) {
+ // Do some filtering so that we only get history items which could
+ // possibly pass the HistoryURLProvider::CullPoorMatches filter later.
+ if ((row.typed_count() > kLowQualityMatchTypedLimit) ||
+ (row.visit_count() > kLowQualityMatchVisitLimit) ||
+ (row.last_visit() >= recent_threshold)) {
+ if (!IndexRow(row))
+ return false;
+ }
+ }
+ }
+ return true;
}
bool InMemoryURLIndex::IndexRow(const URLRow& row) {
@@ -119,7 +102,7 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) {
new_row.set_typed_count(row.typed_count());
new_row.set_last_visit(row.last_visit());
new_row.set_title(row.title());
- history_info_map_[history_id] = new_row;
+ history_info_map_.insert(std::make_pair(history_id, new_row));
// Split into individual, unique words.
String16Set words = WordSetFromString16(url);
@@ -135,149 +118,6 @@ bool InMemoryURLIndex::IndexRow(const URLRow& row) {
return true;
}
-bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
- bool clear_cache) {
- ClearPrivateData();
-
- if (!history_db)
- return false;
-
- if (clear_cache || !RestoreFromCacheFile()) {
- base::TimeTicks beginning_time = base::TimeTicks::Now();
- // The index has to be built from scratch.
- URLDatabase::URLEnumerator history_enum;
- if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
- return false;
- URLRow row;
- while (history_enum.GetNextURL(&row)) {
- if (!IndexRow(row))
- return false;
- }
- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
- base::TimeTicks::Now() - beginning_time);
- SaveToCacheFile();
- }
- return true;
-}
-
-void InMemoryURLIndex::ClearPrivateData() {
- history_item_count_ = 0;
- word_list_.clear();
- word_map_.clear();
- char_word_map_.clear();
- word_id_history_map_.clear();
- term_char_word_set_cache_.clear();
- history_info_map_.clear();
-}
-
-bool InMemoryURLIndex::RestoreFromCacheFile() {
- // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
- // That is: ensure that the database has not been modified since the cache
- // was last saved. DB file modification date is inadequate. There are no
- // SQLite table checksums automatically stored.
- base::TimeTicks beginning_time = base::TimeTicks::Now();
- FilePath file_path;
- if (!GetCacheFilePath(&file_path))
- return false;
- std::string data;
- if (!file_util::ReadFileToString(file_path, &data)) {
- LOG(WARNING) << "Failed to read InMemoryURLIndex cache from "
- << file_path.value();
- return false;
- }
-
- InMemoryURLIndexCacheItem index_cache;
- if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
- LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
- << file_path.value();
- return false;
- }
-
- if (!RestorePrivateData(index_cache)) {
- ClearPrivateData(); // Back to square one -- must build from scratch.
- return false;
- }
-
- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
- base::TimeTicks::Now() - beginning_time);
- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
- UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
- return true;
-}
-
-bool InMemoryURLIndex::SaveToCacheFile() {
- base::TimeTicks beginning_time = base::TimeTicks::Now();
- InMemoryURLIndexCacheItem index_cache;
- SavePrivateData(&index_cache);
- std::string data;
- if (!index_cache.SerializeToString(&data)) {
- LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
- return false;
- }
-
- // Write the cache to a file then swap it for the old cache, if any, and
- // delete the old cache.
- FilePath file_path;
- if (!GetCacheFilePath(&file_path))
- return false;
- file_util::ScopedFILE file(file_util::OpenFile(file_path, "w"));
- if (!file.get())
- return false;
-
- int size = data.size();
- if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
- LOG(WARNING) << "Failed to write " << file_path.value();
- return false;
- }
- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
- base::TimeTicks::Now() - beginning_time);
- return true;
-}
-
-void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
- // The row may or may not already be in our index. If it is not already
- // indexed and it qualifies then it gets indexed. If it is already
- // indexed and still qualifies then it gets updated, otherwise it
- // is deleted from the index.
- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
- if (row_pos == history_info_map_.end()) {
- // This new row should be indexed if it qualifies.
- if (RowQualifiesAsSignificant(row, base::Time()))
- IndexRow(row);
- } else if (RowQualifiesAsSignificant(row, base::Time())) {
- // This indexed row still qualifies and will be re-indexed.
- // The url won't have changed but the title, visit count, etc.
- // might have changed.
- URLRow& old_row = row_pos->second;
- old_row.set_visit_count(row.visit_count());
- old_row.set_typed_count(row.typed_count());
- old_row.set_last_visit(row.last_visit());
- // TODO(mrossetti): When we start indexing the title the next line
- // will need attention.
- old_row.set_title(row.title());
- } else {
- // This indexed row no longer qualifies and will be de-indexed.
- history_info_map_.erase(row_id);
- }
- // This invalidates the cache.
- term_char_word_set_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
-}
-
-void InMemoryURLIndex::DeleteURL(URLID row_id) {
- // Note that this does not remove any reference to this row from the
- // word_id_history_map_. That map will continue to contain (and return)
- // hits against this row until that map is rebuilt, but since the
- // history_info_map_ no longer references the row no erroneous results
- // will propagate to the user.
- history_info_map_.erase(row_id);
- // This invalidates the word cache.
- term_char_word_set_cache_.clear();
- // TODO(mrossetti): Record this transaction in the cache.
-}
-
// Searching
ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
@@ -315,7 +155,7 @@ ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
term_char_word_set_cache_.erase(
std::remove_if(term_char_word_set_cache_.begin(),
term_char_word_set_cache_.end(),
- std::mem_fun_ref(&TermCharWordSet::is_not_used)),
+ std::mem_fun_ref(&TermCharWordSet::IsNotUsed)),
term_char_word_set_cache_.end());
return scored_items;
}
@@ -366,10 +206,9 @@ InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
const string16& uni_word) {
InMemoryURLIndex::HistoryIDSet history_id_set;
- // For each unique character in the word, in order of first appearance, get
- // the char/word_id map entry and intersect with the set in an incremental
- // manner.
- Char16Vector uni_chars = Char16VectorFromString16(uni_word);
+ // For each character in the word, get the char/word_id map entry and
+ // intersect with the set in an incremental manner.
+ Char16Set uni_chars = CharactersFromString16(uni_word);
WordIDSet word_id_set(WordIDSetForTermChars(uni_chars));
// If any words resulted then we can compose a set of history IDs by unioning
@@ -407,22 +246,7 @@ InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
}
// static
-InMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16(
- const string16& uni_word) {
- InMemoryURLIndex::Char16Vector characters;
- InMemoryURLIndex::Char16Set unique_characters;
- for (string16::const_iterator iter = uni_word.begin();
- iter != uni_word.end(); ++iter) {
- if (!unique_characters.count(*iter)) {
- unique_characters.insert(*iter);
- characters.push_back(*iter);
- }
- }
- return characters;
-}
-
-// static
-InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
+InMemoryURLIndex::Char16Set InMemoryURLIndex::CharactersFromString16(
const string16& uni_word) {
Char16Set characters;
for (string16::const_iterator iter = uni_word.begin();
@@ -453,17 +277,17 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
HistoryID history_id) {
word_list_.push_back(uni_word);
WordID word_id = word_list_.size() - 1;
- word_map_[uni_word] = word_id;
+ word_map_.insert(std::make_pair(uni_word, word_id));
HistoryIDSet history_id_set;
history_id_set.insert(history_id);
- word_id_history_map_[word_id] = history_id_set;
+ word_id_history_map_.insert(std::make_pair(word_id, history_id_set));
// For each character in the newly added word (i.e. a word that is not
// already in the word index), add the word to the character index.
- Char16Set characters = Char16SetFromString16(uni_word);
+ Char16Set characters = CharactersFromString16(uni_word);
for (Char16Set::iterator uni_char_iter = characters.begin();
uni_char_iter != characters.end(); ++uni_char_iter) {
- Char16Set::value_type uni_char = *uni_char_iter;
- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
+ Char16Set::value_type uni_string = *uni_char_iter;
+ CharWordIDMap::iterator char_iter = char_word_map_.find(uni_string);
if (char_iter != char_word_map_.end()) {
// Update existing entry in the char/word index.
WordIDSet& word_id_set(char_iter->second);
@@ -472,79 +296,89 @@ void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
// Create a new entry in the char/word index.
WordIDSet word_id_set;
word_id_set.insert(word_id);
- char_word_map_[uni_char] = word_id_set;
+ char_word_map_.insert(std::make_pair(uni_string, word_id_set));
}
}
}
InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
- const InMemoryURLIndex::Char16Vector& uni_chars) {
- size_t index = CachedResultsIndexForTerm(uni_chars);
-
- // If there were no unprocessed characters in the search term |uni_chars|
- // then we can use the cached one as-is as the results with no further
- // filtering.
- if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1)
- return term_char_word_set_cache_[index].word_id_set_;
-
- // Some or all of the characters remain to be indexed so trim the cache.
- if (index + 1 < term_char_word_set_cache_.size())
- term_char_word_set_cache_.resize(index + 1);
- WordIDSet word_id_set;
- // Take advantage of our cached starting point, if any.
- Char16Vector::const_iterator c_iter = uni_chars.begin();
- if (index != kNoCachedResultForTerm) {
- word_id_set = term_char_word_set_cache_[index].word_id_set_;
- c_iter += index + 1;
- }
- // Now process the remaining characters in the search term.
- for (; c_iter != uni_chars.end(); ++c_iter) {
- Char16Vector::value_type uni_char = *c_iter;
- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
- if (char_iter == char_word_map_.end()) {
- // A character was not found so there are no matching results: bail.
- word_id_set.clear();
- break;
- }
- WordIDSet& char_word_id_set(char_iter->second);
- // It is possible for there to no longer be any words associated with
- // a particular character. Give up in that case.
- if (char_word_id_set.empty()) {
- word_id_set.clear();
- break;
+ InMemoryURLIndex::Char16Set const& uni_chars) {
+ TermCharWordSet* best_term_char_word_set = NULL;
+ bool set_found = false;
+ size_t outstanding_count = 0;
+ Char16Set outstanding_chars;
+ for (TermCharWordSetVector::iterator iter = term_char_word_set_cache_.begin();
+ iter != term_char_word_set_cache_.end(); ++iter) {
+ TermCharWordSetVector::value_type& term_char_word_set(*iter);
+ Char16Set& char_set(term_char_word_set.char_set_);
+ Char16Set exclusions;
+ std::set_difference(char_set.begin(), char_set.end(),
+ uni_chars.begin(), uni_chars.end(),
+ std::inserter(exclusions, exclusions.begin()));
+ if (exclusions.empty()) {
+ // Do a reverse difference so that we know which characters remain
+ // to be indexed. Then decide if this is a better match than any
+ // previous cached set.
+ std::set_difference(uni_chars.begin(), uni_chars.end(),
+ char_set.begin(), char_set.end(),
+ std::inserter(exclusions, exclusions.begin()));
+ if (!set_found || exclusions.size() < outstanding_count) {
+ outstanding_chars = exclusions;
+ best_term_char_word_set = &*iter;
+ outstanding_count = exclusions.size();
+ set_found = true;
+ }
}
+ }
- if (word_id_set.empty()) {
- word_id_set = char_word_id_set;
+ WordIDSet word_id_set;
+ if (set_found && outstanding_count == 0) {
+ // If there were no oustanding characters then we can use the cached one.
+ best_term_char_word_set->used_ = true;
+ word_id_set = best_term_char_word_set->word_id_set_;
+ } else {
+ // Some or all of the characters remain to be indexed.
+ bool first_character = true;
+ if (set_found) {
+ first_character = false;
+ word_id_set = best_term_char_word_set->word_id_set_;
} else {
- WordIDSet old_word_id_set(word_id_set);
- word_id_set.clear();
- std::set_intersection(old_word_id_set.begin(),
- old_word_id_set.end(),
- char_word_id_set.begin(),
- char_word_id_set.end(),
- std::inserter(word_id_set,
- word_id_set.begin()));
+ outstanding_chars = uni_chars;
+ }
+
+ for (Char16Set::iterator uni_char_iter = outstanding_chars.begin();
+ uni_char_iter != outstanding_chars.end(); ++uni_char_iter) {
+ Char16Set::value_type uni_char = *uni_char_iter;
+ CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
+ if (char_iter == char_word_map_.end()) {
+ // The character was not found so bail.
+ word_id_set.clear();
+ break;
+ }
+ WordIDSet& char_word_id_set(char_iter->second);
+ if (first_character) {
+ word_id_set = char_word_id_set;
+ first_character = false;
+ } else {
+ WordIDSet old_word_id_set(word_id_set);
+ word_id_set.clear();
+ std::set_intersection(old_word_id_set.begin(),
+ old_word_id_set.end(),
+ char_word_id_set.begin(),
+ char_word_id_set.end(),
+ std::inserter(word_id_set,
+ word_id_set.begin()));
+ }
+ }
+ // Update the cache.
+ if (!set_found || outstanding_count) {
+ term_char_word_set_cache_.push_back(TermCharWordSet(uni_chars,
+ word_id_set, true));
}
- // Add this new char/set instance to the cache.
- term_char_word_set_cache_.push_back(TermCharWordSet(
- uni_char, word_id_set, true));
}
return word_id_set;
}
-size_t InMemoryURLIndex::CachedResultsIndexForTerm(
- const InMemoryURLIndex::Char16Vector& uni_chars) {
- TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin();
- for (Char16Vector::const_iterator c_iter = uni_chars.begin();
- (c_iter != uni_chars.end()) &&
- (t_iter != term_char_word_set_cache_.end()) &&
- (*c_iter == t_iter->char_);
- ++c_iter, ++t_iter)
- t_iter->used_ = true; // Update the cache.
- return t_iter - term_char_word_set_cache_.begin() - 1;
-}
-
// static
int InMemoryURLIndex::RawScoreForURL(const URLRow& row,
const String16Vector& terms,
@@ -643,9 +477,8 @@ int InMemoryURLIndex::RawScoreForURL(const URLRow& row,
}
// static
-base::Time InMemoryURLIndex::RecentThreshold() {
- return base::Time::Now() -
- base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays);
+Time InMemoryURLIndex::RecentThreshold() {
+ return Time::Now() - TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays);
}
InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
@@ -661,9 +494,6 @@ void InMemoryURLIndex::AddHistoryMatch::operator()(
const InMemoryURLIndex::HistoryID history_id) {
HistoryInfoMap::const_iterator hist_pos =
index_.history_info_map_.find(history_id);
- // Note that a history_id may be present in the word_id_history_map_ yet not
- // be found in the history_info_map_. This occurs when an item has been
- // deleted by the user or the item no longer qualifies as a quick result.
if (hist_pos != index_.history_info_map_.end()) {
const URLRow& hist_item = hist_pos->second;
// TODO(mrossetti): Accommodate multiple term highlighting.
@@ -700,227 +530,4 @@ void InMemoryURLIndex::AddHistoryMatch::operator()(
}
}
-bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {
- if (history_dir_.empty())
- return false;
- *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
- return true;
-}
-
-void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
- DCHECK(cache);
- cache->set_timestamp(base::Time::Now().ToInternalValue());
- cache->set_history_item_count(history_item_count_);
- SaveWordList(cache);
- SaveWordMap(cache);
- SaveCharWordMap(cache);
- SaveWordIDHistoryMap(cache);
- SaveHistoryInfoMap(cache);
-}
-
-bool InMemoryURLIndex::RestorePrivateData(
- const InMemoryURLIndexCacheItem& cache) {
- last_saved_ = base::Time::FromInternalValue(cache.timestamp());
- history_item_count_ = cache.history_item_count();
- return (history_item_count_ == 0) || RestoreWordList(cache) &&
- RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
- RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache);
-}
-
-
-void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
- if (word_list_.empty())
- return;
- WordListItem* list_item = cache->mutable_word_list();
- list_item->set_word_count(word_list_.size());
- for (String16Vector::const_iterator iter = word_list_.begin();
- iter != word_list_.end(); ++iter)
- list_item->add_word(UTF16ToUTF8(*iter));
-}
-
-bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
- if (!cache.has_word_list())
- return false;
- const WordListItem& list_item(cache.word_list());
- uint32 expected_item_count = list_item.word_count();
- uint32 actual_item_count = list_item.word_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- const RepeatedPtrField<std::string>& words(list_item.word());
- for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
- iter != words.end(); ++iter)
- word_list_.push_back(UTF8ToUTF16(*iter));
- return true;
-}
-
-void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (word_map_.empty())
- return;
- WordMapItem* map_item = cache->mutable_word_map();
- map_item->set_item_count(word_map_.size());
- for (WordMap::const_iterator iter = word_map_.begin();
- iter != word_map_.end(); ++iter) {
- WordMapEntry* map_entry = map_item->add_word_map_entry();
- map_entry->set_word(UTF16ToUTF8(iter->first));
- map_entry->set_word_id(iter->second);
- }
-}
-
-bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
- if (!cache.has_word_map())
- return false;
- const WordMapItem& list_item(cache.word_map());
- uint32 expected_item_count = list_item.item_count();
- uint32 actual_item_count = list_item.word_map_entry_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
- for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
- iter != entries.end(); ++iter)
- word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
- return true;
-}
-
-void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
- if (char_word_map_.empty())
- return;
- CharWordMapItem* map_item = cache->mutable_char_word_map();
- map_item->set_item_count(char_word_map_.size());
- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
- iter != char_word_map_.end(); ++iter) {
- CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
- map_entry->set_char_16(iter->first);
- const WordIDSet& word_id_set(iter->second);
- map_entry->set_item_count(word_id_set.size());
- for (WordIDSet::const_iterator set_iter = word_id_set.begin();
- set_iter != word_id_set.end(); ++set_iter)
- map_entry->add_word_id(*set_iter);
- }
-}
-
-bool InMemoryURLIndex::RestoreCharWordMap(
- const InMemoryURLIndexCacheItem& cache) {
- if (!cache.has_char_word_map())
- return false;
- const CharWordMapItem& list_item(cache.char_word_map());
- uint32 expected_item_count = list_item.item_count();
- uint32 actual_item_count = list_item.char_word_map_entry_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- const RepeatedPtrField<CharWordMapEntry>&
- entries(list_item.char_word_map_entry());
- for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
- entries.begin(); iter != entries.end(); ++iter) {
- expected_item_count = iter->item_count();
- actual_item_count = iter->word_id_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- char16 uni_char = static_cast<char16>(iter->char_16());
- WordIDSet word_id_set;
- const RepeatedField<int32>& word_ids(iter->word_id());
- for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
- jiter != word_ids.end(); ++jiter)
- word_id_set.insert(*jiter);
- char_word_map_[uni_char] = word_id_set;
- }
- return true;
-}
-
-void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
- const {
- if (word_id_history_map_.empty())
- return;
- WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
- map_item->set_item_count(word_id_history_map_.size());
- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
- iter != word_id_history_map_.end(); ++iter) {
- WordIDHistoryMapEntry* map_entry =
- map_item->add_word_id_history_map_entry();
- map_entry->set_word_id(iter->first);
- const HistoryIDSet& history_id_set(iter->second);
- map_entry->set_item_count(history_id_set.size());
- for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
- set_iter != history_id_set.end(); ++set_iter)
- map_entry->add_history_id(*set_iter);
- }
-}
-
-bool InMemoryURLIndex::RestoreWordIDHistoryMap(
- const InMemoryURLIndexCacheItem& cache) {
- if (!cache.has_word_id_history_map())
- return false;
- const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
- uint32 expected_item_count = list_item.item_count();
- uint32 actual_item_count = list_item.word_id_history_map_entry_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- const RepeatedPtrField<WordIDHistoryMapEntry>&
- entries(list_item.word_id_history_map_entry());
- for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
- entries.begin(); iter != entries.end(); ++iter) {
- expected_item_count = iter->item_count();
- actual_item_count = iter->history_id_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- WordID word_id = iter->word_id();
- HistoryIDSet history_id_set;
- const RepeatedField<int64>& history_ids(iter->history_id());
- for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
- jiter != history_ids.end(); ++jiter)
- history_id_set.insert(*jiter);
- word_id_history_map_[word_id] = history_id_set;
- }
- return true;
-}
-
-void InMemoryURLIndex::SaveHistoryInfoMap(
- InMemoryURLIndexCacheItem* cache) const {
- if (history_info_map_.empty())
- return;
- HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
- map_item->set_item_count(history_info_map_.size());
- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
- iter != history_info_map_.end(); ++iter) {
- HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
- map_entry->set_history_id(iter->first);
- const URLRow& url_row(iter->second);
- // Note: We only save information that contributes to the index so there
- // is no need to save term_char_word_set_cache_ (not persistent),
- // languages_, etc.
- map_entry->set_visit_count(url_row.visit_count());
- map_entry->set_typed_count(url_row.typed_count());
- map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
- map_entry->set_url(url_row.url().spec());
- map_entry->set_title(UTF16ToUTF8(url_row.title()));
- }
-}
-
-bool InMemoryURLIndex::RestoreHistoryInfoMap(
- const InMemoryURLIndexCacheItem& cache) {
- if (!cache.has_history_info_map())
- return false;
- const HistoryInfoMapItem& list_item(cache.history_info_map());
- uint32 expected_item_count = list_item.item_count();
- uint32 actual_item_count = list_item.history_info_map_entry_size();
- if (actual_item_count == 0 || actual_item_count != expected_item_count)
- return false;
- const RepeatedPtrField<HistoryInfoMapEntry>&
- entries(list_item.history_info_map_entry());
- for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
- entries.begin(); iter != entries.end(); ++iter) {
- HistoryID history_id = iter->history_id();
- GURL url(iter->url());
- URLRow url_row(url, history_id);
- url_row.set_visit_count(iter->visit_count());
- url_row.set_typed_count(iter->typed_count());
- url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
- if (iter->has_title()) {
- string16 title(UTF8ToUTF16(iter->title()));
- url_row.set_title(title);
- }
- history_info_map_[history_id] = url_row;
- }
- return true;
-}
-
} // namespace history
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index bd84188..81336f1 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -14,30 +14,19 @@
#include "app/sql/connection.h"
#include "base/basictypes.h"
-#include "base/file_path.h"
-#include "base/gtest_prod_util.h"
#include "base/linked_ptr.h"
#include "base/scoped_ptr.h"
#include "base/string16.h"
#include "chrome/browser/autocomplete/history_provider_util.h"
#include "chrome/browser/history/history_types.h"
-#include "chrome/browser/history/in_memory_url_index_cache.pb.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
-class Profile;
-
namespace base {
class Time;
}
-namespace in_memory_url_index {
-class InMemoryURLIndexCacheItem;
-}
-
namespace history {
-namespace imui = in_memory_url_index;
-
class URLDatabase;
// Used for intermediate history result operations.
@@ -78,39 +67,22 @@ typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
// multi-char16 instance.
class InMemoryURLIndex {
public:
- // |history_dir| is a path to the directory containing the history database
- // within the profile wherein the cache and transaction journals will be
- // stored.
- explicit InMemoryURLIndex(const FilePath& history_dir);
+ InMemoryURLIndex();
~InMemoryURLIndex();
// Convenience types
typedef std::vector<string16> String16Vector;
- // Opens and indexes the URL history database.
+ // Open and index the URL history database.
// |languages| gives a list of language encodings with which the history
// URLs and omnibox searches are interpreted, i.e. when each is broken
// down into words and each word is broken down into characters.
bool Init(URLDatabase* history_db, const std::string& languages);
- // Reloads the history index. Attempts to reload from the cache unless
- // |clear_cache| is true. If the cache is unavailable then reload the
- // index from |history_db|.
- bool ReloadFromHistory(URLDatabase* history_db, bool clear_cache);
+ // Reset the history index.
+ void Reset();
- // Signals that any outstanding initialization should be canceled and
- // flushes the cache to disk.
- void ShutDown();
-
- // Restores the index's private data from the cache file stored in the
- // profile directory and returns true if successful.
- bool RestoreFromCacheFile();
-
- // Caches the index private data and writes the cache file to the profile
- // directory.
- bool SaveToCacheFile();
-
- // Given a vector containing one or more words as string16s, scans the
+ // Given a vector containing one or more words as string16s, scan the
// history index and return a vector with all scored, matching history items.
// Each term must occur somewhere in the history item for the item to
// qualify; however, the terms do not necessarily have to be adjacent.
@@ -120,36 +92,17 @@ class InMemoryURLIndex {
// Returns the date threshold for considering an history item as significant.
static base::Time RecentThreshold();
- // Updates or adds an history item to the index if it meets the minimum
- // 'quick' criteria.
- void UpdateURL(URLID row_id, const URLRow& row);
-
- // Deletes indexing data for an history item. The item may not have actually
- // been indexed (which is the case if it did not previously meet minimum
- // 'quick' criteria).
- void DeleteURL(URLID row_id);
-
private:
friend class AddHistoryMatch;
- FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Initialization);
- FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities);
- FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
- FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath);
- FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
-
- // Signals that there are no previously cached results for the typed term.
- static const size_t kNoCachedResultForTerm;
-
- // Creating one of me without a history path is not allowed (tests excepted).
- InMemoryURLIndex();
+ friend class InMemoryURLIndexTest;
+ FRIEND_TEST(InMemoryURLIndexTest, Initialization);
// Convenience types.
typedef std::set<string16> String16Set;
typedef std::set<char16> Char16Set;
- typedef std::vector<char16> Char16Vector;
// An index into list of all of the words we have indexed.
- typedef int WordID;
+ typedef int16 WordID;
// A map allowing a WordID to be determined given a word.
typedef std::map<string16, WordID> WordMap;
@@ -165,16 +118,8 @@ class InMemoryURLIndex {
typedef std::set<HistoryID> HistoryIDSet;
typedef std::map<WordID, HistoryIDSet> WordIDHistoryMap;
- // Support caching of term character results so that we can optimize
- // searches which build upon a previous search. Each entry in this vector
- // represents a progressive reduction of the result set for each unique
- // character found in the search term, with each character being taken as
- // initially encountered. For example, once the results for the search
- // term of "mextexarkana" have been fully determined, this vector will
- // contain one entry for the characters: m, e, x, t, a, r, k, & n, in
- // that order. The result sets will naturally shrink in size for each
- // subsequent character as the sets intersections are taken in an
- // incremental manner.
+ // Support caching of term character intersections so that we can optimize
+ // searches which build upon a previous search.
struct TermCharWordSet;
typedef std::vector<TermCharWordSet> TermCharWordSetVector;
@@ -203,37 +148,20 @@ class InMemoryURLIndex {
const String16Vector& lower_terms_;
};
- // Initializes all index data members in preparation for restoring the index
- // from the cache or a complete rebuild from the history database.
- void ClearPrivateData();
-
- // Breaks a string down into individual words.
+ // Break a string down into individual words.
static String16Set WordSetFromString16(const string16& uni_string);
- // Given a vector of Char16s, representing the characters the user has typed
- // into the omnibox, finds words containing those characters. If any
- // existing, cached set is a proper subset then starts with that cached
- // set. Updates the previously-typed-character cache.
- WordIDSet WordIDSetForTermChars(const Char16Vector& uni_chars);
-
- // Given a vector of Char16s in |uni_chars|, compare those characters, in
- // order, with the previously searched term, returning the index of the
- // cached results in the term_char_word_set_cache_ of the set with best
- // matching number of characters. Returns kNoCachedResultForTerm if there
- // was no match at all (i.e. the first character of |uni-chars| is not the
- // same as the character of the first entry in term_char_word_set_cache_).
- size_t CachedResultsIndexForTerm(const Char16Vector& uni_chars);
+ // Given a set of Char16s, find words containing those characters. If any
+ // existing, cached set is a proper subset then start with that cached
+ // set. Update the cache.
+ WordIDSet WordIDSetForTermChars(const Char16Set& uni_chars);
// URL History indexing support functions.
- // Indexes one URL history item.
+ // Index one URL history item.
bool IndexRow(const URLRow& row);
- // Breaks a string down into unique, individual characters in the order
- // in which the characters are first encountered in the |uni_word| string.
- static Char16Vector Char16VectorFromString16(const string16& uni_word);
-
- // Breaks the |uni_word| string down into its individual characters.
+ // Break a string down into its individual characters.
// Note that this is temporarily intended to work on a single word, but
// _will_ work on a string of words, perhaps with unexpected results.
// TODO(mrossetti): Lots of optimizations possible here for not restarting
@@ -241,26 +169,26 @@ class InMemoryURLIndex {
// and properly handle substring matches, scoring and sorting the results
// by score. Also, provide the metrics for where the matches occur so that
// the UI can highlight the matched sections.
- static Char16Set Char16SetFromString16(const string16& uni_word);
+ Char16Set CharactersFromString16(const string16& uni_word);
- // Given a single word in |uni_word|, adds a reference for the containing
- // history item identified by |history_id| to the index.
+ // Given a single word, add a reference to the containing history item
+ // to the index.
void AddWordToIndex(const string16& uni_word, HistoryID history_id);
- // Updates an existing entry in the word/history index by adding the
+ // Update an existing entry in the word/history index by adding the
// |history_id| to set for |word_id| in the word_id_history_map_.
void UpdateWordHistory(WordID word_id, HistoryID history_id);
- // Creates a new entry in the word/history map for |word_id| and add
+ // Create a new entry in the word/history map for |word_id| and add
// |history_id| as the initial element of the word's set.
void AddWordHistory(const string16& uni_word, HistoryID history_id);
- // Clears the search term cache. This cache holds on to the intermediate
+ // Clear the search term cache. This cache holds on to the intermediate
// word results for each previously typed character to eliminate the need
// to re-perform set operations for previously typed characters.
void ResetTermCharWordSetCache();
- // Composes a set of history item IDs by intersecting the set for each word
+ // Compose a set of history item IDs by intersecting the set for each word
// in |uni_string|.
HistoryIDSet HistoryIDSetFromWords(const string16& uni_string);
@@ -268,7 +196,7 @@ class InMemoryURLIndex {
// ids for the given term given in |uni_word|.
HistoryIDSet HistoryIDsForTerm(const string16& uni_word);
- // Calculates a raw score for this history item by first determining
+ // Calculate a raw score for this history item by first determining
// if all of the terms in |terms_vector| occur in |row| and, if so,
// calculating a raw score based on 1) starting position of each term
// in the user input, 2) completeness of each term's match, 3) ordering
@@ -282,53 +210,15 @@ class InMemoryURLIndex {
const String16Vector& terms_vector,
size_t* first_term_location);
- // Utility functions supporting RestoreFromCache and SaveToCache.
-
- // Construct a file path for the cache file within the same directory where
- // the history database is kept and saves that path to |file_path|. Returns
- // true if |file_path| can be successfully constructed. (This function
- // provided as a hook for unit testing.)
- bool GetCacheFilePath(FilePath* file_path);
-
- // Encode a data structure into the protobuf |cache|.
- void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
- void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
- void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
- void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
- void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
- void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
-
- // Decode a data structure from the protobuf |cache|. Return false if there
- // is any kind of failure.
- bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache);
- bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
- bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
- bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
- bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
- bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
-
- // Directory where cache file resides. This is, except when unit testing,
- // the same directory in which the profile's history database is found. It
- // should never be empty.
- FilePath history_dir_;
-
- // The timestamp of when the cache was last saved. This is used to validate
- // the transaction journal's applicability to the cache. The timestamp is
- // initialized to the NULL time, indicating that the cache was not used with
- // the InMemoryURLIndex was last populated.
- base::Time last_saved_;
-
// A list of all of indexed words. The index of a word in this list is the
// ID of the word in the word_map_. It reduces the memory overhead by
// replacing a potentially long and repeated string with a simple index.
// NOTE: A word will _never_ be removed from this vector thus insuring
// the immutability of the word_id throughout the session, reducing
// maintenance complexity.
- // TODO(mrossetti): Profile the vector allocation and determine if judicious
- // 'reserve' calls are called for.
String16Vector word_list_;
- int history_item_count_;
+ uint64 history_item_count_;
WordMap word_map_;
CharWordIDMap char_word_map_;
WordIDHistoryMap word_id_history_map_;
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index d495db9..9306ec5 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -11,7 +11,6 @@
#include "app/sql/connection.h"
#include "app/sql/statement.h"
#include "app/sql/transaction.h"
-#include "base/file_path.h"
#include "base/file_util.h"
#include "base/path_service.h"
#include "base/scoped_ptr.h"
@@ -33,6 +32,9 @@
// Note that only lines whose first character is an upper-case letter are
// processed when creating the test database.
+using base::Time;
+using base::TimeDelta;
+
namespace history {
class InMemoryURLIndexTest : public testing::Test,
@@ -81,15 +83,15 @@ class InMemoryURLIndexTest : public testing::Test,
sql::Statement statement(db.GetUniqueStatement(
"SELECT" HISTORY_URL_ROW_FIELDS "FROM urls;"));
EXPECT_TRUE(statement);
- base::Time time_right_now = base::Time::NowFromSystemTime();
- base::TimeDelta day_delta = base::TimeDelta::FromDays(1);
+ Time time_right_now = Time::NowFromSystemTime();
+ TimeDelta day_delta = TimeDelta::FromDays(1);
{
sql::Transaction transaction(&db);
transaction.Begin();
while (statement.Step()) {
URLRow row;
FillURLRow(statement, &row);
- base::Time last_visit = time_right_now;
+ Time last_visit = time_right_now;
for (int64 i = row.last_visit().ToInternalValue(); i > 0; --i)
last_visit -= day_delta;
row.set_last_visit(last_visit);
@@ -103,7 +105,7 @@ class InMemoryURLIndexTest : public testing::Test,
};
TEST_F(InMemoryURLIndexTest, Construction) {
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
+ url_index_.reset(new InMemoryURLIndex);
EXPECT_TRUE(url_index_.get());
}
@@ -115,261 +117,68 @@ TEST_F(InMemoryURLIndexTest, Initialization) {
EXPECT_TRUE(statement);
uint64 row_count = 0;
while (statement.Step()) ++row_count;
- EXPECT_EQ(row_count, 33U);
+ EXPECT_EQ(33U, row_count);
url_index_.reset(new InMemoryURLIndex);
url_index_->Init(this, "en,ja,hi,zh");
- EXPECT_EQ(url_index_->history_item_count_, 28);
+
+ // There should have been 27 of the 31 urls accepted during filtering.
+ EXPECT_EQ(28U, url_index_->history_item_count_);
// history_info_map_ should have the same number of items as were filtered.
- EXPECT_EQ(url_index_->history_info_map_.size(), 28U);
- EXPECT_EQ(url_index_->char_word_map_.size(), 37U);
- EXPECT_EQ(url_index_->word_map_.size(), 91U);
+ EXPECT_EQ(28U, url_index_->history_info_map_.size());
+
+ // The resulting indexes should account for:
+ // 37 characters
+ // 88 words
+ EXPECT_EQ(37U, url_index_->char_word_map_.size());
+ EXPECT_EQ(91U, url_index_->word_map_.size());
}
TEST_F(InMemoryURLIndexTest, Retrieval) {
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
- url_index_->Init(this, "en,ja,hi,zh");
+ url_index_.reset(new InMemoryURLIndex);
+ std::string languages("en,ja,hi,zh");
+ url_index_->Init(this, languages);
InMemoryURLIndex::String16Vector terms;
// The term will be lowercased by the search.
// See if a very specific term gives a single result.
- terms.push_back(ASCIIToUTF16("DrudgeReport"));
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
+ string16 term = ASCIIToUTF16("DrudgeReport");
+ terms.push_back(term);
+ ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
+ EXPECT_EQ(1U, matches.size());
// Search which should result in multiple results.
terms.clear();
- terms.push_back(ASCIIToUTF16("drudge"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 2U);
+ term = ASCIIToUTF16("drudge");
+ terms.push_back(term);
+ matches = url_index_->HistoryItemsForTerms(terms);
+ ASSERT_EQ(2U, matches.size());
// The results should be in descending score order.
EXPECT_GT(matches[0].raw_score, matches[1].raw_score);
// Search which should result in nearly perfect result.
terms.clear();
- terms.push_back(ASCIIToUTF16("http"));
- terms.push_back(ASCIIToUTF16("NearlyPerfectResult"));
+ term = ASCIIToUTF16("http");
+ terms.push_back(term);
+ term = ASCIIToUTF16("NearlyPerfectResult");
+ terms.push_back(term);
matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(matches.size(), 1U);
+ ASSERT_EQ(1U, matches.size());
// The results should have a very high score.
EXPECT_GT(matches[0].raw_score, 900);
// Search which should result in very poor result.
terms.clear();
- terms.push_back(ASCIIToUTF16("z"));
- terms.push_back(ASCIIToUTF16("y"));
- terms.push_back(ASCIIToUTF16("x"));
+ term = ASCIIToUTF16("z");
+ terms.push_back(term);
+ term = ASCIIToUTF16("y");
+ terms.push_back(term);
+ term = ASCIIToUTF16("x");
+ terms.push_back(term);
matches = url_index_->HistoryItemsForTerms(terms);
- ASSERT_EQ(matches.size(), 1U);
+ ASSERT_EQ(1U, matches.size());
// The results should have a poor score.
EXPECT_LT(matches[0].raw_score, 200);
}
-TEST_F(InMemoryURLIndexTest, Char16Utilities) {
- string16 term = ASCIIToUTF16("drudgereport");
- string16 expected = ASCIIToUTF16("drugepot");
- EXPECT_EQ(expected.size(),
- InMemoryURLIndex::Char16SetFromString16(term).size());
- InMemoryURLIndex::Char16Vector c_vector =
- InMemoryURLIndex::Char16VectorFromString16(term);
- ASSERT_EQ(expected.size(), c_vector.size());
-
- InMemoryURLIndex::Char16Vector::const_iterator c_iter = c_vector.begin();
- for (string16::const_iterator s_iter = expected.begin();
- s_iter != expected.end(); ++s_iter, ++c_iter)
- EXPECT_EQ(*s_iter, *c_iter);
-}
-
-TEST_F(InMemoryURLIndexTest, TypedCharacterCaching) {
- // Verify that match results for previously typed characters are retained
- // (in the term_char_word_set_cache_) and reused, if possible, in future
- // autocompletes.
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
- url_index_->Init(this, "en,ja,hi,zh");
-
- // Verify that we can find something that already exists.
- InMemoryURLIndex::String16Vector terms;
- string16 term = ASCIIToUTF16("drudgerepo");
- terms.push_back(term);
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
-
- {
- // Exercise the term matching cache with the same term.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(term);
- EXPECT_EQ(uni_chars.size(), 7U); // Equivalent to 'degopru'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U);
- }
-
- {
- // Back off a character.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("drudgerep"));
- EXPECT_EQ(uni_chars.size(), 6U); // Equivalent to 'degpru'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 5U);
- }
-
- {
- // Add a couple of characters.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(
- ASCIIToUTF16("drudgereporta"));
- EXPECT_EQ(uni_chars.size(), 9U); // Equivalent to 'adegoprtu'
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars), 6U);
- }
-
- {
- // Use different string.
- InMemoryURLIndex::Char16Vector uni_chars =
- InMemoryURLIndex::Char16VectorFromString16(ASCIIToUTF16("abcde"));
- EXPECT_EQ(uni_chars.size(), 5U);
- EXPECT_EQ(url_index_->CachedResultsIndexForTerm(uni_chars),
- static_cast<size_t>(-1));
- }
-}
-
-TEST_F(InMemoryURLIndexTest, AddNewRows) {
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
- url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
-
- // Verify that the row we're going to add does not already exist.
- URLID new_row_id = 87654321;
- // Newly created URLRows get a last_visit time of 'right now' so it should
- // qualify as a quick result candidate.
- terms.push_back(ASCIIToUTF16("brokeandalone"));
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty());
-
- // Add a new row.
- URLRow new_row(GURL("http://www.brokeandaloneinmanitoba.com/"), new_row_id);
- new_row.set_last_visit(base::Time::Now());
- url_index_->UpdateURL(new_row_id, new_row);
-
- // Verify that we can retrieve it.
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
-
- // Add it again just to be sure that is harmless.
- url_index_->UpdateURL(new_row_id, new_row);
- EXPECT_EQ(url_index_->HistoryItemsForTerms(terms).size(), 1U);
-}
-
-TEST_F(InMemoryURLIndexTest, DeleteRows) {
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
- url_index_->Init(this, "en,ja,hi,zh");
- InMemoryURLIndex::String16Vector terms;
-
- // Make sure we actually get an existing result.
- terms.push_back(ASCIIToUTF16("DrudgeReport"));
- ScoredHistoryMatches matches = url_index_->HistoryItemsForTerms(terms);
- EXPECT_EQ(matches.size(), 1U);
-
- // Determine the row id for that result, delete that id, then search again.
- url_index_->DeleteURL(matches[0].url_info.id());
- EXPECT_TRUE(url_index_->HistoryItemsForTerms(terms).empty());
-}
-
-TEST_F(InMemoryURLIndexTest, CacheFilePath) {
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL(
- "/flammmy/frammy/"))));
- FilePath full_file_path;
- url_index_->GetCacheFilePath(&full_file_path);
- std::vector<FilePath::StringType> expected_parts;
- FilePath(FILE_PATH_LITERAL("/flammmy/frammy/History Provider Cache")).
- GetComponents(&expected_parts);
- std::vector<FilePath::StringType> actual_parts;
- full_file_path.GetComponents(&actual_parts);
- ASSERT_EQ(expected_parts.size(), actual_parts.size());
- size_t count = expected_parts.size();
- for (size_t i = 0; i < count; ++i)
- EXPECT_EQ(expected_parts[i], actual_parts[i]);
-}
-
-TEST_F(InMemoryURLIndexTest, CacheSaveRestore) {
- // Save the cache to a protobuf, restore it, and compare the results.
- url_index_.reset(new InMemoryURLIndex(FilePath(FILE_PATH_LITERAL("/dummy"))));
- InMemoryURLIndex& url_index(*(url_index_.get()));
- url_index.Init(this, "en,ja,hi,zh");
- in_memory_url_index::InMemoryURLIndexCacheItem index_cache;
- url_index.SavePrivateData(&index_cache);
-
- // Capture our private data so we can later compare for equality.
- int history_item_count(url_index.history_item_count_);
- InMemoryURLIndex::String16Vector word_list(url_index.word_list_);
- InMemoryURLIndex::WordMap word_map(url_index.word_map_);
- InMemoryURLIndex::CharWordIDMap char_word_map(url_index.char_word_map_);
- InMemoryURLIndex::WordIDHistoryMap word_id_history_map(
- url_index.word_id_history_map_);
- InMemoryURLIndex::HistoryInfoMap history_info_map(
- url_index.history_info_map_);
-
- // Prove that there is really something there.
- EXPECT_GT(url_index.history_item_count_, 0);
- EXPECT_FALSE(url_index.word_list_.empty());
- EXPECT_FALSE(url_index.word_map_.empty());
- EXPECT_FALSE(url_index.char_word_map_.empty());
- EXPECT_FALSE(url_index.word_id_history_map_.empty());
- EXPECT_FALSE(url_index.history_info_map_.empty());
-
- // Clear and then prove it's clear.
- url_index.ClearPrivateData();
- EXPECT_EQ(url_index.history_item_count_, 0);
- EXPECT_TRUE(url_index.word_list_.empty());
- EXPECT_TRUE(url_index.word_map_.empty());
- EXPECT_TRUE(url_index.char_word_map_.empty());
- EXPECT_TRUE(url_index.word_id_history_map_.empty());
- EXPECT_TRUE(url_index.history_info_map_.empty());
-
- // Restore the cache.
- EXPECT_TRUE(url_index.RestorePrivateData(index_cache));
-
- // Compare the restored and captured for equality.
- EXPECT_EQ(history_item_count, url_index.history_item_count_);
- EXPECT_EQ(word_list.size(), url_index.word_list_.size());
- EXPECT_EQ(word_map.size(), url_index.word_map_.size());
- EXPECT_EQ(char_word_map.size(), url_index.char_word_map_.size());
- EXPECT_EQ(word_id_history_map.size(), url_index.word_id_history_map_.size());
- EXPECT_EQ(history_info_map.size(), url_index.history_info_map_.size());
- // WordList must be index-by-index equal.
- size_t count = word_list.size();
- for (size_t i = 0; i < count; ++i)
- EXPECT_EQ(word_list[i], url_index.word_list_[i]);
- for (InMemoryURLIndex::CharWordIDMap::const_iterator expected =
- char_word_map.begin(); expected != char_word_map.end(); ++expected) {
- InMemoryURLIndex::CharWordIDMap::const_iterator actual =
- url_index.char_word_map_.find(expected->first);
- ASSERT_TRUE(url_index.char_word_map_.end() != actual);
- const InMemoryURLIndex::WordIDSet& expected_set(expected->second);
- const InMemoryURLIndex::WordIDSet& actual_set(actual->second);
- ASSERT_EQ(expected_set.size(), actual_set.size());
- for (InMemoryURLIndex::WordIDSet::const_iterator set_iter =
- expected_set.begin(); set_iter != expected_set.end(); ++set_iter)
- EXPECT_GT(actual_set.count(*set_iter), 0U);
- }
- for (InMemoryURLIndex::WordIDHistoryMap::const_iterator expected =
- word_id_history_map.begin(); expected != word_id_history_map.end();
- ++expected) {
- InMemoryURLIndex::WordIDHistoryMap::const_iterator actual =
- url_index.word_id_history_map_.find(expected->first);
- ASSERT_TRUE(url_index.word_id_history_map_.end() != actual);
- const InMemoryURLIndex::HistoryIDSet& expected_set(expected->second);
- const InMemoryURLIndex::HistoryIDSet& actual_set(actual->second);
- ASSERT_EQ(expected_set.size(), actual_set.size());
- for (InMemoryURLIndex::HistoryIDSet::const_iterator set_iter =
- expected_set.begin(); set_iter != expected_set.end(); ++set_iter)
- EXPECT_GT(actual_set.count(*set_iter), 0U);
- }
- for (InMemoryURLIndex::HistoryInfoMap::const_iterator expected =
- history_info_map.begin(); expected != history_info_map.end();
- ++expected) {
- InMemoryURLIndex::HistoryInfoMap::const_iterator actual =
- url_index.history_info_map_.find(expected->first);
- ASSERT_FALSE(url_index.history_info_map_.end() == actual);
- const URLRow& expected_row(expected->second);
- const URLRow& actual_row(actual->second);
- EXPECT_EQ(expected_row.visit_count(), actual_row.visit_count());
- EXPECT_EQ(expected_row.typed_count(), actual_row.typed_count());
- EXPECT_EQ(expected_row.last_visit(), actual_row.last_visit());
- EXPECT_EQ(expected_row.url(), actual_row.url());
- }
-}
-
} // namespace history
diff --git a/chrome/browser/history/url_database.cc b/chrome/browser/history/url_database.cc
index f6c041c..24decc6 100644
--- a/chrome/browser/history/url_database.cc
+++ b/chrome/browser/history/url_database.cc
@@ -238,25 +238,6 @@ bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) {
return true;
}
-bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) {
- DCHECK(!enumerator->initialized_);
- std::string sql("SELECT ");
- sql.append(kURLRowFields);
- sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count > ? OR "
- "typed_count > ?");
- enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str()));
- if (!enumerator->statement_) {
- NOTREACHED() << GetDB().GetErrorMessage();
- return false;
- }
- enumerator->statement_.BindInt64(
- 0, AutocompleteAgeThreshold().ToInternalValue());
- enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit);
- enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit);
- enumerator->initialized_ = true;
- return true;
-}
-
bool URLDatabase::IsFavIconUsed(FavIconID favicon_id) {
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT id FROM urls WHERE favicon_id=? LIMIT 1"));
diff --git a/chrome/browser/history/url_database.h b/chrome/browser/history/url_database.h
index c821ce7..ae1d7cd 100644
--- a/chrome/browser/history/url_database.h
+++ b/chrome/browser/history/url_database.h
@@ -127,14 +127,8 @@ class URLDatabase {
sql::Statement statement_;
};
- // Initializes the given enumerator to enumerator all URLs in the database.
- bool InitURLEnumeratorForEverything(URLEnumerator* enumerator);
-
// Initializes the given enumerator to enumerator all URLs in the database
- // that are historically significant: ones having been visited within 3 days,
- // having their URL manually typed more than once, or having been visited
- // more than 3 times.
- bool InitURLEnumeratorForSignificant(URLEnumerator* enumerator);
+ bool InitURLEnumeratorForEverything(URLEnumerator* enumerator);
// Favicons ------------------------------------------------------------------
diff --git a/chrome/browser/history/url_database_unittest.cc b/chrome/browser/history/url_database_unittest.cc
index c2f22ff6..b164e48 100644
--- a/chrome/browser/history/url_database_unittest.cc
+++ b/chrome/browser/history/url_database_unittest.cc
@@ -125,7 +125,8 @@ TEST_F(URLDatabaseTest, AddURL) {
// Tests adding, querying and deleting keyword visits.
TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
- URLRow url_info1(GURL("http://www.google.com/"));
+ const GURL url1("http://www.google.com/");
+ URLRow url_info1(url1);
url_info1.set_title(UTF8ToUTF16("Google"));
url_info1.set_visit_count(4);
url_info1.set_typed_count(2);
@@ -164,7 +165,8 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
// Make sure deleting a URL also deletes a keyword visit.
TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) {
- URLRow url_info1(GURL("http://www.google.com/"));
+ const GURL url1("http://www.google.com/");
+ URLRow url_info1(url1);
url_info1.set_title(UTF8ToUTF16("Google"));
url_info1.set_visit_count(4);
url_info1.set_typed_count(2);
@@ -185,41 +187,4 @@ TEST_F(URLDatabaseTest, DeleteURLDeletesKeywordSearchTermVisit) {
ASSERT_EQ(0U, matches.size());
}
-TEST_F(URLDatabaseTest, EnumeratorForSignificant) {
- std::set<std::string> good_urls;
- // Add URLs which do and don't meet the criteria.
- URLRow url_no_match(GURL("http://www.url_no_match.com/"));
- EXPECT_TRUE(AddURL(url_no_match));
-
- std::string url_string2("http://www.url_match_visit_count.com/");
- good_urls.insert("http://www.url_match_visit_count.com/");
- URLRow url_match_visit_count(GURL("http://www.url_match_visit_count.com/"));
- url_match_visit_count.set_visit_count(kLowQualityMatchVisitLimit + 1);
- EXPECT_TRUE(AddURL(url_match_visit_count));
-
- good_urls.insert("http://www.url_match_typed_count.com/");
- URLRow url_match_typed_count(GURL("http://www.url_match_typed_count.com/"));
- url_match_typed_count.set_typed_count(kLowQualityMatchTypedLimit + 1);
- EXPECT_TRUE(AddURL(url_match_typed_count));
-
- good_urls.insert("http://www.url_match_last_visit.com/");
- URLRow url_match_last_visit(GURL("http://www.url_match_last_visit.com/"));
- url_match_last_visit.set_last_visit(Time::Now() - TimeDelta::FromDays(1));
- EXPECT_TRUE(AddURL(url_match_last_visit));
-
- URLRow url_no_match_last_visit(GURL(
- "http://www.url_no_match_last_visit.com/"));
- url_no_match_last_visit.set_last_visit(Time::Now() -
- TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays + 1));
- EXPECT_TRUE(AddURL(url_no_match_last_visit));
-
- URLDatabase::URLEnumerator history_enum;
- EXPECT_TRUE(InitURLEnumeratorForSignificant(&history_enum));
- URLRow row;
- int row_count = 0;
- for (; history_enum.GetNextURL(&row); ++row_count)
- EXPECT_EQ(1U, good_urls.count(row.url().spec()));
- EXPECT_EQ(3, row_count);
-}
-
} // namespace history
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index 972f1bc..b290923 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -17,7 +17,6 @@
'common',
'common_net',
'debugger',
- 'in_memory_url_index_cache_proto',
'installer_util',
'platform_locale_settings',
'profile_import',
@@ -1233,7 +1232,6 @@
'browser/history/in_memory_history_backend.h',
'browser/history/in_memory_url_index.cc',
'browser/history/in_memory_url_index.h',
- '<(protoc_out_dir)/chrome/browser/history/in_memory_url_index_cache.pb.cc',
'browser/history/page_usage_data.cc',
'browser/history/page_usage_data.h',
'browser/history/query_parser.cc',
@@ -4497,52 +4495,6 @@
'../third_party/protobuf/protobuf.gyp:protobuf_lite',
],
},
- {
- # Protobuf compiler / generator for the InMemoryURLIndex caching
- # protocol buffer.
- 'target_name': 'in_memory_url_index_cache_proto',
- 'type': 'none',
- 'sources': [ 'browser/history/in_memory_url_index_cache.proto' ],
- 'rules': [
- {
- 'rule_name': 'genproto',
- 'extension': 'proto',
- 'inputs': [
- '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)',
- ],
- 'variables': {
- # The protoc compiler requires a proto_path argument with the
- # directory containing the .proto file.
- # There's no generator variable that corresponds to this, so fake
- # it.
- 'rule_input_relpath': 'browser/history',
- },
- 'outputs': [
- '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.h',
- '<(protoc_out_dir)/chrome/<(rule_input_relpath)/<(RULE_INPUT_ROOT).pb.cc',
- ],
- 'action': [
- '<(PRODUCT_DIR)/<(EXECUTABLE_PREFIX)protoc<(EXECUTABLE_SUFFIX)',
- '--proto_path=./<(rule_input_relpath)',
- './<(rule_input_relpath)/<(RULE_INPUT_ROOT)<(RULE_INPUT_EXT)',
- '--cpp_out=<(protoc_out_dir)/chrome/<(rule_input_relpath)',
- ],
- 'message': 'Generating C++ code from <(RULE_INPUT_PATH)',
- },
- ],
- 'dependencies': [
- '../third_party/protobuf/protobuf.gyp:protobuf_lite',
- '../third_party/protobuf/protobuf.gyp:protoc#host',
- ],
- 'direct_dependent_settings': {
- 'include_dirs': [
- '<(protoc_out_dir)',
- ]
- },
- 'export_dependent_settings': [
- '../third_party/protobuf/protobuf.gyp:protobuf_lite',
- ],
- },
],
}
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index cb7c26f..8c99ce1 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -1137,7 +1137,6 @@
],
},
'sources': [
- '<(protoc_out_dir)/chrome/browser/history/in_memory_url_index_cache.pb.cc',
'app/breakpad_mac_stubs.mm',
'app/chrome_dll.rc',
# All unittests in browser, common, renderer and service.
diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc
index 08f61d2..e3e325d 100644
--- a/chrome/common/chrome_switches.cc
+++ b/chrome/common/chrome_switches.cc
@@ -519,9 +519,6 @@ const char kEnableFastback[] = "enable-fastback";
// testing, for example page cycler and layout tests. See bug 1157243.
const char kEnableFileCookies[] = "enable-file-cookies";
-// Enable the use of the HistoryQuickProvider for autocomplete results.
-const char kEnableHistoryQuickProvider[] = "enable-history-quick-provider";
-
// Enable FileSystem API URLs.
const char kEnableFileSystemURLScheme[] = "enable-filesystem-url-scheme";
diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h
index 4c21463..4504f2c 100644
--- a/chrome/common/chrome_switches.h
+++ b/chrome/common/chrome_switches.h
@@ -157,7 +157,6 @@ extern const char kEnableFastback[];
extern const char kEnableFileCookies[];
extern const char kEnableFileSystemURLScheme[];
extern const char kEnableGPUPlugin[];
-extern const char kEnableHistoryQuickProvider[];
extern const char kEnableInBrowserThumbnailing[];
extern const char kEnableIPv6[];
extern const char kEnableJavaScriptI18NAPI[];