summaryrefslogtreecommitdiffstats
path: root/chrome/browser
diff options
context:
space:
mode:
authorbrettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-30 02:36:20 +0000
committerbrettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-10-30 02:36:20 +0000
commit5ffd5e9b703d122dbac2dce9425a88b414f6de97 (patch)
treea9654890ca1178c026e1707a7bc29387eca03b45 /chrome/browser
parenta6604d90563ed00e56d4ffe5116d3c7726a893f9 (diff)
downloadchromium_src-5ffd5e9b703d122dbac2dce9425a88b414f6de97.zip
chromium_src-5ffd5e9b703d122dbac2dce9425a88b414f6de97.tar.gz
chromium_src-5ffd5e9b703d122dbac2dce9425a88b414f6de97.tar.bz2
Make sure titles and bodies of web pages that come into the history system very
late are indexed. Very slow pages (>20 seconds) would previously not get indexed. BUG=3835 Review URL: http://codereview.chromium.org/8899 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@4201 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser')
-rw-r--r--chrome/browser/history/expire_history_backend_unittest.cc7
-rw-r--r--chrome/browser/history/history_backend.cc3
-rw-r--r--chrome/browser/history/text_database_manager.cc56
-rw-r--r--chrome/browser/history/text_database_manager.h3
-rw-r--r--chrome/browser/history/text_database_manager_unittest.cc102
5 files changed, 139 insertions, 32 deletions
diff --git a/chrome/browser/history/expire_history_backend_unittest.cc b/chrome/browser/history/expire_history_backend_unittest.cc
index fab52e5..7195cdb 100644
--- a/chrome/browser/history/expire_history_backend_unittest.cc
+++ b/chrome/browser/history/expire_history_backend_unittest.cc
@@ -118,7 +118,8 @@ class ExpireHistoryTest : public testing::Test,
if (thumb_db_->Init(thumb_name) != INIT_OK)
thumb_db_.reset();
- text_db_.reset(new TextDatabaseManager(dir_, main_db_.get()));
+ text_db_.reset(new TextDatabaseManager(dir_,
+ main_db_.get(), main_db_.get()));
if (!text_db_->Init())
text_db_.reset();
@@ -398,7 +399,7 @@ TEST_F(ExpireHistoryTest, DeleteURLAndFavicon) {
// it just like the test set-up did.
text_db_.reset();
EXPECT_TRUE(IsStringInFile(fts_filename, "goats"));
- text_db_.reset(new TextDatabaseManager(dir_, main_db_.get()));
+ text_db_.reset(new TextDatabaseManager(dir_, main_db_.get(), main_db_.get()));
ASSERT_TRUE(text_db_->Init());
expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(),
text_db_.get());
@@ -410,7 +411,7 @@ TEST_F(ExpireHistoryTest, DeleteURLAndFavicon) {
// doesn't remove it from the file, we want to be sure we're doing the latter.
text_db_.reset();
EXPECT_FALSE(IsStringInFile(fts_filename, "goats"));
- text_db_.reset(new TextDatabaseManager(dir_, main_db_.get()));
+ text_db_.reset(new TextDatabaseManager(dir_, main_db_.get(), main_db_.get()));
ASSERT_TRUE(text_db_->Init());
expirer_.SetDatabases(main_db_.get(), archived_db_.get(), thumb_db_.get(),
text_db_.get());
diff --git a/chrome/browser/history/history_backend.cc b/chrome/browser/history/history_backend.cc
index afaee0a..c3bdd4a 100644
--- a/chrome/browser/history/history_backend.cc
+++ b/chrome/browser/history/history_backend.cc
@@ -533,7 +533,8 @@ void HistoryBackend::InitImpl() {
// Full-text database. This has to be first so we can pass it to the
// HistoryDatabase for migration.
- text_database_.reset(new TextDatabaseManager(history_dir_, db_.get()));
+ text_database_.reset(new TextDatabaseManager(history_dir_,
+ db_.get(), db_.get()));
if (!text_database_->Init()) {
LOG(WARNING) << "Text database initialization failed, running without it.";
text_database_.reset();
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc
index 21a7436..483befa 100644
--- a/chrome/browser/history/text_database_manager.cc
+++ b/chrome/browser/history/text_database_manager.cc
@@ -67,9 +67,11 @@ bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
// TextDatabaseManager ---------------------------------------------------------
TextDatabaseManager::TextDatabaseManager(const std::wstring& dir,
+ URLDatabase* url_database,
VisitDatabase* visit_database)
: dir_(dir),
db_(NULL),
+ url_database_(url_database),
visit_database_(visit_database),
recent_changes_(RecentChangeList::NO_AUTO_EVICT),
transaction_nesting_(0),
@@ -170,8 +172,38 @@ void TextDatabaseManager::AddPageURL(const GURL& url,
void TextDatabaseManager::AddPageTitle(const GURL& url,
const std::wstring& title) {
RecentChangeList::iterator found = recent_changes_.Peek(url);
- if (found == recent_changes_.end())
+ if (found == recent_changes_.end()) {
+ // This page is not in our cache of recent pages. This is very much an edge
+ // case as normally a title will come in <20 seconds after the page commits,
+ // and WebContents will avoid spamming us with >1 title per page. However,
+ // it could come up if your connection is unhappy, and we don't want to
+ // miss anything.
+ //
+ // To solve this problem, we'll just associate the most recent visit with
+ // the new title and index that using the regular code path.
+ URLRow url_row;
+ if (!url_database_->GetRowForURL(url, &url_row))
+ return; // URL is unknown, give up.
+ VisitRow visit;
+ if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
+ return; // No recent visit, give up.
+
+ if (visit.is_indexed) {
+ // If this page was already indexed, we could have a body that came in
+ // first and we don't want to overwrite it. We could go query for the
+ // current body, or have a special setter for only the title, but this is
+ // not worth it for this edge case.
+ //
+ // It will be almost impossible for the title to take longer than
+ // kExpirationSec yet we got a body in less than that time, since the
+ // title should always come in first.
+ return;
+ }
+
+ AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
+ title, std::wstring());
return; // We don't know about this page, give up.
+ }
PageInfo& info = found->second;
if (info.has_body()) {
@@ -188,8 +220,26 @@ void TextDatabaseManager::AddPageTitle(const GURL& url,
void TextDatabaseManager::AddPageContents(const GURL& url,
const std::wstring& body) {
RecentChangeList::iterator found = recent_changes_.Peek(url);
- if (found == recent_changes_.end())
- return; // We don't know about this page, give up.
+ if (found == recent_changes_.end()) {
+ // This page is not in our cache of recent pages. This means that the page
+ // took more than kExpirationSec to load. Often, this will be the result of
+ // a very slow iframe or other resource on the page that makes us think its
+ // still loading.
+ //
+ // As a fallback, set the most recent visit's contents using the input, and
+ // use the last set title in the URL table as the title to index.
+ URLRow url_row;
+ if (!url_database_->GetRowForURL(url, &url_row))
+ return; // URL is unknown, give up.
+ VisitRow visit;
+ if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
+ return; // No recent visit, give up.
+
+ // Use the title from the URL row as the title for the indexing.
+ AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
+ url_row.title(), body);
+ return;
+ }
PageInfo& info = found->second;
if (info.has_title()) {
diff --git a/chrome/browser/history/text_database_manager.h b/chrome/browser/history/text_database_manager.h
index fc5bdae..f1195ef 100644
--- a/chrome/browser/history/text_database_manager.h
+++ b/chrome/browser/history/text_database_manager.h
@@ -71,6 +71,7 @@ class TextDatabaseManager {
// (of recent visits). The visit database will be updated to refer to the
// added text database entries.
explicit TextDatabaseManager(const std::wstring& dir,
+ URLDatabase* url_database,
VisitDatabase* visit_database);
~TextDatabaseManager();
@@ -154,6 +155,7 @@ class TextDatabaseManager {
private:
// These tests call ExpireRecentChangesForTime to force expiration.
FRIEND_TEST(TextDatabaseManagerTest, InsertPartial);
+ FRIEND_TEST(TextDatabaseManagerTest, PartialComplete);
FRIEND_TEST(ExpireHistoryTest, DeleteURLAndFavicon);
FRIEND_TEST(ExpireHistoryTest, FlushRecentURLsUnstarred);
@@ -244,6 +246,7 @@ class TextDatabaseManager {
DBCloseScoper db_close_scoper_;
// Non-owning pointers to the recent history databases for URLs and visits.
+ URLDatabase* url_database_;
VisitDatabase* visit_database_;
// Lists recent additions that we have not yet filled out with the title and
diff --git a/chrome/browser/history/text_database_manager_unittest.cc b/chrome/browser/history/text_database_manager_unittest.cc
index fd9a46d..6781a76 100644
--- a/chrome/browser/history/text_database_manager_unittest.cc
+++ b/chrome/browser/history/text_database_manager_unittest.cc
@@ -56,17 +56,18 @@ class TextDatabaseManagerTest : public testing::Test {
std::wstring dir_;
};
-// This provides a simple implementation of a VisitDatabase using an in-memory
-// sqlite connection. The text database manager expects to be able to update
-// the visit database to keep in sync.
-class InMemVisitDB : public VisitDatabase {
+// This provides a simple implementation of a URL+VisitDatabase using an
+// in-memory sqlite connection. The text database manager expects to be able to
+// update the visit database to keep in sync.
+class InMemDB : public URLDatabase, public VisitDatabase {
public:
- InMemVisitDB() {
+ InMemDB() {
sqlite3_open(":memory:", &db_);
statement_cache_ = new SqliteStatementCache(db_);
+ CreateURLTable(false);
InitVisitTable();
}
- ~InMemVisitDB() {
+ ~InMemDB() {
delete statement_cache_;
sqlite3_close(db_);
}
@@ -80,7 +81,7 @@ class InMemVisitDB : public VisitDatabase {
sqlite3* db_;
SqliteStatementCache* statement_cache_;
- DISALLOW_EVIL_CONSTRUCTORS(InMemVisitDB);
+ DISALLOW_EVIL_CONSTRUCTORS(InMemDB);
};
// Adds all the pages once, and the first page once more in the next month.
@@ -166,8 +167,8 @@ bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
// Tests basic querying.
TEST_F(TextDatabaseManagerTest, InsertQuery) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
std::vector<Time> times;
@@ -198,8 +199,8 @@ TEST_F(TextDatabaseManagerTest, InsertQuery) {
// tests right now, but we test it anyway.
TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
// First add one without a visit.
@@ -222,8 +223,8 @@ TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
// visit was updated properly.
TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
// First add a visit to a page. We can just make up a URL ID since there is
@@ -261,8 +262,8 @@ TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
// Tests that partial inserts that expire are added to the database.
TEST_F(TextDatabaseManagerTest, InsertPartial) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
// Add the first one with just a URL.
@@ -305,6 +306,57 @@ TEST_F(TextDatabaseManagerTest, InsertPartial) {
EXPECT_TRUE(ResultsHaveURL(results, kURL3));
}
+// Tests that partial inserts (due to timeouts) will still get updated if the
+// data comes in later.
+TEST_F(TextDatabaseManagerTest, PartialComplete) {
+ ASSERT_TRUE(Init());
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
+ ASSERT_TRUE(manager.Init());
+
+ Time added_time = Time::Now();
+ GURL url(kURL1);
+
+ // We have to have the URL in the URL and visit databases for this test to
+ // work.
+ URLRow url_row(url);
+ url_row.set_title(L"chocolate");
+ URLID url_id = visit_db.AddURL(url_row);
+ ASSERT_TRUE(url_id);
+ VisitRow visit_row;
+ visit_row.url_id = url_id;
+ visit_row.visit_time = added_time;
+ visit_db.AddVisit(&visit_row);
+
+ // Add a URL with no title or body, and say that it expired.
+ manager.AddPageURL(url, 0, 0, added_time);
+ TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
+ manager.FlushOldChangesForTime(expire_time);
+
+ // Add the title. We should be able to query based on that. The title in the
+ // URL row we set above should not come into the picture.
+ manager.AddPageTitle(url, L"Some unique title");
+ Time first_time_searched;
+ QueryOptions options;
+ std::vector<TextDatabase::Match> results;
+ manager.GetTextMatches(L"unique", options, &results, &first_time_searched);
+ EXPECT_EQ(1, results.size());
+ manager.GetTextMatches(L"chocolate", options, &results, &first_time_searched);
+ EXPECT_EQ(0, results.size());
+
+ // Now add the body, which should be queryable.
+ manager.AddPageContents(url, L"Very awesome body");
+ manager.GetTextMatches(L"awesome", options, &results, &first_time_searched);
+ EXPECT_EQ(1, results.size());
+
+ // Adding the body will actually copy the title from the URL table rather
+ // than the previously indexed row (we made them not match above). This isn't
+ // necessarily what we want, but it's how it's implemented, and we don't want
+ // to regress it.
+ manager.GetTextMatches(L"chocolate", options, &results, &first_time_searched);
+ EXPECT_EQ(1, results.size());
+}
+
// Tests that changes get properly committed to disk.
TEST_F(TextDatabaseManagerTest, Writing) {
ASSERT_TRUE(Init());
@@ -313,11 +365,11 @@ TEST_F(TextDatabaseManagerTest, Writing) {
std::vector<TextDatabase::Match> results;
Time first_time_searched;
- InMemVisitDB visit_db;
+ InMemDB visit_db;
// Create the manager and write some stuff to it.
{
- TextDatabaseManager manager(dir_, &visit_db);
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
std::vector<Time> times;
@@ -331,7 +383,7 @@ TEST_F(TextDatabaseManagerTest, Writing) {
// Recreate the manager and make sure it finds the written stuff.
{
- TextDatabaseManager manager(dir_, &visit_db);
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
// We should have matched every page again.
@@ -349,11 +401,11 @@ TEST_F(TextDatabaseManagerTest, WritingTransaction) {
std::vector<TextDatabase::Match> results;
Time first_time_searched;
- InMemVisitDB visit_db;
+ InMemDB visit_db;
// Create the manager and write some stuff to it.
{
- TextDatabaseManager manager(dir_, &visit_db);
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
std::vector<Time> times;
@@ -369,7 +421,7 @@ TEST_F(TextDatabaseManagerTest, WritingTransaction) {
// Recreate the manager and make sure it finds the written stuff.
{
- TextDatabaseManager manager(dir_, &visit_db);
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
// We should have matched every page again.
@@ -381,8 +433,8 @@ TEST_F(TextDatabaseManagerTest, WritingTransaction) {
// Tests querying where the maximum number of items is met.
TEST_F(TextDatabaseManagerTest, QueryMax) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
std::vector<Time> times;
@@ -418,8 +470,8 @@ TEST_F(TextDatabaseManagerTest, QueryMax) {
// Tests querying backwards in time in chunks.
TEST_F(TextDatabaseManagerTest, QueryBackwards) {
ASSERT_TRUE(Init());
- InMemVisitDB visit_db;
- TextDatabaseManager manager(dir_, &visit_db);
+ InMemDB visit_db;
+ TextDatabaseManager manager(dir_, &visit_db, &visit_db);
ASSERT_TRUE(manager.Init());
std::vector<Time> times;